// File: ab_riscv_interpreter/v/zve64x/perm.rs
1//! Zve64x permutation instructions
2
3#[cfg(test)]
4mod tests;
5pub mod zve64x_perm_helpers;
6
7use crate::v::vector_registers::VectorRegistersExt;
8use crate::v::zve64x::zve64x_helpers;
9use crate::{ExecutableInstruction, ExecutionError, ProgramCounter, RegisterFile, VirtualMemory};
10use ab_riscv_macros::instruction_execution;
11use ab_riscv_primitives::prelude::*;
12use core::fmt;
13use core::ops::ControlFlow;
14
15#[instruction_execution]
16impl<Reg, Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
17    ExecutableInstruction<Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
18    for Zve64xPermInstruction<Reg>
19where
20    Reg: Register,
21    Regs: RegisterFile<Reg>,
22    ExtState: VectorRegistersExt<Reg, CustomError>,
23    [(); ExtState::ELEN as usize]:,
24    [(); ExtState::VLEN as usize]:,
25    [(); ExtState::VLENB as usize]:,
26    Memory: VirtualMemory,
27    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
28    CustomError: fmt::Debug,
29{
30    #[inline(always)]
31    fn execute(
32        self,
33        regs: &mut Regs,
34        ext_state: &mut ExtState,
35        _memory: &mut Memory,
36        program_counter: &mut PC,
37        _system_instruction_handler: &mut InstructionHandler,
38    ) -> Result<ControlFlow<()>, ExecutionError<Reg::Type, CustomError>> {
39        match self {
40            // vmv.x.s rd, vs2
41            // Copies sign-extended element 0 of vs2 (at current SEW) to GPR rd.
42            // Requires valid vtype (needs SEW to know element width).
43            // Does not use vl or masking; always reads element 0.
44            // Resets vstart per spec §6.3.
45            Self::VmvXS { rd, vs2 } => {
46                if !ext_state.vector_instructions_allowed() {
47                    Err(ExecutionError::IllegalInstruction {
48                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
49                    })?;
50                }
51                let vtype = ext_state
52                    .vtype()
53                    .ok_or(ExecutionError::IllegalInstruction {
54                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
55                    })?;
56                let sew = vtype.vsew();
57                // SAFETY: element 0 is always within register v(vs2_base), byte offset 0;
58                // VLENB >= sew.bytes() for all legal vtype configurations.
59                let raw = unsafe {
60                    zve64x_perm_helpers::read_element_0_u64(ext_state.read_vreg(), vs2.bits(), sew)
61                };
62                let sign_extended = zve64x_perm_helpers::sign_extend_to_reg::<Reg>(raw, sew);
63                regs.write(rd, sign_extended);
64                ext_state.mark_vs_dirty();
65                ext_state.reset_vstart();
66            }
67            // vmv.s.x vd, rs1
68            // Copies scalar GPR rs1 (zero-extended / truncated to SEW) into element 0 of vd.
69            // When vl == 0, the write is suppressed but vstart is still reset.
70            // Resets vstart per spec §6.3.
71            Self::VmvSX { vd, rs1 } => {
72                if !ext_state.vector_instructions_allowed() {
73                    Err(ExecutionError::IllegalInstruction {
74                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
75                    })?;
76                }
77                let vtype = ext_state
78                    .vtype()
79                    .ok_or(ExecutionError::IllegalInstruction {
80                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
81                    })?;
82                let sew = vtype.vsew();
83                let vl = ext_state.vl();
84                let vstart = u32::from(ext_state.vstart());
85                // Per spec §16.1: update only when vstart < vl.
86                if vstart < vl {
87                    let scalar = regs.read(rs1).as_u64();
88                    // SAFETY: element 0 always fits.
89                    unsafe {
90                        zve64x_perm_helpers::write_element_0_u64(
91                            ext_state.write_vreg(),
92                            vd.bits(),
93                            sew,
94                            scalar,
95                        );
96                    }
97                }
98                ext_state.mark_vs_dirty();
99                ext_state.reset_vstart();
100            }
101            // vslideup.vx vd, vs2, rs1, vm
102            // Slides elements of vs2 up by the scalar offset in rs1.
103            // Elements vd[0..offset] are unchanged (tail-undisturbed for those positions).
104            // Elements vd[i] for offset <= i < vl get vs2[i - offset].
105            // Per spec §16.3.1: vd must not overlap vs2.
106            Self::VslideupVx { vd, vs2, rs1, vm } => {
107                if !ext_state.vector_instructions_allowed() {
108                    Err(ExecutionError::IllegalInstruction {
109                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
110                    })?;
111                }
112                let vtype = ext_state
113                    .vtype()
114                    .ok_or(ExecutionError::IllegalInstruction {
115                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
116                    })?;
117                let group_regs = vtype.vlmul().register_count();
118                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
119                    program_counter,
120                    vd,
121                    group_regs,
122                )?;
123                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
124                    program_counter,
125                    vs2,
126                    group_regs,
127                )?;
128                // vd must not overlap vs2
129                zve64x_perm_helpers::check_no_overlap::<Reg, _, _, _>(
130                    program_counter,
131                    vd,
132                    vs2,
133                    group_regs,
134                )?;
135                if !vm && vd.bits() == 0 {
136                    Err(ExecutionError::IllegalInstruction {
137                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
138                    })?;
139                }
140                let sew = vtype.vsew();
141                let vl = ext_state.vl();
142                let vstart = u32::from(ext_state.vstart());
143                let offset = regs.read(rs1).as_u64();
144                // SAFETY: alignment and no-overlap verified above; vl <= VLMAX.
145                unsafe {
146                    zve64x_perm_helpers::execute_slideup(
147                        ext_state, vd, vs2, vm, vl, vstart, sew, offset,
148                    );
149                }
150            }
151            // vslideup.vi vd, vs2, uimm, vm
152            // Same as vslideup.vx but offset is a 5-bit unsigned immediate.
153            Self::VslideupVi { vd, vs2, uimm, vm } => {
154                if !ext_state.vector_instructions_allowed() {
155                    Err(ExecutionError::IllegalInstruction {
156                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
157                    })?;
158                }
159                let vtype = ext_state
160                    .vtype()
161                    .ok_or(ExecutionError::IllegalInstruction {
162                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
163                    })?;
164                let group_regs = vtype.vlmul().register_count();
165                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
166                    program_counter,
167                    vd,
168                    group_regs,
169                )?;
170                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
171                    program_counter,
172                    vs2,
173                    group_regs,
174                )?;
175                zve64x_perm_helpers::check_no_overlap::<Reg, _, _, _>(
176                    program_counter,
177                    vd,
178                    vs2,
179                    group_regs,
180                )?;
181                if !vm && vd.bits() == 0 {
182                    Err(ExecutionError::IllegalInstruction {
183                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
184                    })?;
185                }
186                let sew = vtype.vsew();
187                let vl = ext_state.vl();
188                let vstart = u32::from(ext_state.vstart());
189                let offset = u64::from(uimm);
190                // SAFETY: same as VslideupVx.
191                unsafe {
192                    zve64x_perm_helpers::execute_slideup(
193                        ext_state, vd, vs2, vm, vl, vstart, sew, offset,
194                    );
195                }
196            }
197            // vslidedown.vx vd, vs2, rs1, vm
198            // Element vd[i] = vs2[i + offset] if i + offset < VLMAX, else 0.
199            // vd may overlap vs2 for slidedown.
200            Self::VslidedownVx { vd, vs2, rs1, vm } => {
201                if !ext_state.vector_instructions_allowed() {
202                    Err(ExecutionError::IllegalInstruction {
203                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
204                    })?;
205                }
206                let vtype = ext_state
207                    .vtype()
208                    .ok_or(ExecutionError::IllegalInstruction {
209                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
210                    })?;
211                let group_regs = vtype.vlmul().register_count();
212                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
213                    program_counter,
214                    vd,
215                    group_regs,
216                )?;
217                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
218                    program_counter,
219                    vs2,
220                    group_regs,
221                )?;
222                if !vm && vd.bits() == 0 {
223                    Err(ExecutionError::IllegalInstruction {
224                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
225                    })?;
226                }
227                let sew = vtype.vsew();
228                let vl = ext_state.vl();
229                let vstart = u32::from(ext_state.vstart());
230                let vlmax = ext_state.vlmax_for_vtype(vtype);
231                let offset = regs.read(rs1).as_u64();
232                // SAFETY: alignment verified above; vl <= VLMAX; offset clamped in helper.
233                unsafe {
234                    zve64x_perm_helpers::execute_slidedown(
235                        ext_state, vd, vs2, vm, vl, vstart, sew, vlmax, offset,
236                    );
237                }
238            }
239            // vslidedown.vi vd, vs2, uimm, vm
240            // Same as vslidedown.vx but offset is a 5-bit unsigned immediate.
241            Self::VslidedownVi { vd, vs2, uimm, vm } => {
242                if !ext_state.vector_instructions_allowed() {
243                    Err(ExecutionError::IllegalInstruction {
244                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
245                    })?;
246                }
247                let vtype = ext_state
248                    .vtype()
249                    .ok_or(ExecutionError::IllegalInstruction {
250                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
251                    })?;
252                let group_regs = vtype.vlmul().register_count();
253                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
254                    program_counter,
255                    vd,
256                    group_regs,
257                )?;
258                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
259                    program_counter,
260                    vs2,
261                    group_regs,
262                )?;
263                if !vm && vd.bits() == 0 {
264                    Err(ExecutionError::IllegalInstruction {
265                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
266                    })?;
267                }
268                let sew = vtype.vsew();
269                let vl = ext_state.vl();
270                let vstart = u32::from(ext_state.vstart());
271                let vlmax = ext_state.vlmax_for_vtype(vtype);
272                let offset = u64::from(uimm);
273                // SAFETY: same as VslidedownVx.
274                unsafe {
275                    zve64x_perm_helpers::execute_slidedown(
276                        ext_state, vd, vs2, vm, vl, vstart, sew, vlmax, offset,
277                    );
278                }
279            }
280            // vslide1up.vx vd, vs2, rs1, vm
281            // Element 0 of vd gets the scalar value rs1 (written at SEW width).
282            // Elements vd[i] for 1 <= i < vl get vs2[i - 1].
283            // vd must not overlap vs2.
284            Self::Vslide1upVx { vd, vs2, rs1, vm } => {
285                if !ext_state.vector_instructions_allowed() {
286                    Err(ExecutionError::IllegalInstruction {
287                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
288                    })?;
289                }
290                let vtype = ext_state
291                    .vtype()
292                    .ok_or(ExecutionError::IllegalInstruction {
293                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
294                    })?;
295                let group_regs = vtype.vlmul().register_count();
296                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
297                    program_counter,
298                    vd,
299                    group_regs,
300                )?;
301                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
302                    program_counter,
303                    vs2,
304                    group_regs,
305                )?;
306                zve64x_perm_helpers::check_no_overlap::<Reg, _, _, _>(
307                    program_counter,
308                    vd,
309                    vs2,
310                    group_regs,
311                )?;
312                if !vm && vd.bits() == 0 {
313                    Err(ExecutionError::IllegalInstruction {
314                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
315                    })?;
316                }
317                let sew = vtype.vsew();
318                let vl = ext_state.vl();
319                let vstart = u32::from(ext_state.vstart());
320                let scalar = regs.read(rs1).as_u64();
321                // SAFETY: alignment and no-overlap verified; vl <= VLMAX.
322                unsafe {
323                    zve64x_perm_helpers::execute_slide1up(
324                        ext_state, vd, vs2, vm, vl, vstart, sew, scalar,
325                    );
326                }
327            }
328            // vslide1down.vx vd, vs2, rs1, vm
329            // Element vd[i] = vs2[i + 1] for 0 <= i < vl - 1.
330            // Element vd[vl - 1] gets the scalar value rs1.
331            // vd may overlap vs2 for slide1down.
332            Self::Vslide1downVx { vd, vs2, rs1, vm } => {
333                if !ext_state.vector_instructions_allowed() {
334                    Err(ExecutionError::IllegalInstruction {
335                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
336                    })?;
337                }
338                let vtype = ext_state
339                    .vtype()
340                    .ok_or(ExecutionError::IllegalInstruction {
341                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
342                    })?;
343                let group_regs = vtype.vlmul().register_count();
344                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
345                    program_counter,
346                    vd,
347                    group_regs,
348                )?;
349                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
350                    program_counter,
351                    vs2,
352                    group_regs,
353                )?;
354                if !vm && vd.bits() == 0 {
355                    Err(ExecutionError::IllegalInstruction {
356                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
357                    })?;
358                }
359                let sew = vtype.vsew();
360                let vl = ext_state.vl();
361                let vstart = u32::from(ext_state.vstart());
362                let scalar = regs.read(rs1).as_u64();
363                // SAFETY: alignment verified; vl <= VLMAX; overlap permitted by spec.
364                unsafe {
365                    zve64x_perm_helpers::execute_slide1down(
366                        ext_state, vd, vs2, vm, vl, vstart, sew, scalar,
367                    );
368                }
369            }
370            // vrgather.vv vd, vs2, vs1, vm
371            // vd[i] = (vs1[i] < VLMAX) ? vs2[vs1[i]] : 0
372            // vd must not overlap vs1 or vs2.
373            Self::VrgatherVv { vd, vs2, vs1, vm } => {
374                if !ext_state.vector_instructions_allowed() {
375                    Err(ExecutionError::IllegalInstruction {
376                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
377                    })?;
378                }
379                let vtype = ext_state
380                    .vtype()
381                    .ok_or(ExecutionError::IllegalInstruction {
382                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
383                    })?;
384                let group_regs = vtype.vlmul().register_count();
385                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
386                    program_counter,
387                    vd,
388                    group_regs,
389                )?;
390                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
391                    program_counter,
392                    vs2,
393                    group_regs,
394                )?;
395                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
396                    program_counter,
397                    vs1,
398                    group_regs,
399                )?;
400                zve64x_perm_helpers::check_no_overlap::<Reg, _, _, _>(
401                    program_counter,
402                    vd,
403                    vs2,
404                    group_regs,
405                )?;
406                zve64x_perm_helpers::check_no_overlap::<Reg, _, _, _>(
407                    program_counter,
408                    vd,
409                    vs1,
410                    group_regs,
411                )?;
412                if !vm && vd.bits() == 0 {
413                    Err(ExecutionError::IllegalInstruction {
414                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
415                    })?;
416                }
417                let sew = vtype.vsew();
418                let vl = ext_state.vl();
419                let vstart = u32::from(ext_state.vstart());
420                let vlmax = ext_state.vlmax_for_vtype(vtype);
421                // SAFETY: all alignment and overlap constraints verified above; vl <= VLMAX.
422                unsafe {
423                    zve64x_perm_helpers::execute_rgather_vv(
424                        ext_state, vd, vs2, vs1, vm, vl, vstart, sew, vlmax,
425                    );
426                }
427            }
428            // vrgather.vx vd, vs2, rs1, vm
429            // All active elements of vd get vs2[rs1] if rs1 < VLMAX, else 0.
430            // vd must not overlap vs2.
431            Self::VrgatherVx { vd, vs2, rs1, vm } => {
432                if !ext_state.vector_instructions_allowed() {
433                    Err(ExecutionError::IllegalInstruction {
434                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
435                    })?;
436                }
437                let vtype = ext_state
438                    .vtype()
439                    .ok_or(ExecutionError::IllegalInstruction {
440                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
441                    })?;
442                let group_regs = vtype.vlmul().register_count();
443                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
444                    program_counter,
445                    vd,
446                    group_regs,
447                )?;
448                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
449                    program_counter,
450                    vs2,
451                    group_regs,
452                )?;
453                zve64x_perm_helpers::check_no_overlap::<Reg, _, _, _>(
454                    program_counter,
455                    vd,
456                    vs2,
457                    group_regs,
458                )?;
459                if !vm && vd.bits() == 0 {
460                    Err(ExecutionError::IllegalInstruction {
461                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
462                    })?;
463                }
464                let sew = vtype.vsew();
465                let vl = ext_state.vl();
466                let vstart = u32::from(ext_state.vstart());
467                let vlmax = ext_state.vlmax_for_vtype(vtype);
468                let index = regs.read(rs1).as_u64();
469                // SAFETY: alignment and no-overlap verified; vl <= VLMAX.
470                unsafe {
471                    zve64x_perm_helpers::execute_rgather_scalar(
472                        ext_state, vd, vs2, vm, vl, vstart, sew, vlmax, index,
473                    );
474                }
475            }
476            // vrgather.vi vd, vs2, uimm, vm
477            // Same as vrgather.vx but index is a 5-bit unsigned immediate.
478            Self::VrgatherVi { vd, vs2, uimm, vm } => {
479                if !ext_state.vector_instructions_allowed() {
480                    Err(ExecutionError::IllegalInstruction {
481                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
482                    })?;
483                }
484                let vtype = ext_state
485                    .vtype()
486                    .ok_or(ExecutionError::IllegalInstruction {
487                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
488                    })?;
489                let group_regs = vtype.vlmul().register_count();
490                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
491                    program_counter,
492                    vd,
493                    group_regs,
494                )?;
495                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
496                    program_counter,
497                    vs2,
498                    group_regs,
499                )?;
500                zve64x_perm_helpers::check_no_overlap::<Reg, _, _, _>(
501                    program_counter,
502                    vd,
503                    vs2,
504                    group_regs,
505                )?;
506                if !vm && vd.bits() == 0 {
507                    Err(ExecutionError::IllegalInstruction {
508                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
509                    })?;
510                }
511                let sew = vtype.vsew();
512                let vl = ext_state.vl();
513                let vstart = u32::from(ext_state.vstart());
514                let vlmax = ext_state.vlmax_for_vtype(vtype);
515                let index = u64::from(uimm);
516                // SAFETY: same as VrgatherVx.
517                unsafe {
518                    zve64x_perm_helpers::execute_rgather_scalar(
519                        ext_state, vd, vs2, vm, vl, vstart, sew, vlmax, index,
520                    );
521                }
522            }
523            // vrgatherei16.vv vd, vs2, vs1, vm
524            // Like vrgather.vv but vs1 always uses EEW=16 (regardless of SEW).
525            // EMUL_vs1 = (16 / SEW) * LMUL; must be in [1/8, 8] else illegal.
526            // vd must not overlap vs1 or vs2.
527            Self::Vrgatherei16Vv { vd, vs2, vs1, vm } => {
528                if !ext_state.vector_instructions_allowed() {
529                    Err(ExecutionError::IllegalInstruction {
530                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
531                    })?;
532                }
533                let vtype = ext_state
534                    .vtype()
535                    .ok_or(ExecutionError::IllegalInstruction {
536                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
537                    })?;
538                let group_regs = vtype.vlmul().register_count();
539                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
540                    program_counter,
541                    vd,
542                    group_regs,
543                )?;
544                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
545                    program_counter,
546                    vs2,
547                    group_regs,
548                )?;
549                // Compute EMUL for vs1 index register (EEW=16).
550                let index_group_regs = vtype
551                    .vlmul()
552                    .index_register_count(
553                        ab_riscv_primitives::instructions::v::Eew::E16,
554                        vtype.vsew(),
555                    )
556                    .ok_or(ExecutionError::IllegalInstruction {
557                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
558                    })?;
559                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
560                    program_counter,
561                    vs1,
562                    index_group_regs,
563                )?;
564                zve64x_perm_helpers::check_no_overlap::<Reg, _, _, _>(
565                    program_counter,
566                    vd,
567                    vs2,
568                    group_regs,
569                )?;
570                // vd and vs1 have different group sizes (group_regs vs index_group_regs),
571                // so the symmetric helper would use the wrong size for one of the intervals.
572                zve64x_perm_helpers::check_no_overlap_asymmetric::<Reg, _, _, _>(
573                    program_counter,
574                    vd,
575                    group_regs,
576                    vs1,
577                    index_group_regs,
578                )?;
579                if !vm && vd.bits() == 0 {
580                    Err(ExecutionError::IllegalInstruction {
581                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
582                    })?;
583                }
584                let sew = vtype.vsew();
585                let vl = ext_state.vl();
586                let vstart = u32::from(ext_state.vstart());
587                let vlmax = ext_state.vlmax_for_vtype(vtype);
588                // SAFETY: all alignment and overlap constraints verified; vl <= VLMAX;
589                // vs1 uses EEW=16 with computed index_group_regs.
590                unsafe {
591                    zve64x_perm_helpers::execute_rgatherei16(
592                        ext_state,
593                        vd,
594                        vs2,
595                        vs1,
596                        vm,
597                        vl,
598                        vstart,
599                        sew,
600                        vlmax,
601                        index_group_regs,
602                    );
603                }
604            }
605            // vmerge.vvm / vmv.v.v
606            // When vm=true: vmv.v.v vd, vs1 - broadcast all active elements from vs1.
607            //   vs2 is ignored; no overlap restriction on vd/vs2.
608            // When vm=false: vmerge.vvm vd, vs2, vs1, v0
609            //   vd[i] = v0[i] ? vs1[i] : vs2[i]
610            //   vd must not overlap v0 (mask source).
611            Self::VmergeVvm { vd, vs2, vs1, vm } => {
612                if !ext_state.vector_instructions_allowed() {
613                    Err(ExecutionError::IllegalInstruction {
614                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
615                    })?;
616                }
617                let vtype = ext_state
618                    .vtype()
619                    .ok_or(ExecutionError::IllegalInstruction {
620                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
621                    })?;
622                let group_regs = vtype.vlmul().register_count();
623                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
624                    program_counter,
625                    vd,
626                    group_regs,
627                )?;
628                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
629                    program_counter,
630                    vs1,
631                    group_regs,
632                )?;
633                if !vm {
634                    // vmerge: vs2 is read, vd must not overlap v0
635                    zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
636                        program_counter,
637                        vs2,
638                        group_regs,
639                    )?;
640                    zve64x_perm_helpers::check_no_overlap::<Reg, _, _, _>(
641                        program_counter,
642                        vd,
643                        VReg::V0,
644                        group_regs,
645                    )?;
646                }
647                let sew = vtype.vsew();
648                let vl = ext_state.vl();
649                let vstart = u32::from(ext_state.vstart());
650                // SAFETY: alignment and overlap verified above; vl <= VLMAX.
651                unsafe {
652                    zve64x_perm_helpers::execute_merge_vv(
653                        ext_state, vd, vs2, vs1, vm, vl, vstart, sew,
654                    );
655                }
656            }
657            // vmerge.vxm / vmv.v.x
658            // When vm=true: vmv.v.x vd, rs1 - broadcast scalar to all active elements.
659            // When vm=false: vmerge.vxm - vd[i] = v0[i] ? rs1 : vs2[i]
660            Self::VmergeVxm { vd, vs2, rs1, vm } => {
661                if !ext_state.vector_instructions_allowed() {
662                    Err(ExecutionError::IllegalInstruction {
663                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
664                    })?;
665                }
666                let vtype = ext_state
667                    .vtype()
668                    .ok_or(ExecutionError::IllegalInstruction {
669                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
670                    })?;
671                let group_regs = vtype.vlmul().register_count();
672                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
673                    program_counter,
674                    vd,
675                    group_regs,
676                )?;
677                if !vm {
678                    zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
679                        program_counter,
680                        vs2,
681                        group_regs,
682                    )?;
683                    zve64x_perm_helpers::check_no_overlap::<Reg, _, _, _>(
684                        program_counter,
685                        vd,
686                        VReg::V0,
687                        group_regs,
688                    )?;
689                }
690                let sew = vtype.vsew();
691                let vl = ext_state.vl();
692                let vstart = u32::from(ext_state.vstart());
693                let scalar = regs.read(rs1).as_u64();
694                // SAFETY: alignment and overlap verified above; vl <= VLMAX.
695                unsafe {
696                    zve64x_perm_helpers::execute_merge_scalar(
697                        ext_state, vd, vs2, vm, vl, vstart, sew, scalar,
698                    );
699                }
700            }
701            // vmerge.vim / vmv.v.i
702            // When vm=true: vmv.v.i vd, simm5 - broadcast sign-extended immediate.
703            // When vm=false: vmerge.vim - vd[i] = v0[i] ? simm5 : vs2[i]
704            Self::VmergeVim { vd, vs2, simm5, vm } => {
705                if !ext_state.vector_instructions_allowed() {
706                    Err(ExecutionError::IllegalInstruction {
707                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
708                    })?;
709                }
710                let vtype = ext_state
711                    .vtype()
712                    .ok_or(ExecutionError::IllegalInstruction {
713                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
714                    })?;
715                let group_regs = vtype.vlmul().register_count();
716                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
717                    program_counter,
718                    vd,
719                    group_regs,
720                )?;
721                if !vm {
722                    zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
723                        program_counter,
724                        vs2,
725                        group_regs,
726                    )?;
727                    zve64x_perm_helpers::check_no_overlap::<Reg, _, _, _>(
728                        program_counter,
729                        vd,
730                        VReg::V0,
731                        group_regs,
732                    )?;
733                }
734                let sew = vtype.vsew();
735                let vl = ext_state.vl();
736                let vstart = u32::from(ext_state.vstart());
737                // Sign-extend imm to u64 so the low sew_bytes are correct for all SEW.
738                let scalar = i64::from(simm5).cast_unsigned();
739                // SAFETY: alignment and overlap verified above; vl <= VLMAX.
740                unsafe {
741                    zve64x_perm_helpers::execute_merge_scalar(
742                        ext_state, vd, vs2, vm, vl, vstart, sew, scalar,
743                    );
744                }
745            }
            // vcompress.vm vd, vs2, vs1
            // Packs active elements of vs2 (where vs1 mask bit is set) sequentially into vd.
            // Always unmasked (vm=1 in encoding); vs1 is the explicit mask operand.
            // vd must not overlap vs1 or vs2.
            Self::VcompressVm { vd, vs2, vs1 } => {
                // Vector unit must be enabled (vs field in mstatus, etc.).
                if !ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                // A valid vtype is required to know SEW/LMUL.
                let vtype = ext_state
                    .vtype()
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                // Spec §16.5: vstart must be zero.
                if ext_state.vstart() != 0 {
                    Err(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                let group_regs = vtype.vlmul().register_count();
                // Both vd and vs2 are full LMUL groups and must be group-aligned.
                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
                    program_counter,
                    vd,
                    group_regs,
                )?;
                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
                    program_counter,
                    vs2,
                    group_regs,
                )?;
                // Destination group must not overlap the source group.
                zve64x_perm_helpers::check_no_overlap::<Reg, _, _, _>(
                    program_counter,
                    vd,
                    vs2,
                    group_regs,
                )?;
                // vs1 is always a single mask register (no LMUL grouping);
                // check it doesn't overlap the vd group.
                zve64x_perm_helpers::check_no_overlap::<Reg, _, _, _>(program_counter, vd, vs1, 1)?;
                let sew = vtype.vsew();
                let vl = ext_state.vl();
                // SAFETY: alignment and overlap verified above; vstart checked zero;
                // vl <= VLMAX per the sibling arms' invariant.
                // NOTE(review): unlike the vmv*r.v arms below, there is no explicit
                // mark_vs_dirty()/reset_vstart() here — presumably execute_compress
                // handles that state update internally; verify against the helper.
                unsafe {
                    zve64x_perm_helpers::execute_compress(ext_state, vd, vs2, vs1, vl, sew);
                }
            }
793            // vmv1r.v vd, vs2
794            // Whole register move: copies 1 register.
795            // No masking, no vtype/vl dependency.
796            Self::Vmv1rV { vd, vs2 } => {
797                if !ext_state.vector_instructions_allowed() {
798                    Err(ExecutionError::IllegalInstruction {
799                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
800                    })?;
801                }
802                // SAFETY: both vd.bits() and vs2.bits() are always in [0, 32) by VReg invariant;
803                // copying 1 register always fits.
804                unsafe {
805                    zve64x_perm_helpers::execute_whole_reg_move(
806                        ext_state.write_vreg(),
807                        vd.bits(),
808                        vs2.bits(),
809                        1,
810                    );
811                }
812                ext_state.mark_vs_dirty();
813                ext_state.reset_vstart();
814            }
815            // vmv2r.v vd, vs2
816            // Whole register move: copies 2 registers.
817            // vd and vs2 must be aligned to 2 (checked here per spec §17.6).
818            Self::Vmv2rV { vd, vs2 } => {
819                if !ext_state.vector_instructions_allowed() {
820                    Err(ExecutionError::IllegalInstruction {
821                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
822                    })?;
823                }
824                if !vd.bits().is_multiple_of(2) || !vs2.bits().is_multiple_of(2) {
825                    Err(ExecutionError::IllegalInstruction {
826                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
827                    })?;
828                }
829                // SAFETY: alignment verified; 2 registers from aligned base always stay in [0, 32).
830                unsafe {
831                    zve64x_perm_helpers::execute_whole_reg_move(
832                        ext_state.write_vreg(),
833                        vd.bits(),
834                        vs2.bits(),
835                        2,
836                    );
837                }
838                ext_state.mark_vs_dirty();
839                ext_state.reset_vstart();
840            }
841            // vmv4r.v vd, vs2
842            // Whole register move: copies 4 registers.
843            Self::Vmv4rV { vd, vs2 } => {
844                if !ext_state.vector_instructions_allowed() {
845                    Err(ExecutionError::IllegalInstruction {
846                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
847                    })?;
848                }
849                if !vd.bits().is_multiple_of(4) || !vs2.bits().is_multiple_of(4) {
850                    Err(ExecutionError::IllegalInstruction {
851                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
852                    })?;
853                }
854                // SAFETY: alignment verified; 4 registers from aligned base always stay in [0, 32).
855                unsafe {
856                    zve64x_perm_helpers::execute_whole_reg_move(
857                        ext_state.write_vreg(),
858                        vd.bits(),
859                        vs2.bits(),
860                        4,
861                    );
862                }
863                ext_state.mark_vs_dirty();
864                ext_state.reset_vstart();
865            }
866            // vmv8r.v vd, vs2
867            // Whole register move: copies 8 registers.
868            Self::Vmv8rV { vd, vs2 } => {
869                if !ext_state.vector_instructions_allowed() {
870                    Err(ExecutionError::IllegalInstruction {
871                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
872                    })?;
873                }
874                if !vd.bits().is_multiple_of(8) || !vs2.bits().is_multiple_of(8) {
875                    Err(ExecutionError::IllegalInstruction {
876                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
877                    })?;
878                }
879                // SAFETY: alignment verified; 8 registers from aligned base always stay in [0, 32).
880                unsafe {
881                    zve64x_perm_helpers::execute_whole_reg_move(
882                        ext_state.write_vreg(),
883                        vd.bits(),
884                        vs2.bits(),
885                        8,
886                    );
887                }
888                ext_state.mark_vs_dirty();
889                ext_state.reset_vstart();
890            }
891        }
892
893        Ok(ControlFlow::Continue(()))
894    }
895}