Skip to main content

ab_riscv_interpreter/v/zvexx/
perm.rs

1//! ZveXx permutation instructions
2
3#[cfg(test)]
4mod tests;
5pub mod zvexx_perm_helpers;
6
7use crate::v::vector_registers::VectorRegistersExt;
8use crate::v::zvexx::zvexx_helpers;
9use crate::{
10    ExecutableInstruction, ExecutableInstructionCsr, ExecutableInstructionOperands, ExecutionError,
11    ProgramCounter, RegisterFile, Rs1Rs2OperandValues, Rs1Rs2Operands, VirtualMemory,
12};
13use ab_riscv_macros::instruction_execution;
14use ab_riscv_primitives::prelude::*;
15use core::fmt;
16use core::ops::ControlFlow;
17
// Empty impl: no `ExecutableInstructionOperands` items are overridden for
// `ZveXxPermInstruction`; the only bound is `Reg: Register`.
// NOTE(review): presumably `#[instruction_execution]` generates or registers the operand
// handling for this impl - confirm against `ab_riscv_macros`.
18#[instruction_execution]
19impl<Reg> ExecutableInstructionOperands for ZveXxPermInstruction<Reg> where Reg: Register {}
20
// Empty impl: no `ExecutableInstructionCsr` items are overridden for
// `ZveXxPermInstruction`. `ExtState` and `CustomError` are left unconstrained here; the
// only bound is `Reg: Register`.
// NOTE(review): presumably the trait defaults (or `#[instruction_execution]`) supply the
// CSR behaviour for these instructions - confirm against the trait definition.
21#[instruction_execution]
22impl<Reg, ExtState, CustomError> ExecutableInstructionCsr<ExtState, CustomError>
23    for ZveXxPermInstruction<Reg>
24where
25    Reg: Register,
26{
27}
28
29#[instruction_execution]
30impl<Reg, Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
31    ExecutableInstruction<Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
32    for ZveXxPermInstruction<Reg>
33where
34    Reg: Register,
35    Regs: RegisterFile<Reg>,
36    ExtState: VectorRegistersExt<Reg, CustomError>,
37    [(); ExtState::ELEN as usize]:,
38    [(); ExtState::VLEN as usize]:,
39    [(); ExtState::VLENB as usize]:,
40    Memory: VirtualMemory,
41    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
42    CustomError: fmt::Debug,
43{
44    #[inline(always)]
45    fn execute(
46        self,
47        Rs1Rs2OperandValues {
48            rs1_value,
49            rs2_value: _,
50        }: Rs1Rs2OperandValues<<Self::Reg as Register>::Type>,
51        regs: &mut Regs,
52        ext_state: &mut ExtState,
53        _memory: &mut Memory,
54        program_counter: &mut PC,
55        _system_instruction_handler: &mut InstructionHandler,
56    ) -> Result<
57        ControlFlow<(), (Self::Reg, <Self::Reg as Register>::Type)>,
58        ExecutionError<Reg::Type, CustomError>,
59    > {
60        match self {
61            // vmv.x.s rd, vs2
62            // Copies sign-extended element 0 of vs2 (at current SEW) to GPR rd.
63            // Requires valid vtype (needs SEW to know element width).
64            // Does not use vl or masking; always reads element 0.
65            // Resets vstart per spec §6.3.
66            Self::VmvXS { rd, vs2 } => {
67                if !ext_state.vector_instructions_allowed() {
68                    return Err(ExecutionError::IllegalInstruction {
69                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
70                    });
71                }
72                let vtype = ext_state
73                    .vtype()
74                    .ok_or(ExecutionError::IllegalInstruction {
75                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
76                    })?;
77                let sew = vtype.vsew();
78                // SAFETY: element 0 is always within register vs2, byte offset 0;
79                // VLENB >= sew.bytes() for all legal vtype configurations.
80                let raw = unsafe {
81                    zvexx_perm_helpers::read_element_0_u64(ext_state.read_vregs(), vs2, sew)
82                };
83                let sign_extended = zvexx_perm_helpers::sign_extend_to_reg::<Reg>(raw, sew);
84                regs.write(rd, sign_extended);
85                ext_state.mark_vs_dirty();
86                ext_state.reset_vstart();
87            }
88            // vmv.s.x vd, rs1
89            // Copies scalar GPR rs1 (sign-extended / truncated to SEW) into element 0 of vd.
90            // If vstart >= vl (e.g. vl == 0), the write is suppressed but vstart is still reset.
91            // Resets vstart per spec §6.3.
92            Self::VmvSX { vd, rs1: _ } => {
93                if !ext_state.vector_instructions_allowed() {
94                    return Err(ExecutionError::IllegalInstruction {
95                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
96                    });
97                }
98                let vtype = ext_state
99                    .vtype()
100                    .ok_or(ExecutionError::IllegalInstruction {
101                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
102                    })?;
103                let sew = vtype.vsew();
104                let vl = ext_state.vl();
105                let vstart = ext_state.vstart();
106                // Per spec §16.1: update only when vstart < vl.
107                if u32::from(vstart) < vl {
108                    let scalar = rs1_value.as_i64().cast_unsigned();
109                    // SAFETY: element 0 always fits.
110                    unsafe {
111                        zvexx_perm_helpers::write_element_0_u64(
112                            ext_state.write_vregs(),
113                            vd,
114                            sew,
115                            scalar,
116                        );
117                    }
118                }
119                ext_state.mark_vs_dirty();
120                ext_state.reset_vstart();
121            }
122            // vslideup.vx vd, vs2, rs1, vm
123            // Slides elements of vs2 up by the scalar offset in rs1.
124            // Elements vd[0..offset] are unchanged (tail-undisturbed for those positions).
125            // Elements vd[i] for offset <= i < vl get vs2[i - offset].
126            // Per spec §16.3.1: vd must not overlap vs2.
127            Self::VslideupVx {
128                vd,
129                vs2,
130                rs1: _,
131                vm,
132            } => {
133                if !ext_state.vector_instructions_allowed() {
134                    return Err(ExecutionError::IllegalInstruction {
135                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
136                    });
137                }
138                let vtype = ext_state
139                    .vtype()
140                    .ok_or(ExecutionError::IllegalInstruction {
141                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
142                    })?;
143                let group_regs = vtype.vlmul().register_count();
144                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
145                    program_counter,
146                    vd,
147                    group_regs,
148                )?;
149                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
150                    program_counter,
151                    vs2,
152                    group_regs,
153                )?;
154                // vd must not overlap vs2
155                zvexx_perm_helpers::check_no_overlap::<Reg, _, _, _>(
156                    program_counter,
157                    vd,
158                    vs2,
159                    group_regs,
160                )?;
161                if !vm && vd == VReg::V0 {
162                    return Err(ExecutionError::IllegalInstruction {
163                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
164                    });
165                }
166                let sew = vtype.vsew();
167                let offset = rs1_value.as_u64();
168                // SAFETY: alignment and no-overlap verified above; vl <= VLMAX.
169                unsafe {
170                    zvexx_perm_helpers::execute_slideup(ext_state, vd, vs2, vm, sew, offset);
171                }
172            }
173            // vslideup.vi vd, vs2, uimm, vm
174            // Same as vslideup.vx but offset is a 5-bit unsigned immediate.
175            Self::VslideupVi { vd, vs2, uimm, vm } => {
176                if !ext_state.vector_instructions_allowed() {
177                    return Err(ExecutionError::IllegalInstruction {
178                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
179                    });
180                }
181                let vtype = ext_state
182                    .vtype()
183                    .ok_or(ExecutionError::IllegalInstruction {
184                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
185                    })?;
186                let group_regs = vtype.vlmul().register_count();
187                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
188                    program_counter,
189                    vd,
190                    group_regs,
191                )?;
192                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
193                    program_counter,
194                    vs2,
195                    group_regs,
196                )?;
197                zvexx_perm_helpers::check_no_overlap::<Reg, _, _, _>(
198                    program_counter,
199                    vd,
200                    vs2,
201                    group_regs,
202                )?;
203                if !vm && vd == VReg::V0 {
204                    return Err(ExecutionError::IllegalInstruction {
205                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
206                    });
207                }
208                let sew = vtype.vsew();
209                let offset = u64::from(uimm);
210                // SAFETY: same as VslideupVx.
211                unsafe {
212                    zvexx_perm_helpers::execute_slideup(ext_state, vd, vs2, vm, sew, offset);
213                }
214            }
215            // vslidedown.vx vd, vs2, rs1, vm
216            // Element vd[i] = vs2[i + offset] if i + offset < VLMAX, else 0.
217            // vd may overlap vs2 for slidedown.
218            Self::VslidedownVx {
219                vd,
220                vs2,
221                rs1: _,
222                vm,
223            } => {
224                if !ext_state.vector_instructions_allowed() {
225                    return Err(ExecutionError::IllegalInstruction {
226                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
227                    });
228                }
229                let vtype = ext_state
230                    .vtype()
231                    .ok_or(ExecutionError::IllegalInstruction {
232                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
233                    })?;
234                let group_regs = vtype.vlmul().register_count();
235                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
236                    program_counter,
237                    vd,
238                    group_regs,
239                )?;
240                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
241                    program_counter,
242                    vs2,
243                    group_regs,
244                )?;
245                if !vm && vd == VReg::V0 {
246                    return Err(ExecutionError::IllegalInstruction {
247                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
248                    });
249                }
250                let sew = vtype.vsew();
251                let vlmax = ext_state.vlmax_for_vtype(vtype);
252                let offset = rs1_value.as_u64();
253                // SAFETY: alignment verified above; vl <= VLMAX; offset clamped in helper.
254                unsafe {
255                    zvexx_perm_helpers::execute_slidedown(
256                        ext_state, vd, vs2, vm, sew, vlmax, offset,
257                    );
258                }
259            }
260            // vslidedown.vi vd, vs2, uimm, vm
261            // Same as vslidedown.vx but offset is a 5-bit unsigned immediate.
262            Self::VslidedownVi { vd, vs2, uimm, vm } => {
263                if !ext_state.vector_instructions_allowed() {
264                    return Err(ExecutionError::IllegalInstruction {
265                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
266                    });
267                }
268                let vtype = ext_state
269                    .vtype()
270                    .ok_or(ExecutionError::IllegalInstruction {
271                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
272                    })?;
273                let group_regs = vtype.vlmul().register_count();
274                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
275                    program_counter,
276                    vd,
277                    group_regs,
278                )?;
279                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
280                    program_counter,
281                    vs2,
282                    group_regs,
283                )?;
284                if !vm && vd == VReg::V0 {
285                    return Err(ExecutionError::IllegalInstruction {
286                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
287                    });
288                }
289                let sew = vtype.vsew();
290                let vlmax = ext_state.vlmax_for_vtype(vtype);
291                let offset = u64::from(uimm);
292                // SAFETY: same as VslidedownVx.
293                unsafe {
294                    zvexx_perm_helpers::execute_slidedown(
295                        ext_state, vd, vs2, vm, sew, vlmax, offset,
296                    );
297                }
298            }
299            // vslide1up.vx vd, vs2, rs1, vm
300            // Element 0 of vd gets the scalar value rs1 (written at SEW width).
301            // Elements vd[i] for 1 <= i < vl get vs2[i - 1].
302            // vd must not overlap vs2.
303            Self::Vslide1upVx {
304                vd,
305                vs2,
306                rs1: _,
307                vm,
308            } => {
309                if !ext_state.vector_instructions_allowed() {
310                    return Err(ExecutionError::IllegalInstruction {
311                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
312                    });
313                }
314                let vtype = ext_state
315                    .vtype()
316                    .ok_or(ExecutionError::IllegalInstruction {
317                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
318                    })?;
319                let group_regs = vtype.vlmul().register_count();
320                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
321                    program_counter,
322                    vd,
323                    group_regs,
324                )?;
325                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
326                    program_counter,
327                    vs2,
328                    group_regs,
329                )?;
330                zvexx_perm_helpers::check_no_overlap::<Reg, _, _, _>(
331                    program_counter,
332                    vd,
333                    vs2,
334                    group_regs,
335                )?;
336                if !vm && vd == VReg::V0 {
337                    return Err(ExecutionError::IllegalInstruction {
338                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
339                    });
340                }
341                let sew = vtype.vsew();
342                let scalar = rs1_value.as_i64().cast_unsigned();
343                // SAFETY: alignment and no-overlap verified; vl <= VLMAX.
344                unsafe {
345                    zvexx_perm_helpers::execute_slide1up(ext_state, vd, vs2, vm, sew, scalar);
346                }
347            }
348            // vslide1down.vx vd, vs2, rs1, vm
349            // Element vd[i] = vs2[i + 1] for 0 <= i < vl - 1.
350            // Element vd[vl - 1] gets the scalar value rs1.
351            // vd may overlap vs2 for slide1down.
352            Self::Vslide1downVx {
353                vd,
354                vs2,
355                rs1: _,
356                vm,
357            } => {
358                if !ext_state.vector_instructions_allowed() {
359                    return Err(ExecutionError::IllegalInstruction {
360                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
361                    });
362                }
363                let vtype = ext_state
364                    .vtype()
365                    .ok_or(ExecutionError::IllegalInstruction {
366                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
367                    })?;
368                let group_regs = vtype.vlmul().register_count();
369                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
370                    program_counter,
371                    vd,
372                    group_regs,
373                )?;
374                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
375                    program_counter,
376                    vs2,
377                    group_regs,
378                )?;
379                if !vm && vd == VReg::V0 {
380                    return Err(ExecutionError::IllegalInstruction {
381                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
382                    });
383                }
384                let sew = vtype.vsew();
385                let scalar = rs1_value.as_i64().cast_unsigned();
386                // SAFETY: alignment verified; vl <= VLMAX; overlap permitted by spec.
387                unsafe {
388                    zvexx_perm_helpers::execute_slide1down(ext_state, vd, vs2, vm, sew, scalar);
389                }
390            }
391            // vrgather.vv vd, vs2, vs1, vm
392            // vd[i] = (vs1[i] < VLMAX) ? vs2[vs1[i]] : 0
393            // vd must not overlap vs1 or vs2.
394            Self::VrgatherVv { vd, vs2, vs1, vm } => {
395                if !ext_state.vector_instructions_allowed() {
396                    return Err(ExecutionError::IllegalInstruction {
397                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
398                    });
399                }
400                let vtype = ext_state
401                    .vtype()
402                    .ok_or(ExecutionError::IllegalInstruction {
403                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
404                    })?;
405                let group_regs = vtype.vlmul().register_count();
406                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
407                    program_counter,
408                    vd,
409                    group_regs,
410                )?;
411                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
412                    program_counter,
413                    vs2,
414                    group_regs,
415                )?;
416                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
417                    program_counter,
418                    vs1,
419                    group_regs,
420                )?;
421                zvexx_perm_helpers::check_no_overlap::<Reg, _, _, _>(
422                    program_counter,
423                    vd,
424                    vs2,
425                    group_regs,
426                )?;
427                zvexx_perm_helpers::check_no_overlap::<Reg, _, _, _>(
428                    program_counter,
429                    vd,
430                    vs1,
431                    group_regs,
432                )?;
433                if !vm && vd == VReg::V0 {
434                    return Err(ExecutionError::IllegalInstruction {
435                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
436                    });
437                }
438                let sew = vtype.vsew();
439                let vlmax = ext_state.vlmax_for_vtype(vtype);
440                // SAFETY: all alignment and overlap constraints verified above; vl <= VLMAX.
441                unsafe {
442                    zvexx_perm_helpers::execute_rgather_vv(ext_state, vd, vs2, vs1, vm, sew, vlmax);
443                }
444            }
445            // vrgather.vx vd, vs2, rs1, vm
446            // All active elements of vd get vs2[rs1] if rs1 < VLMAX, else 0.
447            // vd must not overlap vs2.
448            Self::VrgatherVx {
449                vd,
450                vs2,
451                rs1: _,
452                vm,
453            } => {
454                if !ext_state.vector_instructions_allowed() {
455                    return Err(ExecutionError::IllegalInstruction {
456                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
457                    });
458                }
459                let vtype = ext_state
460                    .vtype()
461                    .ok_or(ExecutionError::IllegalInstruction {
462                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
463                    })?;
464                let group_regs = vtype.vlmul().register_count();
465                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
466                    program_counter,
467                    vd,
468                    group_regs,
469                )?;
470                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
471                    program_counter,
472                    vs2,
473                    group_regs,
474                )?;
475                zvexx_perm_helpers::check_no_overlap::<Reg, _, _, _>(
476                    program_counter,
477                    vd,
478                    vs2,
479                    group_regs,
480                )?;
481                if !vm && vd == VReg::V0 {
482                    return Err(ExecutionError::IllegalInstruction {
483                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
484                    });
485                }
486                let sew = vtype.vsew();
487                let vlmax = ext_state.vlmax_for_vtype(vtype);
488                let index = rs1_value.as_u64();
489                // SAFETY: alignment and no-overlap verified; vl <= VLMAX.
490                unsafe {
491                    zvexx_perm_helpers::execute_rgather_scalar(
492                        ext_state, vd, vs2, vm, sew, vlmax, index,
493                    );
494                }
495            }
496            // vrgather.vi vd, vs2, uimm, vm
497            // Same as vrgather.vx but index is a 5-bit unsigned immediate.
498            Self::VrgatherVi { vd, vs2, uimm, vm } => {
499                if !ext_state.vector_instructions_allowed() {
500                    return Err(ExecutionError::IllegalInstruction {
501                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
502                    });
503                }
504                let vtype = ext_state
505                    .vtype()
506                    .ok_or(ExecutionError::IllegalInstruction {
507                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
508                    })?;
509                let group_regs = vtype.vlmul().register_count();
510                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
511                    program_counter,
512                    vd,
513                    group_regs,
514                )?;
515                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
516                    program_counter,
517                    vs2,
518                    group_regs,
519                )?;
520                zvexx_perm_helpers::check_no_overlap::<Reg, _, _, _>(
521                    program_counter,
522                    vd,
523                    vs2,
524                    group_regs,
525                )?;
526                if !vm && vd == VReg::V0 {
527                    return Err(ExecutionError::IllegalInstruction {
528                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
529                    });
530                }
531                let sew = vtype.vsew();
532                let vlmax = ext_state.vlmax_for_vtype(vtype);
533                let index = u64::from(uimm);
534                // SAFETY: same as VrgatherVx.
535                unsafe {
536                    zvexx_perm_helpers::execute_rgather_scalar(
537                        ext_state, vd, vs2, vm, sew, vlmax, index,
538                    );
539                }
540            }
541            // vrgatherei16.vv vd, vs2, vs1, vm
542            // Like vrgather.vv but vs1 always uses EEW=16 (regardless of SEW).
543            // EMUL_vs1 = (16 / SEW) * LMUL; must be in [1/8, 8] else illegal.
544            // vd must not overlap vs1 or vs2.
545            Self::Vrgatherei16Vv { vd, vs2, vs1, vm } => {
546                if !ext_state.vector_instructions_allowed() {
547                    return Err(ExecutionError::IllegalInstruction {
548                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
549                    });
550                }
551                let vtype = ext_state
552                    .vtype()
553                    .ok_or(ExecutionError::IllegalInstruction {
554                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
555                    })?;
556                let group_regs = vtype.vlmul().register_count();
557                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
558                    program_counter,
559                    vd,
560                    group_regs,
561                )?;
562                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
563                    program_counter,
564                    vs2,
565                    group_regs,
566                )?;
567                // Compute EMUL for vs1 index register (EEW=16).
568                let index_group_regs = vtype
569                    .vlmul()
570                    .index_register_count(
571                        ab_riscv_primitives::instructions::v::Eew::E16,
572                        vtype.vsew(),
573                    )
574                    .ok_or(ExecutionError::IllegalInstruction {
575                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
576                    })?;
577                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
578                    program_counter,
579                    vs1,
580                    index_group_regs,
581                )?;
582                zvexx_perm_helpers::check_no_overlap::<Reg, _, _, _>(
583                    program_counter,
584                    vd,
585                    vs2,
586                    group_regs,
587                )?;
588                // vd and vs1 have different group sizes (group_regs vs index_group_regs),
589                // so the symmetric helper would use the wrong size for one of the intervals.
590                zvexx_perm_helpers::check_no_overlap_asymmetric::<Reg, _, _, _>(
591                    program_counter,
592                    vd,
593                    group_regs,
594                    vs1,
595                    index_group_regs,
596                )?;
597                if !vm && vd == VReg::V0 {
598                    return Err(ExecutionError::IllegalInstruction {
599                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
600                    });
601                }
602                let sew = vtype.vsew();
603                let vlmax = ext_state.vlmax_for_vtype(vtype);
604                // SAFETY: all alignment and overlap constraints verified; vl <= VLMAX;
605                // vs1 uses EEW=16 with computed index_group_regs.
606                unsafe {
607                    zvexx_perm_helpers::execute_rgatherei16(
608                        ext_state,
609                        vd,
610                        vs2,
611                        vs1,
612                        vm,
613                        sew,
614                        vlmax,
615                        index_group_regs,
616                    );
617                }
618            }
619            // vmerge.vvm / vmv.v.v
620            // When vm=true: vmv.v.v vd, vs1 - copy vs1[i] to vd[i] for every active element.
621            //   vs2 is ignored; no overlap restriction on vd/vs2.
622            // When vm=false: vmerge.vvm vd, vs2, vs1, v0
623            //   vd[i] = v0[i] ? vs1[i] : vs2[i]
624            //   vd must not overlap v0 (mask source).
625            Self::VmergeVvm { vd, vs2, vs1, vm } => {
626                if !ext_state.vector_instructions_allowed() {
627                    return Err(ExecutionError::IllegalInstruction {
628                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
629                    });
630                }
631                let vtype = ext_state
632                    .vtype()
633                    .ok_or(ExecutionError::IllegalInstruction {
634                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
635                    })?;
636                let group_regs = vtype.vlmul().register_count();
637                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
638                    program_counter,
639                    vd,
640                    group_regs,
641                )?;
642                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
643                    program_counter,
644                    vs1,
645                    group_regs,
646                )?;
647                if !vm {
648                    // vmerge: vs2 is read, vd must not overlap v0
649                    zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
650                        program_counter,
651                        vs2,
652                        group_regs,
653                    )?;
654                    zvexx_perm_helpers::check_no_overlap::<Reg, _, _, _>(
655                        program_counter,
656                        vd,
657                        VReg::V0,
658                        group_regs,
659                    )?;
660                }
661                let sew = vtype.vsew();
662                // SAFETY: alignment and overlap verified above; vl <= VLMAX.
663                unsafe {
664                    zvexx_perm_helpers::execute_merge_vv(ext_state, vd, vs2, vs1, vm, sew);
665                }
666            }
667            // vmerge.vxm / vmv.v.x
668            // When vm=true: vmv.v.x vd, rs1 - broadcast scalar to all active elements.
669            // When vm=false: vmerge.vxm - vd[i] = v0[i] ? rs1 : vs2[i]
670            Self::VmergeVxm {
671                vd,
672                vs2,
673                rs1: _,
674                vm,
675            } => {
676                if !ext_state.vector_instructions_allowed() {
677                    return Err(ExecutionError::IllegalInstruction {
678                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
679                    });
680                }
681                let vtype = ext_state
682                    .vtype()
683                    .ok_or(ExecutionError::IllegalInstruction {
684                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
685                    })?;
686                let group_regs = vtype.vlmul().register_count();
687                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
688                    program_counter,
689                    vd,
690                    group_regs,
691                )?;
692                if !vm {
693                    zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
694                        program_counter,
695                        vs2,
696                        group_regs,
697                    )?;
698                    zvexx_perm_helpers::check_no_overlap::<Reg, _, _, _>(
699                        program_counter,
700                        vd,
701                        VReg::V0,
702                        group_regs,
703                    )?;
704                }
705                let sew = vtype.vsew();
706                let scalar = rs1_value.as_i64().cast_unsigned();
707                // SAFETY: alignment and overlap verified above; vl <= VLMAX.
708                unsafe {
709                    zvexx_perm_helpers::execute_merge_scalar(ext_state, vd, vs2, vm, sew, scalar);
710                }
711            }
712            // vmerge.vim / vmv.v.i
713            // When vm=true: vmv.v.i vd, simm5 - broadcast sign-extended immediate.
714            // When vm=false: vmerge.vim - vd[i] = v0[i] ? simm5 : vs2[i]
715            Self::VmergeVim { vd, vs2, simm5, vm } => {
716                if !ext_state.vector_instructions_allowed() {
717                    return Err(ExecutionError::IllegalInstruction {
718                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
719                    });
720                }
721                let vtype = ext_state
722                    .vtype()
723                    .ok_or(ExecutionError::IllegalInstruction {
724                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
725                    })?;
726                let group_regs = vtype.vlmul().register_count();
727                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
728                    program_counter,
729                    vd,
730                    group_regs,
731                )?;
732                if !vm {
733                    zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
734                        program_counter,
735                        vs2,
736                        group_regs,
737                    )?;
738                    zvexx_perm_helpers::check_no_overlap::<Reg, _, _, _>(
739                        program_counter,
740                        vd,
741                        VReg::V0,
742                        group_regs,
743                    )?;
744                }
745                let sew = vtype.vsew();
746                // Sign-extend imm to u64 so the low sew_bytes are correct for all SEW.
747                let scalar = i64::from(simm5).cast_unsigned();
748                // SAFETY: alignment and overlap verified above; vl <= VLMAX.
749                unsafe {
750                    zvexx_perm_helpers::execute_merge_scalar(ext_state, vd, vs2, vm, sew, scalar);
751                }
752            }
753            // vcompress.vm vd, vs2, vs1
754            // Packs active elements of vs2 (where vs1 mask bit is set) sequentially into vd.
755            // Always unmasked (vm=1 in encoding); vs1 is the explicit mask operand.
756            // vd must not overlap vs1 or vs2.
757            Self::VcompressVm { vd, vs2, vs1 } => {
758                if !ext_state.vector_instructions_allowed() {
759                    return Err(ExecutionError::IllegalInstruction {
760                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
761                    });
762                }
763                let vtype = ext_state
764                    .vtype()
765                    .ok_or(ExecutionError::IllegalInstruction {
766                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
767                    })?;
768                // Spec §16.5: vstart must be zero.
769                if ext_state.vstart() != 0 {
770                    return Err(ExecutionError::IllegalInstruction {
771                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
772                    });
773                }
774                let group_regs = vtype.vlmul().register_count();
775                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
776                    program_counter,
777                    vd,
778                    group_regs,
779                )?;
780                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
781                    program_counter,
782                    vs2,
783                    group_regs,
784                )?;
785                // vs1 is one mask register, so no alignment check; vd must not overlap vs2
786                zvexx_perm_helpers::check_no_overlap::<Reg, _, _, _>(
787                    program_counter,
788                    vd,
789                    vs2,
790                    group_regs,
791                )?;
792                // vs1 is a mask register; check it doesn't overlap vd
793                zvexx_perm_helpers::check_no_overlap::<Reg, _, _, _>(program_counter, vd, vs1, 1)?;
794                let sew = vtype.vsew();
795                let vl = ext_state.vl();
796                unsafe {
797                    zvexx_perm_helpers::execute_compress(ext_state, vd, vs2, vs1, vl, sew);
798                }
799            }
800            // vmv1r.v vd, vs2
801            // Whole register move: copies 1 register.
802            // No masking, no vtype/vl dependency.
803            Self::Vmv1rV { vd, vs2 } => {
804                if !ext_state.vector_instructions_allowed() {
805                    return Err(ExecutionError::IllegalInstruction {
806                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
807                    });
808                }
809                // SAFETY: both vd.to_bits() and vs2.to_bits() are always in [0, 32) by VReg
810                // invariant; copying 1 register always fits.
811                unsafe {
812                    zvexx_perm_helpers::execute_whole_reg_move(ext_state.write_vregs(), vd, vs2, 1);
813                }
814                ext_state.mark_vs_dirty();
815                ext_state.reset_vstart();
816            }
817            // vmv2r.v vd, vs2
818            // Whole register move: copies 2 registers.
819            // vd and vs2 must be aligned to 2 (checked here per spec §16.6).
820            Self::Vmv2rV { vd, vs2 } => {
821                if !ext_state.vector_instructions_allowed() {
822                    return Err(ExecutionError::IllegalInstruction {
823                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
824                    });
825                }
826                if !vd.to_bits().is_multiple_of(2) || !vs2.to_bits().is_multiple_of(2) {
827                    return Err(ExecutionError::IllegalInstruction {
828                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
829                    });
830                }
831                // SAFETY: alignment verified; 2 registers from aligned base always stay in [0, 32).
832                unsafe {
833                    zvexx_perm_helpers::execute_whole_reg_move(ext_state.write_vregs(), vd, vs2, 2);
834                }
835                ext_state.mark_vs_dirty();
836                ext_state.reset_vstart();
837            }
838            // vmv4r.v vd, vs2
839            // Whole register move: copies 4 registers.
840            Self::Vmv4rV { vd, vs2 } => {
841                if !ext_state.vector_instructions_allowed() {
842                    return Err(ExecutionError::IllegalInstruction {
843                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
844                    });
845                }
846                if !vd.to_bits().is_multiple_of(4) || !vs2.to_bits().is_multiple_of(4) {
847                    return Err(ExecutionError::IllegalInstruction {
848                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
849                    });
850                }
851                // SAFETY: alignment verified; 4 registers from aligned base always stay in [0, 32).
852                unsafe {
853                    zvexx_perm_helpers::execute_whole_reg_move(ext_state.write_vregs(), vd, vs2, 4);
854                }
855                ext_state.mark_vs_dirty();
856                ext_state.reset_vstart();
857            }
858            // vmv8r.v vd, vs2
859            // Whole register move: copies 8 registers.
860            Self::Vmv8rV { vd, vs2 } => {
861                if !ext_state.vector_instructions_allowed() {
862                    return Err(ExecutionError::IllegalInstruction {
863                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
864                    });
865                }
866                if !vd.to_bits().is_multiple_of(8) || !vs2.to_bits().is_multiple_of(8) {
867                    return Err(ExecutionError::IllegalInstruction {
868                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
869                    });
870                }
871                // SAFETY: alignment verified; 8 registers from aligned base always stay in [0, 32).
872                unsafe {
873                    zvexx_perm_helpers::execute_whole_reg_move(ext_state.write_vregs(), vd, vs2, 8);
874                }
875                ext_state.mark_vs_dirty();
876                ext_state.reset_vstart();
877            }
878        }
879
880        Ok(ControlFlow::Continue(Default::default()))
881    }
882}