Skip to main content

ab_riscv_interpreter/v/zvexx/
perm.rs

1//! ZveXx permutation instructions
2
3#[cfg(test)]
4mod tests;
5pub mod zvexx_perm_helpers;
6
7use crate::v::vector_registers::VectorRegistersExt;
8use crate::v::zvexx::zvexx_helpers;
9use crate::{
10    ExecutableInstruction, ExecutableInstructionCsr, ExecutableInstructionOperands, ExecutionError,
11    ProgramCounter, RegisterFile, Rs1Rs2OperandValues, Rs1Rs2Operands, VirtualMemory,
12};
13use ab_riscv_macros::instruction_execution;
14use ab_riscv_primitives::prelude::*;
15use core::fmt;
16use core::ops::ControlFlow;
17
// `ExecutableInstructionOperands` impl for the ZveXx permutation instructions.
// The impl body is empty: no items are written by hand here; the only bound is `Reg: Register`.
// NOTE(review): whatever the trait requires is presumably supplied by trait defaults and/or
// generated by `#[instruction_execution]` - confirm against the macro's documentation.
18#[instruction_execution]
19impl<Reg> ExecutableInstructionOperands for ZveXxPermInstruction<Reg> where Reg: Register {}
20
// `ExecutableInstructionCsr` impl for the ZveXx permutation instructions.
// Generic over any `ExtState` and `CustomError` (neither is bounded here); only `Reg: Register`
// is required. The impl body is empty, so no CSR-related items are overridden by hand.
// NOTE(review): presumably the trait's defaults (or code generated by
// `#[instruction_execution]`) provide the behaviour - confirm against the trait definition.
21#[instruction_execution]
22impl<Reg, ExtState, CustomError> ExecutableInstructionCsr<ExtState, CustomError>
23    for ZveXxPermInstruction<Reg>
24where
25    Reg: Register,
26{
27}
28
29#[instruction_execution]
30impl<Reg, Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
31    ExecutableInstruction<Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
32    for ZveXxPermInstruction<Reg>
33where
34    Reg: Register,
35    Regs: RegisterFile<Reg>,
36    ExtState: VectorRegistersExt<Reg, CustomError>,
37    [(); ExtState::ELEN as usize]:,
38    [(); ExtState::VLEN as usize]:,
39    [(); ExtState::VLENB as usize]:,
40    Memory: VirtualMemory,
41    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
42    CustomError: fmt::Debug,
43{
44    #[inline(always)]
45    fn execute(
46        self,
47        Rs1Rs2OperandValues {
48            rs1_value,
49            rs2_value: _,
50        }: Rs1Rs2OperandValues<<Self::Reg as Register>::Type>,
51        _regs: &mut Regs,
52        ext_state: &mut ExtState,
53        _memory: &mut Memory,
54        program_counter: &mut PC,
55        _system_instruction_handler: &mut InstructionHandler,
56    ) -> Result<
57        ControlFlow<(), (Self::Reg, <Self::Reg as Register>::Type)>,
58        ExecutionError<Reg::Type, CustomError>,
59    > {
60        match self {
61            // vmv.x.s rd, vs2
62            // Copies sign-extended element 0 of vs2 (at current SEW) to GPR rd.
63            // Requires valid vtype (needs SEW to know element width).
64            // Does not use vl or masking; always reads element 0.
65            // Resets vstart per spec §6.3.
66            Self::VmvXS { rd, vs2 } => {
67                if !ext_state.vector_instructions_allowed() {
68                    ::core::hint::cold_path();
69                    return Err(ExecutionError::IllegalInstruction {
70                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
71                    });
72                }
73                let Some(vtype) = ext_state.vtype() else {
74                    ::core::hint::cold_path();
75                    return Err(ExecutionError::IllegalInstruction {
76                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
77                    });
78                };
79                let sew = vtype.vsew();
80                // SAFETY: element 0 is always within register vs2, byte offset 0;
81                // VLENB >= sew.bytes() for all legal vtype configurations.
82                let raw = unsafe {
83                    zvexx_perm_helpers::read_element_0_u64(ext_state.read_vregs(), vs2, sew)
84                };
85                let sign_extended = zvexx_perm_helpers::sign_extend_to_reg::<Reg>(raw, sew);
86                ext_state.mark_vs_dirty();
87                ext_state.reset_vstart();
88
89                return Ok(ControlFlow::Continue((rd, sign_extended)));
90            }
91            // vmv.s.x vd, rs1
92            // Copies scalar GPR rs1 (sign-extended / truncated to SEW) into element 0 of vd.
93            // When vl == 0, the write is suppressed but vstart is still reset.
94            // Resets vstart per spec §6.3.
95            Self::VmvSX { vd, rs1: _ } => {
96                if !ext_state.vector_instructions_allowed() {
97                    ::core::hint::cold_path();
98                    return Err(ExecutionError::IllegalInstruction {
99                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
100                    });
101                }
102                let Some(vtype) = ext_state.vtype() else {
103                    ::core::hint::cold_path();
104                    return Err(ExecutionError::IllegalInstruction {
105                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
106                    });
107                };
108                let sew = vtype.vsew();
109                let vl = ext_state.vl();
110                let vstart = ext_state.vstart();
111                // Per spec §16.1: update only when vstart < vl.
112                if u32::from(vstart) < vl {
113                    let scalar = rs1_value.as_i64().cast_unsigned();
114                    // SAFETY: element 0 always fits.
115                    unsafe {
116                        zvexx_perm_helpers::write_element_0_u64(
117                            ext_state.write_vregs(),
118                            vd,
119                            sew,
120                            scalar,
121                        );
122                    }
123                }
124                ext_state.mark_vs_dirty();
125                ext_state.reset_vstart();
126            }
127            // vslideup.vx vd, vs2, rs1, vm
128            // Slides elements of vs2 up by the scalar offset in rs1.
129            // Elements vd[0..offset] are unchanged (tail-undisturbed for those positions).
130            // Elements vd[i] for offset <= i < vl get vs2[i - offset].
131            // Per spec §16.3.1: vd must not overlap vs2.
132            Self::VslideupVx {
133                vd,
134                vs2,
135                rs1: _,
136                vm,
137            } => {
138                if !ext_state.vector_instructions_allowed() {
139                    ::core::hint::cold_path();
140                    return Err(ExecutionError::IllegalInstruction {
141                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
142                    });
143                }
144                let Some(vtype) = ext_state.vtype() else {
145                    ::core::hint::cold_path();
146                    return Err(ExecutionError::IllegalInstruction {
147                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
148                    });
149                };
150                let group_regs = vtype.vlmul().register_count();
151                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
152                    program_counter,
153                    vd,
154                    group_regs,
155                )?;
156                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
157                    program_counter,
158                    vs2,
159                    group_regs,
160                )?;
161                // vd must not overlap vs2
162                zvexx_perm_helpers::check_no_overlap::<Reg, _, _, _>(
163                    program_counter,
164                    vd,
165                    vs2,
166                    group_regs,
167                )?;
168                if !vm && vd == VReg::V0 {
169                    ::core::hint::cold_path();
170                    return Err(ExecutionError::IllegalInstruction {
171                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
172                    });
173                }
174                let sew = vtype.vsew();
175                let offset = rs1_value.as_u64();
176                // SAFETY: alignment and no-overlap verified above; vl <= VLMAX.
177                unsafe {
178                    zvexx_perm_helpers::execute_slideup(ext_state, vd, vs2, vm, sew, offset);
179                }
180            }
181            // vslideup.vi vd, vs2, uimm, vm
182            // Same as vslideup.vx but offset is a 5-bit unsigned immediate.
183            Self::VslideupVi { vd, vs2, uimm, vm } => {
184                if !ext_state.vector_instructions_allowed() {
185                    ::core::hint::cold_path();
186                    return Err(ExecutionError::IllegalInstruction {
187                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
188                    });
189                }
190                let Some(vtype) = ext_state.vtype() else {
191                    ::core::hint::cold_path();
192                    return Err(ExecutionError::IllegalInstruction {
193                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
194                    });
195                };
196                let group_regs = vtype.vlmul().register_count();
197                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
198                    program_counter,
199                    vd,
200                    group_regs,
201                )?;
202                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
203                    program_counter,
204                    vs2,
205                    group_regs,
206                )?;
207                zvexx_perm_helpers::check_no_overlap::<Reg, _, _, _>(
208                    program_counter,
209                    vd,
210                    vs2,
211                    group_regs,
212                )?;
213                if !vm && vd == VReg::V0 {
214                    ::core::hint::cold_path();
215                    return Err(ExecutionError::IllegalInstruction {
216                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
217                    });
218                }
219                let sew = vtype.vsew();
220                let offset = u64::from(uimm);
221                // SAFETY: same as VslideupVx.
222                unsafe {
223                    zvexx_perm_helpers::execute_slideup(ext_state, vd, vs2, vm, sew, offset);
224                }
225            }
226            // vslidedown.vx vd, vs2, rs1, vm
227            // Element vd[i] = vs2[i + offset] if i + offset < VLMAX, else 0.
228            // vd may overlap vs2 for slidedown.
229            Self::VslidedownVx {
230                vd,
231                vs2,
232                rs1: _,
233                vm,
234            } => {
235                if !ext_state.vector_instructions_allowed() {
236                    ::core::hint::cold_path();
237                    return Err(ExecutionError::IllegalInstruction {
238                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
239                    });
240                }
241                let Some(vtype) = ext_state.vtype() else {
242                    ::core::hint::cold_path();
243                    return Err(ExecutionError::IllegalInstruction {
244                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
245                    });
246                };
247                let group_regs = vtype.vlmul().register_count();
248                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
249                    program_counter,
250                    vd,
251                    group_regs,
252                )?;
253                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
254                    program_counter,
255                    vs2,
256                    group_regs,
257                )?;
258                if !vm && vd == VReg::V0 {
259                    ::core::hint::cold_path();
260                    return Err(ExecutionError::IllegalInstruction {
261                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
262                    });
263                }
264                let sew = vtype.vsew();
265                let vlmax = ext_state.vlmax_for_vtype(vtype);
266                let offset = rs1_value.as_u64();
267                // SAFETY: alignment verified above; vl <= VLMAX; offset clamped in helper.
268                unsafe {
269                    zvexx_perm_helpers::execute_slidedown(
270                        ext_state, vd, vs2, vm, sew, vlmax, offset,
271                    );
272                }
273            }
274            // vslidedown.vi vd, vs2, uimm, vm
275            // Same as vslidedown.vx but offset is a 5-bit unsigned immediate.
276            Self::VslidedownVi { vd, vs2, uimm, vm } => {
277                if !ext_state.vector_instructions_allowed() {
278                    ::core::hint::cold_path();
279                    return Err(ExecutionError::IllegalInstruction {
280                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
281                    });
282                }
283                let Some(vtype) = ext_state.vtype() else {
284                    ::core::hint::cold_path();
285                    return Err(ExecutionError::IllegalInstruction {
286                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
287                    });
288                };
289                let group_regs = vtype.vlmul().register_count();
290                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
291                    program_counter,
292                    vd,
293                    group_regs,
294                )?;
295                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
296                    program_counter,
297                    vs2,
298                    group_regs,
299                )?;
300                if !vm && vd == VReg::V0 {
301                    ::core::hint::cold_path();
302                    return Err(ExecutionError::IllegalInstruction {
303                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
304                    });
305                }
306                let sew = vtype.vsew();
307                let vlmax = ext_state.vlmax_for_vtype(vtype);
308                let offset = u64::from(uimm);
309                // SAFETY: same as VslidedownVx.
310                unsafe {
311                    zvexx_perm_helpers::execute_slidedown(
312                        ext_state, vd, vs2, vm, sew, vlmax, offset,
313                    );
314                }
315            }
316            // vslide1up.vx vd, vs2, rs1, vm
317            // Element 0 of vd gets the scalar value rs1 (written at SEW width).
318            // Elements vd[i] for 1 <= i < vl get vs2[i - 1].
319            // vd must not overlap vs2.
320            Self::Vslide1upVx {
321                vd,
322                vs2,
323                rs1: _,
324                vm,
325            } => {
326                if !ext_state.vector_instructions_allowed() {
327                    ::core::hint::cold_path();
328                    return Err(ExecutionError::IllegalInstruction {
329                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
330                    });
331                }
332                let Some(vtype) = ext_state.vtype() else {
333                    ::core::hint::cold_path();
334                    return Err(ExecutionError::IllegalInstruction {
335                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
336                    });
337                };
338                let group_regs = vtype.vlmul().register_count();
339                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
340                    program_counter,
341                    vd,
342                    group_regs,
343                )?;
344                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
345                    program_counter,
346                    vs2,
347                    group_regs,
348                )?;
349                zvexx_perm_helpers::check_no_overlap::<Reg, _, _, _>(
350                    program_counter,
351                    vd,
352                    vs2,
353                    group_regs,
354                )?;
355                if !vm && vd == VReg::V0 {
356                    ::core::hint::cold_path();
357                    return Err(ExecutionError::IllegalInstruction {
358                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
359                    });
360                }
361                let sew = vtype.vsew();
362                let scalar = rs1_value.as_i64().cast_unsigned();
363                // SAFETY: alignment and no-overlap verified; vl <= VLMAX.
364                unsafe {
365                    zvexx_perm_helpers::execute_slide1up(ext_state, vd, vs2, vm, sew, scalar);
366                }
367            }
368            // vslide1down.vx vd, vs2, rs1, vm
369            // Element vd[i] = vs2[i + 1] for 0 <= i < vl - 1.
370            // Element vd[vl - 1] gets the scalar value rs1.
371            // vd may overlap vs2 for slide1down.
372            Self::Vslide1downVx {
373                vd,
374                vs2,
375                rs1: _,
376                vm,
377            } => {
378                if !ext_state.vector_instructions_allowed() {
379                    ::core::hint::cold_path();
380                    return Err(ExecutionError::IllegalInstruction {
381                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
382                    });
383                }
384                let Some(vtype) = ext_state.vtype() else {
385                    ::core::hint::cold_path();
386                    return Err(ExecutionError::IllegalInstruction {
387                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
388                    });
389                };
390                let group_regs = vtype.vlmul().register_count();
391                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
392                    program_counter,
393                    vd,
394                    group_regs,
395                )?;
396                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
397                    program_counter,
398                    vs2,
399                    group_regs,
400                )?;
401                if !vm && vd == VReg::V0 {
402                    ::core::hint::cold_path();
403                    return Err(ExecutionError::IllegalInstruction {
404                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
405                    });
406                }
407                let sew = vtype.vsew();
408                let scalar = rs1_value.as_i64().cast_unsigned();
409                // SAFETY: alignment verified; vl <= VLMAX; overlap permitted by spec.
410                unsafe {
411                    zvexx_perm_helpers::execute_slide1down(ext_state, vd, vs2, vm, sew, scalar);
412                }
413            }
414            // vrgather.vv vd, vs2, vs1, vm
415            // vd[i] = (vs1[i] < VLMAX) ? vs2[vs1[i]] : 0
416            // vd must not overlap vs1 or vs2.
417            Self::VrgatherVv { vd, vs2, vs1, vm } => {
418                if !ext_state.vector_instructions_allowed() {
419                    ::core::hint::cold_path();
420                    return Err(ExecutionError::IllegalInstruction {
421                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
422                    });
423                }
424                let Some(vtype) = ext_state.vtype() else {
425                    ::core::hint::cold_path();
426                    return Err(ExecutionError::IllegalInstruction {
427                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
428                    });
429                };
430                let group_regs = vtype.vlmul().register_count();
431                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
432                    program_counter,
433                    vd,
434                    group_regs,
435                )?;
436                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
437                    program_counter,
438                    vs2,
439                    group_regs,
440                )?;
441                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
442                    program_counter,
443                    vs1,
444                    group_regs,
445                )?;
446                zvexx_perm_helpers::check_no_overlap::<Reg, _, _, _>(
447                    program_counter,
448                    vd,
449                    vs2,
450                    group_regs,
451                )?;
452                zvexx_perm_helpers::check_no_overlap::<Reg, _, _, _>(
453                    program_counter,
454                    vd,
455                    vs1,
456                    group_regs,
457                )?;
458                if !vm && vd == VReg::V0 {
459                    ::core::hint::cold_path();
460                    return Err(ExecutionError::IllegalInstruction {
461                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
462                    });
463                }
464                let sew = vtype.vsew();
465                let vlmax = ext_state.vlmax_for_vtype(vtype);
466                // SAFETY: all alignment and overlap constraints verified above; vl <= VLMAX.
467                unsafe {
468                    zvexx_perm_helpers::execute_rgather_vv(ext_state, vd, vs2, vs1, vm, sew, vlmax);
469                }
470            }
471            // vrgather.vx vd, vs2, rs1, vm
472            // All active elements of vd get vs2[rs1] if rs1 < VLMAX, else 0.
473            // vd must not overlap vs2.
474            Self::VrgatherVx {
475                vd,
476                vs2,
477                rs1: _,
478                vm,
479            } => {
480                if !ext_state.vector_instructions_allowed() {
481                    ::core::hint::cold_path();
482                    return Err(ExecutionError::IllegalInstruction {
483                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
484                    });
485                }
486                let Some(vtype) = ext_state.vtype() else {
487                    ::core::hint::cold_path();
488                    return Err(ExecutionError::IllegalInstruction {
489                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
490                    });
491                };
492                let group_regs = vtype.vlmul().register_count();
493                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
494                    program_counter,
495                    vd,
496                    group_regs,
497                )?;
498                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
499                    program_counter,
500                    vs2,
501                    group_regs,
502                )?;
503                zvexx_perm_helpers::check_no_overlap::<Reg, _, _, _>(
504                    program_counter,
505                    vd,
506                    vs2,
507                    group_regs,
508                )?;
509                if !vm && vd == VReg::V0 {
510                    ::core::hint::cold_path();
511                    return Err(ExecutionError::IllegalInstruction {
512                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
513                    });
514                }
515                let sew = vtype.vsew();
516                let vlmax = ext_state.vlmax_for_vtype(vtype);
517                let index = rs1_value.as_u64();
518                // SAFETY: alignment and no-overlap verified; vl <= VLMAX.
519                unsafe {
520                    zvexx_perm_helpers::execute_rgather_scalar(
521                        ext_state, vd, vs2, vm, sew, vlmax, index,
522                    );
523                }
524            }
525            // vrgather.vi vd, vs2, uimm, vm
526            // Same as vrgather.vx but index is a 5-bit unsigned immediate.
527            Self::VrgatherVi { vd, vs2, uimm, vm } => {
528                if !ext_state.vector_instructions_allowed() {
529                    ::core::hint::cold_path();
530                    return Err(ExecutionError::IllegalInstruction {
531                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
532                    });
533                }
534                let Some(vtype) = ext_state.vtype() else {
535                    ::core::hint::cold_path();
536                    return Err(ExecutionError::IllegalInstruction {
537                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
538                    });
539                };
540                let group_regs = vtype.vlmul().register_count();
541                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
542                    program_counter,
543                    vd,
544                    group_regs,
545                )?;
546                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
547                    program_counter,
548                    vs2,
549                    group_regs,
550                )?;
551                zvexx_perm_helpers::check_no_overlap::<Reg, _, _, _>(
552                    program_counter,
553                    vd,
554                    vs2,
555                    group_regs,
556                )?;
557                if !vm && vd == VReg::V0 {
558                    ::core::hint::cold_path();
559                    return Err(ExecutionError::IllegalInstruction {
560                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
561                    });
562                }
563                let sew = vtype.vsew();
564                let vlmax = ext_state.vlmax_for_vtype(vtype);
565                let index = u64::from(uimm);
566                // SAFETY: same as VrgatherVx.
567                unsafe {
568                    zvexx_perm_helpers::execute_rgather_scalar(
569                        ext_state, vd, vs2, vm, sew, vlmax, index,
570                    );
571                }
572            }
573            // vrgatherei16.vv vd, vs2, vs1, vm
574            // Like vrgather.vv but vs1 always uses EEW=16 (regardless of SEW).
575            // EMUL_vs1 = (16 / SEW) * LMUL; must be in [1/8, 8] else illegal.
576            // vd must not overlap vs1 or vs2.
577            Self::Vrgatherei16Vv { vd, vs2, vs1, vm } => {
578                if !ext_state.vector_instructions_allowed() {
579                    ::core::hint::cold_path();
580                    return Err(ExecutionError::IllegalInstruction {
581                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
582                    });
583                }
584                let Some(vtype) = ext_state.vtype() else {
585                    ::core::hint::cold_path();
586                    return Err(ExecutionError::IllegalInstruction {
587                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
588                    });
589                };
590                let group_regs = vtype.vlmul().register_count();
591                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
592                    program_counter,
593                    vd,
594                    group_regs,
595                )?;
596                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
597                    program_counter,
598                    vs2,
599                    group_regs,
600                )?;
601                // Compute EMUL for vs1 index register (EEW=16).
602                let index_group_regs = vtype
603                    .vlmul()
604                    .index_register_count(
605                        ab_riscv_primitives::instructions::v::Eew::E16,
606                        vtype.vsew(),
607                    )
608                    .ok_or(ExecutionError::IllegalInstruction {
609                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
610                    })?;
611                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
612                    program_counter,
613                    vs1,
614                    index_group_regs,
615                )?;
616                zvexx_perm_helpers::check_no_overlap::<Reg, _, _, _>(
617                    program_counter,
618                    vd,
619                    vs2,
620                    group_regs,
621                )?;
622                // vd and vs1 have different group sizes (group_regs vs index_group_regs),
623                // so the symmetric helper would use the wrong size for one of the intervals.
624                zvexx_perm_helpers::check_no_overlap_asymmetric::<Reg, _, _, _>(
625                    program_counter,
626                    vd,
627                    group_regs,
628                    vs1,
629                    index_group_regs,
630                )?;
631                if !vm && vd == VReg::V0 {
632                    ::core::hint::cold_path();
633                    return Err(ExecutionError::IllegalInstruction {
634                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
635                    });
636                }
637                let sew = vtype.vsew();
638                let vlmax = ext_state.vlmax_for_vtype(vtype);
639                // SAFETY: all alignment and overlap constraints verified; vl <= VLMAX;
640                // vs1 uses EEW=16 with computed index_group_regs.
641                unsafe {
642                    zvexx_perm_helpers::execute_rgatherei16(
643                        ext_state,
644                        vd,
645                        vs2,
646                        vs1,
647                        vm,
648                        sew,
649                        vlmax,
650                        index_group_regs,
651                    );
652                }
653            }
654            // vmerge.vvm / vmv.v.v
655            // When vm=true: vmv.v.v vd, vs1 - broadcast all active elements from vs1.
656            //   vs2 is ignored; no overlap restriction on vd/vs2.
657            // When vm=false: vmerge.vvm vd, vs2, vs1, v0
658            //   vd[i] = v0[i] ? vs1[i] : vs2[i]
659            //   vd must not overlap v0 (mask source).
660            Self::VmergeVvm { vd, vs2, vs1, vm } => {
661                if !ext_state.vector_instructions_allowed() {
662                    ::core::hint::cold_path();
663                    return Err(ExecutionError::IllegalInstruction {
664                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
665                    });
666                }
667                let Some(vtype) = ext_state.vtype() else {
668                    ::core::hint::cold_path();
669                    return Err(ExecutionError::IllegalInstruction {
670                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
671                    });
672                };
673                let group_regs = vtype.vlmul().register_count();
674                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
675                    program_counter,
676                    vd,
677                    group_regs,
678                )?;
679                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
680                    program_counter,
681                    vs1,
682                    group_regs,
683                )?;
684                if !vm {
685                    // vmerge: vs2 is read, vd must not overlap v0
686                    zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
687                        program_counter,
688                        vs2,
689                        group_regs,
690                    )?;
691                    zvexx_perm_helpers::check_no_overlap::<Reg, _, _, _>(
692                        program_counter,
693                        vd,
694                        VReg::V0,
695                        group_regs,
696                    )?;
697                }
698                let sew = vtype.vsew();
699                // SAFETY: alignment and overlap verified above; vl <= VLMAX.
700                unsafe {
701                    zvexx_perm_helpers::execute_merge_vv(ext_state, vd, vs2, vs1, vm, sew);
702                }
703            }
704            // vmerge.vxm / vmv.v.x
705            // When vm=true: vmv.v.x vd, rs1 - broadcast scalar to all active elements.
706            // When vm=false: vmerge.vxm - vd[i] = v0[i] ? rs1 : vs2[i]
707            Self::VmergeVxm {
708                vd,
709                vs2,
710                rs1: _,
711                vm,
712            } => {
713                if !ext_state.vector_instructions_allowed() {
714                    ::core::hint::cold_path();
715                    return Err(ExecutionError::IllegalInstruction {
716                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
717                    });
718                }
719                let Some(vtype) = ext_state.vtype() else {
720                    ::core::hint::cold_path();
721                    return Err(ExecutionError::IllegalInstruction {
722                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
723                    });
724                };
725                let group_regs = vtype.vlmul().register_count();
726                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
727                    program_counter,
728                    vd,
729                    group_regs,
730                )?;
731                if !vm {
732                    zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
733                        program_counter,
734                        vs2,
735                        group_regs,
736                    )?;
737                    zvexx_perm_helpers::check_no_overlap::<Reg, _, _, _>(
738                        program_counter,
739                        vd,
740                        VReg::V0,
741                        group_regs,
742                    )?;
743                }
744                let sew = vtype.vsew();
745                let scalar = rs1_value.as_i64().cast_unsigned();
746                // SAFETY: alignment and overlap verified above; vl <= VLMAX.
747                unsafe {
748                    zvexx_perm_helpers::execute_merge_scalar(ext_state, vd, vs2, vm, sew, scalar);
749                }
750            }
751            // vmerge.vim / vmv.v.i
752            // When vm=true: vmv.v.i vd, simm5 - broadcast sign-extended immediate.
753            // When vm=false: vmerge.vim - vd[i] = v0[i] ? simm5 : vs2[i]
754            Self::VmergeVim { vd, vs2, simm5, vm } => {
755                if !ext_state.vector_instructions_allowed() {
756                    ::core::hint::cold_path();
757                    return Err(ExecutionError::IllegalInstruction {
758                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
759                    });
760                }
761                let Some(vtype) = ext_state.vtype() else {
762                    ::core::hint::cold_path();
763                    return Err(ExecutionError::IllegalInstruction {
764                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
765                    });
766                };
767                let group_regs = vtype.vlmul().register_count();
768                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
769                    program_counter,
770                    vd,
771                    group_regs,
772                )?;
773                if !vm {
774                    zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
775                        program_counter,
776                        vs2,
777                        group_regs,
778                    )?;
779                    zvexx_perm_helpers::check_no_overlap::<Reg, _, _, _>(
780                        program_counter,
781                        vd,
782                        VReg::V0,
783                        group_regs,
784                    )?;
785                }
786                let sew = vtype.vsew();
787                // Sign-extend imm to u64 so the low sew_bytes are correct for all SEW.
788                let scalar = i64::from(simm5).cast_unsigned();
789                // SAFETY: alignment and overlap verified above; vl <= VLMAX.
790                unsafe {
791                    zvexx_perm_helpers::execute_merge_scalar(ext_state, vd, vs2, vm, sew, scalar);
792                }
793            }
794            // vcompress.vm vd, vs2, vs1
795            // Packs active elements of vs2 (where vs1 mask bit is set) sequentially into vd.
796            // Always unmasked (vm=1 in encoding); vs1 is the explicit mask operand.
797            // vd must not overlap vs1 or vs2.
798            Self::VcompressVm { vd, vs2, vs1 } => {
799                if !ext_state.vector_instructions_allowed() {
800                    ::core::hint::cold_path();
801                    return Err(ExecutionError::IllegalInstruction {
802                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
803                    });
804                }
805                let Some(vtype) = ext_state.vtype() else {
806                    ::core::hint::cold_path();
807                    return Err(ExecutionError::IllegalInstruction {
808                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
809                    });
810                };
811                // Spec §16.5: vstart must be zero.
812                if ext_state.vstart() != 0 {
813                    ::core::hint::cold_path();
814                    return Err(ExecutionError::IllegalInstruction {
815                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
816                    });
817                }
818                let group_regs = vtype.vlmul().register_count();
819                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
820                    program_counter,
821                    vd,
822                    group_regs,
823                )?;
824                zvexx_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
825                    program_counter,
826                    vs2,
827                    group_regs,
828                )?;
829                // vd must not overlap vs2; vs1 (single mask reg, no LMUL grouping) is checked next
830                zvexx_perm_helpers::check_no_overlap::<Reg, _, _, _>(
831                    program_counter,
832                    vd,
833                    vs2,
834                    group_regs,
835                )?;
836                // vs1 is a mask register; check it doesn't overlap vd
837                zvexx_perm_helpers::check_no_overlap::<Reg, _, _, _>(program_counter, vd, vs1, 1)?;
838                let sew = vtype.vsew();
839                let vl = ext_state.vl();
840                unsafe {
841                    zvexx_perm_helpers::execute_compress(ext_state, vd, vs2, vs1, vl, sew);
842                }
843            }
844            // vmv1r.v vd, vs2
845            // Whole register move: copies 1 register.
846            // No masking, no vtype/vl dependency.
847            Self::Vmv1rV { vd, vs2 } => {
848                if !ext_state.vector_instructions_allowed() {
849                    ::core::hint::cold_path();
850                    return Err(ExecutionError::IllegalInstruction {
851                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
852                    });
853                }
854                // SAFETY: both vd.to_bits() and vs2.to_bits() are always in [0, 32) by VReg
855                // invariant; copying 1 register always fits.
856                unsafe {
857                    zvexx_perm_helpers::execute_whole_reg_move::<1, _>(
858                        ext_state.write_vregs(),
859                        vd,
860                        vs2,
861                    );
862                }
863                ext_state.mark_vs_dirty();
864                ext_state.reset_vstart();
865            }
866            // vmv2r.v vd, vs2
867            // Whole register move: copies 2 registers.
868            // vd and vs2 must be aligned to 2 (checked here per spec §16.6).
869            Self::Vmv2rV { vd, vs2 } => {
870                if !ext_state.vector_instructions_allowed() {
871                    ::core::hint::cold_path();
872                    return Err(ExecutionError::IllegalInstruction {
873                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
874                    });
875                }
876                if !vd.to_bits().is_multiple_of(2) || !vs2.to_bits().is_multiple_of(2) {
877                    ::core::hint::cold_path();
878                    return Err(ExecutionError::IllegalInstruction {
879                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
880                    });
881                }
882                // SAFETY: alignment verified; 2 registers from aligned base always stay in [0, 32).
883                unsafe {
884                    zvexx_perm_helpers::execute_whole_reg_move::<2, _>(
885                        ext_state.write_vregs(),
886                        vd,
887                        vs2,
888                    );
889                }
890                ext_state.mark_vs_dirty();
891                ext_state.reset_vstart();
892            }
893            // vmv4r.v vd, vs2
894            // Whole register move: copies 4 registers.
895            Self::Vmv4rV { vd, vs2 } => {
896                if !ext_state.vector_instructions_allowed() {
897                    ::core::hint::cold_path();
898                    return Err(ExecutionError::IllegalInstruction {
899                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
900                    });
901                }
902                if !vd.to_bits().is_multiple_of(4) || !vs2.to_bits().is_multiple_of(4) {
903                    ::core::hint::cold_path();
904                    return Err(ExecutionError::IllegalInstruction {
905                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
906                    });
907                }
908                // SAFETY: alignment verified; 4 registers from aligned base always stay in [0, 32).
909                unsafe {
910                    zvexx_perm_helpers::execute_whole_reg_move::<4, _>(
911                        ext_state.write_vregs(),
912                        vd,
913                        vs2,
914                    );
915                }
916                ext_state.mark_vs_dirty();
917                ext_state.reset_vstart();
918            }
919            // vmv8r.v vd, vs2
920            // Whole register move: copies 8 registers.
921            Self::Vmv8rV { vd, vs2 } => {
922                if !ext_state.vector_instructions_allowed() {
923                    ::core::hint::cold_path();
924                    return Err(ExecutionError::IllegalInstruction {
925                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
926                    });
927                }
928                if !vd.to_bits().is_multiple_of(8) || !vs2.to_bits().is_multiple_of(8) {
929                    ::core::hint::cold_path();
930                    return Err(ExecutionError::IllegalInstruction {
931                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
932                    });
933                }
934                // SAFETY: alignment verified; 8 registers from aligned base always stay in [0, 32).
935                unsafe {
936                    zvexx_perm_helpers::execute_whole_reg_move::<8, _>(
937                        ext_state.write_vregs(),
938                        vd,
939                        vs2,
940                    );
941                }
942                ext_state.mark_vs_dirty();
943                ext_state.reset_vstart();
944            }
945        }
946
947        Ok(ControlFlow::Continue(Default::default()))
948    }
949}