Skip to main content

ab_riscv_interpreter/v/zve64x/
perm.rs

1//! Zve64x permutation instructions
2
3#[cfg(test)]
4mod tests;
5pub mod zve64x_perm_helpers;
6
7use crate::v::vector_registers::VectorRegistersExt;
8use crate::v::zve64x::zve64x_helpers;
9use crate::{
10    ExecutableInstruction, ExecutableInstructionCsr, ExecutableInstructionOperands, ExecutionError,
11    ProgramCounter, RegisterFile, Rs1Rs2OperandValues, Rs1Rs2Operands, VirtualMemory,
12};
13use ab_riscv_macros::instruction_execution;
14use ab_riscv_primitives::prelude::*;
15use core::fmt;
16use core::ops::ControlFlow;
17
// Implements `ExecutableInstructionOperands` for the Zve64x permutation instruction type.
// The impl body is empty, so no trait items are written by hand here; the only bound is
// `Reg: Register`.
// NOTE(review): presumably the trait's defaults and/or the `#[instruction_execution]`
// expansion supply the actual operand handling - the macro output is not visible in this
// file, confirm there.
18#[instruction_execution]
19impl<Reg> ExecutableInstructionOperands for Zve64xPermInstruction<Reg> where Reg: Register {}
20
// Implements `ExecutableInstructionCsr<ExtState, CustomError>` for the Zve64x permutation
// instruction type. `ExtState` and `CustomError` are left unbounded, so the impl applies to
// any extension-state / error type; only `Reg: Register` is required.
// The body is empty: nothing is overridden by hand in this file.
// NOTE(review): presumably the permutation instructions need no CSR-specific behaviour and
// rely on trait defaults or the `#[instruction_execution]` expansion - confirm against the
// trait definition and the macro.
21#[instruction_execution]
22impl<Reg, ExtState, CustomError> ExecutableInstructionCsr<ExtState, CustomError>
23    for Zve64xPermInstruction<Reg>
24where
25    Reg: Register,
26{
27}
28
29#[instruction_execution]
30impl<Reg, Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
31    ExecutableInstruction<Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
32    for Zve64xPermInstruction<Reg>
33where
34    Reg: Register,
35    Regs: RegisterFile<Reg>,
36    ExtState: VectorRegistersExt<Reg, CustomError>,
37    [(); ExtState::ELEN as usize]:,
38    [(); ExtState::VLEN as usize]:,
39    [(); ExtState::VLENB as usize]:,
40    Memory: VirtualMemory,
41    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
42    CustomError: fmt::Debug,
43{
44    #[inline(always)]
45    fn execute(
46        self,
47        Rs1Rs2OperandValues {
48            rs1_value,
49            rs2_value: _,
50        }: Rs1Rs2OperandValues<<Self::Reg as Register>::Type>,
51        regs: &mut Regs,
52        ext_state: &mut ExtState,
53        _memory: &mut Memory,
54        program_counter: &mut PC,
55        _system_instruction_handler: &mut InstructionHandler,
56    ) -> Result<
57        ControlFlow<(), (Self::Reg, <Self::Reg as Register>::Type)>,
58        ExecutionError<Reg::Type, CustomError>,
59    > {
60        match self {
61            // vmv.x.s rd, vs2
62            // Copies sign-extended element 0 of vs2 (at current SEW) to GPR rd.
63            // Requires valid vtype (needs SEW to know element width).
64            // Does not use vl or masking; always reads element 0.
65            // Resets vstart per spec §6.3.
66            Self::VmvXS { rd, vs2 } => {
67                if !ext_state.vector_instructions_allowed() {
68                    Err(ExecutionError::IllegalInstruction {
69                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
70                    })?;
71                }
72                let vtype = ext_state
73                    .vtype()
74                    .ok_or(ExecutionError::IllegalInstruction {
75                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
76                    })?;
77                let sew = vtype.vsew();
78                // SAFETY: element 0 is always within register v(vs2_base), byte offset 0;
79                // VLENB >= sew.bytes() for all legal vtype configurations.
80                let raw = unsafe {
81                    zve64x_perm_helpers::read_element_0_u64(ext_state.read_vreg(), vs2.bits(), sew)
82                };
83                let sign_extended = zve64x_perm_helpers::sign_extend_to_reg::<Reg>(raw, sew);
84                regs.write(rd, sign_extended);
85                ext_state.mark_vs_dirty();
86                ext_state.reset_vstart();
87            }
88            // vmv.s.x vd, rs1
89            // Copies scalar GPR rs1 (zero-extended / truncated to SEW) into element 0 of vd.
90            // When vl == 0, the write is suppressed but vstart is still reset.
91            // Resets vstart per spec §6.3.
92            Self::VmvSX { vd, rs1: _ } => {
93                if !ext_state.vector_instructions_allowed() {
94                    Err(ExecutionError::IllegalInstruction {
95                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
96                    })?;
97                }
98                let vtype = ext_state
99                    .vtype()
100                    .ok_or(ExecutionError::IllegalInstruction {
101                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
102                    })?;
103                let sew = vtype.vsew();
104                let vl = ext_state.vl();
105                let vstart = u32::from(ext_state.vstart());
106                // Per spec §16.1: update only when vstart < vl.
107                if vstart < vl {
108                    let scalar = rs1_value.as_u64();
109                    // SAFETY: element 0 always fits.
110                    unsafe {
111                        zve64x_perm_helpers::write_element_0_u64(
112                            ext_state.write_vreg(),
113                            vd.bits(),
114                            sew,
115                            scalar,
116                        );
117                    }
118                }
119                ext_state.mark_vs_dirty();
120                ext_state.reset_vstart();
121            }
122            // vslideup.vx vd, vs2, rs1, vm
123            // Slides elements of vs2 up by the scalar offset in rs1.
124            // Elements vd[0..offset] are unchanged (tail-undisturbed for those positions).
125            // Elements vd[i] for offset <= i < vl get vs2[i - offset].
126            // Per spec §16.3.1: vd must not overlap vs2.
127            Self::VslideupVx {
128                vd,
129                vs2,
130                rs1: _,
131                vm,
132            } => {
133                if !ext_state.vector_instructions_allowed() {
134                    Err(ExecutionError::IllegalInstruction {
135                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
136                    })?;
137                }
138                let vtype = ext_state
139                    .vtype()
140                    .ok_or(ExecutionError::IllegalInstruction {
141                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
142                    })?;
143                let group_regs = vtype.vlmul().register_count();
144                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
145                    program_counter,
146                    vd,
147                    group_regs,
148                )?;
149                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
150                    program_counter,
151                    vs2,
152                    group_regs,
153                )?;
154                // vd must not overlap vs2
155                zve64x_perm_helpers::check_no_overlap::<Reg, _, _, _>(
156                    program_counter,
157                    vd,
158                    vs2,
159                    group_regs,
160                )?;
161                if !vm && vd.bits() == 0 {
162                    Err(ExecutionError::IllegalInstruction {
163                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
164                    })?;
165                }
166                let sew = vtype.vsew();
167                let vl = ext_state.vl();
168                let vstart = u32::from(ext_state.vstart());
169                let offset = rs1_value.as_u64();
170                // SAFETY: alignment and no-overlap verified above; vl <= VLMAX.
171                unsafe {
172                    zve64x_perm_helpers::execute_slideup(
173                        ext_state, vd, vs2, vm, vl, vstart, sew, offset,
174                    );
175                }
176            }
177            // vslideup.vi vd, vs2, uimm, vm
178            // Same as vslideup.vx but offset is a 5-bit unsigned immediate.
179            Self::VslideupVi { vd, vs2, uimm, vm } => {
180                if !ext_state.vector_instructions_allowed() {
181                    Err(ExecutionError::IllegalInstruction {
182                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
183                    })?;
184                }
185                let vtype = ext_state
186                    .vtype()
187                    .ok_or(ExecutionError::IllegalInstruction {
188                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
189                    })?;
190                let group_regs = vtype.vlmul().register_count();
191                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
192                    program_counter,
193                    vd,
194                    group_regs,
195                )?;
196                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
197                    program_counter,
198                    vs2,
199                    group_regs,
200                )?;
201                zve64x_perm_helpers::check_no_overlap::<Reg, _, _, _>(
202                    program_counter,
203                    vd,
204                    vs2,
205                    group_regs,
206                )?;
207                if !vm && vd.bits() == 0 {
208                    Err(ExecutionError::IllegalInstruction {
209                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
210                    })?;
211                }
212                let sew = vtype.vsew();
213                let vl = ext_state.vl();
214                let vstart = u32::from(ext_state.vstart());
215                let offset = u64::from(uimm);
216                // SAFETY: same as VslideupVx.
217                unsafe {
218                    zve64x_perm_helpers::execute_slideup(
219                        ext_state, vd, vs2, vm, vl, vstart, sew, offset,
220                    );
221                }
222            }
223            // vslidedown.vx vd, vs2, rs1, vm
224            // Element vd[i] = vs2[i + offset] if i + offset < VLMAX, else 0.
225            // vd may overlap vs2 for slidedown.
226            Self::VslidedownVx {
227                vd,
228                vs2,
229                rs1: _,
230                vm,
231            } => {
232                if !ext_state.vector_instructions_allowed() {
233                    Err(ExecutionError::IllegalInstruction {
234                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
235                    })?;
236                }
237                let vtype = ext_state
238                    .vtype()
239                    .ok_or(ExecutionError::IllegalInstruction {
240                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
241                    })?;
242                let group_regs = vtype.vlmul().register_count();
243                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
244                    program_counter,
245                    vd,
246                    group_regs,
247                )?;
248                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
249                    program_counter,
250                    vs2,
251                    group_regs,
252                )?;
253                if !vm && vd.bits() == 0 {
254                    Err(ExecutionError::IllegalInstruction {
255                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
256                    })?;
257                }
258                let sew = vtype.vsew();
259                let vl = ext_state.vl();
260                let vstart = u32::from(ext_state.vstart());
261                let vlmax = ext_state.vlmax_for_vtype(vtype);
262                let offset = rs1_value.as_u64();
263                // SAFETY: alignment verified above; vl <= VLMAX; offset clamped in helper.
264                unsafe {
265                    zve64x_perm_helpers::execute_slidedown(
266                        ext_state, vd, vs2, vm, vl, vstart, sew, vlmax, offset,
267                    );
268                }
269            }
270            // vslidedown.vi vd, vs2, uimm, vm
271            // Same as vslidedown.vx but offset is a 5-bit unsigned immediate.
272            Self::VslidedownVi { vd, vs2, uimm, vm } => {
273                if !ext_state.vector_instructions_allowed() {
274                    Err(ExecutionError::IllegalInstruction {
275                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
276                    })?;
277                }
278                let vtype = ext_state
279                    .vtype()
280                    .ok_or(ExecutionError::IllegalInstruction {
281                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
282                    })?;
283                let group_regs = vtype.vlmul().register_count();
284                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
285                    program_counter,
286                    vd,
287                    group_regs,
288                )?;
289                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
290                    program_counter,
291                    vs2,
292                    group_regs,
293                )?;
294                if !vm && vd.bits() == 0 {
295                    Err(ExecutionError::IllegalInstruction {
296                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
297                    })?;
298                }
299                let sew = vtype.vsew();
300                let vl = ext_state.vl();
301                let vstart = u32::from(ext_state.vstart());
302                let vlmax = ext_state.vlmax_for_vtype(vtype);
303                let offset = u64::from(uimm);
304                // SAFETY: same as VslidedownVx.
305                unsafe {
306                    zve64x_perm_helpers::execute_slidedown(
307                        ext_state, vd, vs2, vm, vl, vstart, sew, vlmax, offset,
308                    );
309                }
310            }
311            // vslide1up.vx vd, vs2, rs1, vm
312            // Element 0 of vd gets the scalar value rs1 (written at SEW width).
313            // Elements vd[i] for 1 <= i < vl get vs2[i - 1].
314            // vd must not overlap vs2.
315            Self::Vslide1upVx {
316                vd,
317                vs2,
318                rs1: _,
319                vm,
320            } => {
321                if !ext_state.vector_instructions_allowed() {
322                    Err(ExecutionError::IllegalInstruction {
323                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
324                    })?;
325                }
326                let vtype = ext_state
327                    .vtype()
328                    .ok_or(ExecutionError::IllegalInstruction {
329                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
330                    })?;
331                let group_regs = vtype.vlmul().register_count();
332                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
333                    program_counter,
334                    vd,
335                    group_regs,
336                )?;
337                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
338                    program_counter,
339                    vs2,
340                    group_regs,
341                )?;
342                zve64x_perm_helpers::check_no_overlap::<Reg, _, _, _>(
343                    program_counter,
344                    vd,
345                    vs2,
346                    group_regs,
347                )?;
348                if !vm && vd.bits() == 0 {
349                    Err(ExecutionError::IllegalInstruction {
350                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
351                    })?;
352                }
353                let sew = vtype.vsew();
354                let vl = ext_state.vl();
355                let vstart = u32::from(ext_state.vstart());
356                let scalar = rs1_value.as_u64();
357                // SAFETY: alignment and no-overlap verified; vl <= VLMAX.
358                unsafe {
359                    zve64x_perm_helpers::execute_slide1up(
360                        ext_state, vd, vs2, vm, vl, vstart, sew, scalar,
361                    );
362                }
363            }
364            // vslide1down.vx vd, vs2, rs1, vm
365            // Element vd[i] = vs2[i + 1] for 0 <= i < vl - 1.
366            // Element vd[vl - 1] gets the scalar value rs1.
367            // vd may overlap vs2 for slide1down.
368            Self::Vslide1downVx {
369                vd,
370                vs2,
371                rs1: _,
372                vm,
373            } => {
374                if !ext_state.vector_instructions_allowed() {
375                    Err(ExecutionError::IllegalInstruction {
376                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
377                    })?;
378                }
379                let vtype = ext_state
380                    .vtype()
381                    .ok_or(ExecutionError::IllegalInstruction {
382                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
383                    })?;
384                let group_regs = vtype.vlmul().register_count();
385                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
386                    program_counter,
387                    vd,
388                    group_regs,
389                )?;
390                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
391                    program_counter,
392                    vs2,
393                    group_regs,
394                )?;
395                if !vm && vd.bits() == 0 {
396                    Err(ExecutionError::IllegalInstruction {
397                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
398                    })?;
399                }
400                let sew = vtype.vsew();
401                let vl = ext_state.vl();
402                let vstart = u32::from(ext_state.vstart());
403                let scalar = rs1_value.as_u64();
404                // SAFETY: alignment verified; vl <= VLMAX; overlap permitted by spec.
405                unsafe {
406                    zve64x_perm_helpers::execute_slide1down(
407                        ext_state, vd, vs2, vm, vl, vstart, sew, scalar,
408                    );
409                }
410            }
411            // vrgather.vv vd, vs2, vs1, vm
412            // vd[i] = (vs1[i] < VLMAX) ? vs2[vs1[i]] : 0
413            // vd must not overlap vs1 or vs2.
414            Self::VrgatherVv { vd, vs2, vs1, vm } => {
415                if !ext_state.vector_instructions_allowed() {
416                    Err(ExecutionError::IllegalInstruction {
417                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
418                    })?;
419                }
420                let vtype = ext_state
421                    .vtype()
422                    .ok_or(ExecutionError::IllegalInstruction {
423                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
424                    })?;
425                let group_regs = vtype.vlmul().register_count();
426                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
427                    program_counter,
428                    vd,
429                    group_regs,
430                )?;
431                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
432                    program_counter,
433                    vs2,
434                    group_regs,
435                )?;
436                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
437                    program_counter,
438                    vs1,
439                    group_regs,
440                )?;
441                zve64x_perm_helpers::check_no_overlap::<Reg, _, _, _>(
442                    program_counter,
443                    vd,
444                    vs2,
445                    group_regs,
446                )?;
447                zve64x_perm_helpers::check_no_overlap::<Reg, _, _, _>(
448                    program_counter,
449                    vd,
450                    vs1,
451                    group_regs,
452                )?;
453                if !vm && vd.bits() == 0 {
454                    Err(ExecutionError::IllegalInstruction {
455                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
456                    })?;
457                }
458                let sew = vtype.vsew();
459                let vl = ext_state.vl();
460                let vstart = u32::from(ext_state.vstart());
461                let vlmax = ext_state.vlmax_for_vtype(vtype);
462                // SAFETY: all alignment and overlap constraints verified above; vl <= VLMAX.
463                unsafe {
464                    zve64x_perm_helpers::execute_rgather_vv(
465                        ext_state, vd, vs2, vs1, vm, vl, vstart, sew, vlmax,
466                    );
467                }
468            }
469            // vrgather.vx vd, vs2, rs1, vm
470            // All active elements of vd get vs2[rs1] if rs1 < VLMAX, else 0.
471            // vd must not overlap vs2.
472            Self::VrgatherVx {
473                vd,
474                vs2,
475                rs1: _,
476                vm,
477            } => {
478                if !ext_state.vector_instructions_allowed() {
479                    Err(ExecutionError::IllegalInstruction {
480                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
481                    })?;
482                }
483                let vtype = ext_state
484                    .vtype()
485                    .ok_or(ExecutionError::IllegalInstruction {
486                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
487                    })?;
488                let group_regs = vtype.vlmul().register_count();
489                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
490                    program_counter,
491                    vd,
492                    group_regs,
493                )?;
494                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
495                    program_counter,
496                    vs2,
497                    group_regs,
498                )?;
499                zve64x_perm_helpers::check_no_overlap::<Reg, _, _, _>(
500                    program_counter,
501                    vd,
502                    vs2,
503                    group_regs,
504                )?;
505                if !vm && vd.bits() == 0 {
506                    Err(ExecutionError::IllegalInstruction {
507                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
508                    })?;
509                }
510                let sew = vtype.vsew();
511                let vl = ext_state.vl();
512                let vstart = u32::from(ext_state.vstart());
513                let vlmax = ext_state.vlmax_for_vtype(vtype);
514                let index = rs1_value.as_u64();
515                // SAFETY: alignment and no-overlap verified; vl <= VLMAX.
516                unsafe {
517                    zve64x_perm_helpers::execute_rgather_scalar(
518                        ext_state, vd, vs2, vm, vl, vstart, sew, vlmax, index,
519                    );
520                }
521            }
522            // vrgather.vi vd, vs2, uimm, vm
523            // Same as vrgather.vx but index is a 5-bit unsigned immediate.
524            Self::VrgatherVi { vd, vs2, uimm, vm } => {
525                if !ext_state.vector_instructions_allowed() {
526                    Err(ExecutionError::IllegalInstruction {
527                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
528                    })?;
529                }
530                let vtype = ext_state
531                    .vtype()
532                    .ok_or(ExecutionError::IllegalInstruction {
533                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
534                    })?;
535                let group_regs = vtype.vlmul().register_count();
536                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
537                    program_counter,
538                    vd,
539                    group_regs,
540                )?;
541                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
542                    program_counter,
543                    vs2,
544                    group_regs,
545                )?;
546                zve64x_perm_helpers::check_no_overlap::<Reg, _, _, _>(
547                    program_counter,
548                    vd,
549                    vs2,
550                    group_regs,
551                )?;
552                if !vm && vd.bits() == 0 {
553                    Err(ExecutionError::IllegalInstruction {
554                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
555                    })?;
556                }
557                let sew = vtype.vsew();
558                let vl = ext_state.vl();
559                let vstart = u32::from(ext_state.vstart());
560                let vlmax = ext_state.vlmax_for_vtype(vtype);
561                let index = u64::from(uimm);
562                // SAFETY: same as VrgatherVx.
563                unsafe {
564                    zve64x_perm_helpers::execute_rgather_scalar(
565                        ext_state, vd, vs2, vm, vl, vstart, sew, vlmax, index,
566                    );
567                }
568            }
569            // vrgatherei16.vv vd, vs2, vs1, vm
570            // Like vrgather.vv but vs1 always uses EEW=16 (regardless of SEW).
571            // EMUL_vs1 = (16 / SEW) * LMUL; must be in [1/8, 8] else illegal.
572            // vd must not overlap vs1 or vs2.
573            Self::Vrgatherei16Vv { vd, vs2, vs1, vm } => {
574                if !ext_state.vector_instructions_allowed() {
575                    Err(ExecutionError::IllegalInstruction {
576                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
577                    })?;
578                }
579                let vtype = ext_state
580                    .vtype()
581                    .ok_or(ExecutionError::IllegalInstruction {
582                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
583                    })?;
584                let group_regs = vtype.vlmul().register_count();
585                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
586                    program_counter,
587                    vd,
588                    group_regs,
589                )?;
590                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
591                    program_counter,
592                    vs2,
593                    group_regs,
594                )?;
595                // Compute EMUL for vs1 index register (EEW=16).
596                let index_group_regs = vtype
597                    .vlmul()
598                    .index_register_count(
599                        ab_riscv_primitives::instructions::v::Eew::E16,
600                        vtype.vsew(),
601                    )
602                    .ok_or(ExecutionError::IllegalInstruction {
603                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
604                    })?;
605                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
606                    program_counter,
607                    vs1,
608                    index_group_regs,
609                )?;
610                zve64x_perm_helpers::check_no_overlap::<Reg, _, _, _>(
611                    program_counter,
612                    vd,
613                    vs2,
614                    group_regs,
615                )?;
616                // vd and vs1 have different group sizes (group_regs vs index_group_regs),
617                // so the symmetric helper would use the wrong size for one of the intervals.
618                zve64x_perm_helpers::check_no_overlap_asymmetric::<Reg, _, _, _>(
619                    program_counter,
620                    vd,
621                    group_regs,
622                    vs1,
623                    index_group_regs,
624                )?;
625                if !vm && vd.bits() == 0 {
626                    Err(ExecutionError::IllegalInstruction {
627                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
628                    })?;
629                }
630                let sew = vtype.vsew();
631                let vl = ext_state.vl();
632                let vstart = u32::from(ext_state.vstart());
633                let vlmax = ext_state.vlmax_for_vtype(vtype);
634                // SAFETY: all alignment and overlap constraints verified; vl <= VLMAX;
635                // vs1 uses EEW=16 with computed index_group_regs.
636                unsafe {
637                    zve64x_perm_helpers::execute_rgatherei16(
638                        ext_state,
639                        vd,
640                        vs2,
641                        vs1,
642                        vm,
643                        vl,
644                        vstart,
645                        sew,
646                        vlmax,
647                        index_group_regs,
648                    );
649                }
650            }
651            // vmerge.vvm / vmv.v.v
652            // When vm=true: vmv.v.v vd, vs1 - copy vs1 to vd element-wise (vd[i] = vs1[i]).
653            //   vs2 is ignored; no overlap restriction on vd/vs2.
654            // When vm=false: vmerge.vvm vd, vs2, vs1, v0
655            //   vd[i] = v0[i] ? vs1[i] : vs2[i]
656            //   vd must not overlap v0 (mask source).
657            Self::VmergeVvm { vd, vs2, vs1, vm } => {
658                if !ext_state.vector_instructions_allowed() {
659                    Err(ExecutionError::IllegalInstruction {
660                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
661                    })?;
662                }
663                let vtype = ext_state
664                    .vtype()
665                    .ok_or(ExecutionError::IllegalInstruction {
666                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
667                    })?;
668                let group_regs = vtype.vlmul().register_count();
669                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
670                    program_counter,
671                    vd,
672                    group_regs,
673                )?;
674                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
675                    program_counter,
676                    vs1,
677                    group_regs,
678                )?;
679                if !vm {
680                    // vmerge: vs2 is read, vd must not overlap v0
681                    zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
682                        program_counter,
683                        vs2,
684                        group_regs,
685                    )?;
686                    zve64x_perm_helpers::check_no_overlap::<Reg, _, _, _>(
687                        program_counter,
688                        vd,
689                        VReg::V0,
690                        group_regs,
691                    )?;
692                }
693                let sew = vtype.vsew();
694                let vl = ext_state.vl();
695                let vstart = u32::from(ext_state.vstart());
696                // SAFETY: alignment and overlap verified above; vl <= VLMAX.
697                unsafe {
698                    zve64x_perm_helpers::execute_merge_vv(
699                        ext_state, vd, vs2, vs1, vm, vl, vstart, sew,
700                    );
701                }
702            }
703            // vmerge.vxm / vmv.v.x
704            // When vm=true: vmv.v.x vd, rs1 - broadcast scalar to all active elements.
705            // When vm=false: vmerge.vxm - vd[i] = v0[i] ? rs1 : vs2[i]
706            Self::VmergeVxm {
707                vd,
708                vs2,
709                rs1: _,
710                vm,
711            } => {
712                if !ext_state.vector_instructions_allowed() {
713                    Err(ExecutionError::IllegalInstruction {
714                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
715                    })?;
716                }
717                let vtype = ext_state
718                    .vtype()
719                    .ok_or(ExecutionError::IllegalInstruction {
720                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
721                    })?;
722                let group_regs = vtype.vlmul().register_count();
723                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
724                    program_counter,
725                    vd,
726                    group_regs,
727                )?;
728                if !vm {
729                    zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
730                        program_counter,
731                        vs2,
732                        group_regs,
733                    )?;
734                    zve64x_perm_helpers::check_no_overlap::<Reg, _, _, _>(
735                        program_counter,
736                        vd,
737                        VReg::V0,
738                        group_regs,
739                    )?;
740                }
741                let sew = vtype.vsew();
742                let vl = ext_state.vl();
743                let vstart = u32::from(ext_state.vstart());
744                let scalar = rs1_value.as_u64();
745                // SAFETY: alignment and overlap verified above; vl <= VLMAX.
746                unsafe {
747                    zve64x_perm_helpers::execute_merge_scalar(
748                        ext_state, vd, vs2, vm, vl, vstart, sew, scalar,
749                    );
750                }
751            }
752            // vmerge.vim / vmv.v.i
753            // When vm=true: vmv.v.i vd, simm5 - broadcast sign-extended immediate.
754            // When vm=false: vmerge.vim - vd[i] = v0[i] ? simm5 : vs2[i]
755            Self::VmergeVim { vd, vs2, simm5, vm } => {
756                if !ext_state.vector_instructions_allowed() {
757                    Err(ExecutionError::IllegalInstruction {
758                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
759                    })?;
760                }
761                let vtype = ext_state
762                    .vtype()
763                    .ok_or(ExecutionError::IllegalInstruction {
764                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
765                    })?;
766                let group_regs = vtype.vlmul().register_count();
767                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
768                    program_counter,
769                    vd,
770                    group_regs,
771                )?;
772                if !vm {
773                    zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
774                        program_counter,
775                        vs2,
776                        group_regs,
777                    )?;
778                    zve64x_perm_helpers::check_no_overlap::<Reg, _, _, _>(
779                        program_counter,
780                        vd,
781                        VReg::V0,
782                        group_regs,
783                    )?;
784                }
785                let sew = vtype.vsew();
786                let vl = ext_state.vl();
787                let vstart = u32::from(ext_state.vstart());
788                // Sign-extend imm to u64 so the low sew_bytes are correct for all SEW.
789                let scalar = i64::from(simm5).cast_unsigned();
790                // SAFETY: alignment and overlap verified above; vl <= VLMAX.
791                unsafe {
792                    zve64x_perm_helpers::execute_merge_scalar(
793                        ext_state, vd, vs2, vm, vl, vstart, sew, scalar,
794                    );
795                }
796            }
797            // vcompress.vm vd, vs2, vs1
798            // Packs active elements of vs2 (where vs1 mask bit is set) sequentially into vd.
799            // Always unmasked (vm=1 in encoding); vs1 is the explicit mask operand.
800            // vd must not overlap vs1 or vs2.
801            Self::VcompressVm { vd, vs2, vs1 } => {
802                if !ext_state.vector_instructions_allowed() {
803                    Err(ExecutionError::IllegalInstruction {
804                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
805                    })?;
806                }
807                let vtype = ext_state
808                    .vtype()
809                    .ok_or(ExecutionError::IllegalInstruction {
810                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
811                    })?;
812                // Spec §16.5: vstart must be zero.
813                if ext_state.vstart() != 0 {
814                    Err(ExecutionError::IllegalInstruction {
815                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
816                    })?;
817                }
818                let group_regs = vtype.vlmul().register_count();
819                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
820                    program_counter,
821                    vd,
822                    group_regs,
823                )?;
824                zve64x_perm_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
825                    program_counter,
826                    vs2,
827                    group_regs,
828                )?;
829                // vd and vs2 groups must not overlap. vs1 (checked below) is always a single mask register (no LMUL grouping).
830                zve64x_perm_helpers::check_no_overlap::<Reg, _, _, _>(
831                    program_counter,
832                    vd,
833                    vs2,
834                    group_regs,
835                )?;
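836                // vs1 is a mask register; check it doesn't overlap vd. NOTE(review): group size 1 is passed here - confirm vs1 lying inside an LMUL>1 vd group is still rejected.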
837                zve64x_perm_helpers::check_no_overlap::<Reg, _, _, _>(program_counter, vd, vs1, 1)?;
838                let sew = vtype.vsew();
839                let vl = ext_state.vl();
840                unsafe {
841                    zve64x_perm_helpers::execute_compress(ext_state, vd, vs2, vs1, vl, sew);
842                }
843            }
844            // vmv1r.v vd, vs2
845            // Whole register move: copies 1 register.
846            // No masking, no vtype/vl dependency.
847            Self::Vmv1rV { vd, vs2 } => {
848                if !ext_state.vector_instructions_allowed() {
849                    Err(ExecutionError::IllegalInstruction {
850                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
851                    })?;
852                }
853                // SAFETY: both vd.bits() and vs2.bits() are always in [0, 32) by VReg invariant;
854                // copying 1 register always fits.
855                unsafe {
856                    zve64x_perm_helpers::execute_whole_reg_move(
857                        ext_state.write_vreg(),
858                        vd.bits(),
859                        vs2.bits(),
860                        1,
861                    );
862                }
863                ext_state.mark_vs_dirty();
864                ext_state.reset_vstart();
865            }
866            // vmv2r.v vd, vs2
867            // Whole register move: copies 2 registers.
868            // vd and vs2 must be aligned to 2 (checked here per spec §16.6).
869            Self::Vmv2rV { vd, vs2 } => {
870                if !ext_state.vector_instructions_allowed() {
871                    Err(ExecutionError::IllegalInstruction {
872                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
873                    })?;
874                }
875                if !vd.bits().is_multiple_of(2) || !vs2.bits().is_multiple_of(2) {
876                    Err(ExecutionError::IllegalInstruction {
877                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
878                    })?;
879                }
880                // SAFETY: alignment verified; 2 registers from aligned base always stay in [0, 32).
881                unsafe {
882                    zve64x_perm_helpers::execute_whole_reg_move(
883                        ext_state.write_vreg(),
884                        vd.bits(),
885                        vs2.bits(),
886                        2,
887                    );
888                }
889                ext_state.mark_vs_dirty();
890                ext_state.reset_vstart();
891            }
892            // vmv4r.v vd, vs2
893            // Whole register move: copies 4 registers.
894            Self::Vmv4rV { vd, vs2 } => {
895                if !ext_state.vector_instructions_allowed() {
896                    Err(ExecutionError::IllegalInstruction {
897                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
898                    })?;
899                }
900                if !vd.bits().is_multiple_of(4) || !vs2.bits().is_multiple_of(4) {
901                    Err(ExecutionError::IllegalInstruction {
902                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
903                    })?;
904                }
905                // SAFETY: alignment verified; 4 registers from aligned base always stay in [0, 32).
906                unsafe {
907                    zve64x_perm_helpers::execute_whole_reg_move(
908                        ext_state.write_vreg(),
909                        vd.bits(),
910                        vs2.bits(),
911                        4,
912                    );
913                }
914                ext_state.mark_vs_dirty();
915                ext_state.reset_vstart();
916            }
917            // vmv8r.v vd, vs2
918            // Whole register move: copies 8 registers.
919            Self::Vmv8rV { vd, vs2 } => {
920                if !ext_state.vector_instructions_allowed() {
921                    Err(ExecutionError::IllegalInstruction {
922                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
923                    })?;
924                }
925                if !vd.bits().is_multiple_of(8) || !vs2.bits().is_multiple_of(8) {
926                    Err(ExecutionError::IllegalInstruction {
927                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
928                    })?;
929                }
930                // SAFETY: alignment verified; 8 registers from aligned base always stay in [0, 32).
931                unsafe {
932                    zve64x_perm_helpers::execute_whole_reg_move(
933                        ext_state.write_vreg(),
934                        vd.bits(),
935                        vs2.bits(),
936                        8,
937                    );
938                }
939                ext_state.mark_vs_dirty();
940                ext_state.reset_vstart();
941            }
942        }
943
944        Ok(ControlFlow::Continue(Default::default()))
945    }
946}