Skip to main content

ab_riscv_interpreter/v/zve64x/
perm.rs

1//! Zve64x permutation instructions
2
3#[cfg(test)]
4mod tests;
5pub mod zve64x_perm_helpers;
6
7use crate::v::vector_registers::VectorRegistersExt;
8use crate::v::zve64x::zve64x_helpers;
9use crate::{
10    ExecutableInstruction, ExecutionError, InterpreterState, ProgramCounter, VirtualMemory,
11};
12use ab_riscv_macros::instruction_execution;
13use ab_riscv_primitives::instructions::v::zve64x::perm::Zve64xPermInstruction;
14use ab_riscv_primitives::registers::general_purpose::{RegType, Register};
15use ab_riscv_primitives::registers::vector::VReg;
16use core::fmt;
17use core::ops::ControlFlow;
18
19#[instruction_execution]
20impl<Reg, ExtState, Memory, PC, InstructionHandler, CustomError>
21    ExecutableInstruction<
22        InterpreterState<Reg, ExtState, Memory, PC, InstructionHandler, CustomError>,
23        CustomError,
24    > for Zve64xPermInstruction<Reg>
25where
26    Reg: Register,
27    [(); Reg::N]:,
28    ExtState: VectorRegistersExt<Reg, CustomError>,
29    [(); ExtState::ELEN as usize]:,
30    [(); ExtState::VLEN as usize]:,
31    [(); ExtState::VLENB as usize]:,
32    Memory: VirtualMemory,
33    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
34    CustomError: fmt::Debug,
35{
36    #[inline(always)]
37    fn execute(
38        self,
39        state: &mut InterpreterState<Reg, ExtState, Memory, PC, InstructionHandler, CustomError>,
40    ) -> Result<ControlFlow<()>, ExecutionError<Reg::Type, CustomError>> {
41        match self {
42            // vmv.x.s rd, vs2
43            // Copies sign-extended element 0 of vs2 (at current SEW) to GPR rd.
44            // Requires valid vtype (needs SEW to know element width).
45            // Does not use vl or masking; always reads element 0.
46            // Resets vstart per spec §6.3.
47            Self::VmvXS { rd, vs2 } => {
48                if !state.ext_state.vector_instructions_allowed() {
49                    Err(ExecutionError::IllegalInstruction {
50                        address: state
51                            .instruction_fetcher
52                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
53                    })?;
54                }
55                let vtype = state
56                    .ext_state
57                    .vtype()
58                    .ok_or(ExecutionError::IllegalInstruction {
59                        address: state
60                            .instruction_fetcher
61                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
62                    })?;
63                let sew = vtype.vsew();
64                // SAFETY: element 0 is always within register v(vs2_base), byte offset 0;
65                // VLENB >= sew.bytes() for all legal vtype configurations.
66                let raw = unsafe {
67                    zve64x_perm_helpers::read_element_0_u64(
68                        state.ext_state.read_vreg(),
69                        vs2.bits(),
70                        sew,
71                    )
72                };
73                let sign_extended = zve64x_perm_helpers::sign_extend_to_reg::<Reg>(raw, sew);
74                state.regs.write(rd, sign_extended);
75                state.ext_state.mark_vs_dirty();
76                state.ext_state.reset_vstart();
77            }
78            // vmv.s.x vd, rs1
79            // Copies scalar GPR rs1 (zero-extended / truncated to SEW) into element 0 of vd.
80            // When vl == 0, the write is suppressed but vstart is still reset.
81            // Resets vstart per spec §6.3.
82            Self::VmvSX { vd, rs1 } => {
83                if !state.ext_state.vector_instructions_allowed() {
84                    Err(ExecutionError::IllegalInstruction {
85                        address: state
86                            .instruction_fetcher
87                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
88                    })?;
89                }
90                let vtype = state
91                    .ext_state
92                    .vtype()
93                    .ok_or(ExecutionError::IllegalInstruction {
94                        address: state
95                            .instruction_fetcher
96                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
97                    })?;
98                let sew = vtype.vsew();
99                let vl = state.ext_state.vl();
100                // Per spec §16.1: if vl > 0 write element 0, otherwise no update.
101                if vl > 0 {
102                    let scalar = state.regs.read(rs1).as_u64();
103                    // SAFETY: element 0 always fits; same argument as VmvXS.
104                    unsafe {
105                        zve64x_perm_helpers::write_element_0_u64(
106                            state.ext_state.write_vreg(),
107                            vd.bits(),
108                            sew,
109                            scalar,
110                        );
111                    }
112                }
113                state.ext_state.mark_vs_dirty();
114                state.ext_state.reset_vstart();
115            }
116            // vslideup.vx vd, vs2, rs1, vm
117            // Slides elements of vs2 up by the scalar offset in rs1.
118            // Elements vd[0..offset] are unchanged (tail-undisturbed for those positions).
119            // Elements vd[i] for offset <= i < vl get vs2[i - offset].
120            // Per spec §16.3.1: vd must not overlap vs2.
121            Self::VslideupVx { vd, vs2, rs1, vm } => {
122                if !state.ext_state.vector_instructions_allowed() {
123                    Err(ExecutionError::IllegalInstruction {
124                        address: state
125                            .instruction_fetcher
126                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
127                    })?;
128                }
129                let vtype = state
130                    .ext_state
131                    .vtype()
132                    .ok_or(ExecutionError::IllegalInstruction {
133                        address: state
134                            .instruction_fetcher
135                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
136                    })?;
137                let group_regs = vtype.vlmul().register_count();
138                zve64x_perm_helpers::check_vreg_group_alignment(state, vd, group_regs)?;
139                zve64x_perm_helpers::check_vreg_group_alignment(state, vs2, group_regs)?;
140                // vd must not overlap vs2
141                zve64x_perm_helpers::check_no_overlap(state, vd, vs2, group_regs)?;
142                if !vm && vd.bits() == 0 {
143                    Err(ExecutionError::IllegalInstruction {
144                        address: state
145                            .instruction_fetcher
146                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
147                    })?;
148                }
149                let sew = vtype.vsew();
150                let vl = state.ext_state.vl();
151                let vstart = u32::from(state.ext_state.vstart());
152                let offset = state.regs.read(rs1).as_u64();
153                // SAFETY: alignment and no-overlap verified above; vl <= VLMAX.
154                unsafe {
155                    zve64x_perm_helpers::execute_slideup(
156                        state, vd, vs2, vm, vl, vstart, sew, offset,
157                    );
158                }
159            }
160            // vslideup.vi vd, vs2, uimm, vm
161            // Same as vslideup.vx but offset is a 5-bit unsigned immediate.
162            Self::VslideupVi { vd, vs2, uimm, vm } => {
163                if !state.ext_state.vector_instructions_allowed() {
164                    Err(ExecutionError::IllegalInstruction {
165                        address: state
166                            .instruction_fetcher
167                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
168                    })?;
169                }
170                let vtype = state
171                    .ext_state
172                    .vtype()
173                    .ok_or(ExecutionError::IllegalInstruction {
174                        address: state
175                            .instruction_fetcher
176                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
177                    })?;
178                let group_regs = vtype.vlmul().register_count();
179                zve64x_perm_helpers::check_vreg_group_alignment(state, vd, group_regs)?;
180                zve64x_perm_helpers::check_vreg_group_alignment(state, vs2, group_regs)?;
181                zve64x_perm_helpers::check_no_overlap(state, vd, vs2, group_regs)?;
182                if !vm && vd.bits() == 0 {
183                    Err(ExecutionError::IllegalInstruction {
184                        address: state
185                            .instruction_fetcher
186                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
187                    })?;
188                }
189                let sew = vtype.vsew();
190                let vl = state.ext_state.vl();
191                let vstart = u32::from(state.ext_state.vstart());
192                let offset = u64::from(uimm);
193                // SAFETY: same as VslideupVx.
194                unsafe {
195                    zve64x_perm_helpers::execute_slideup(
196                        state, vd, vs2, vm, vl, vstart, sew, offset,
197                    );
198                }
199            }
200            // vslidedown.vx vd, vs2, rs1, vm
201            // Element vd[i] = vs2[i + offset] if i + offset < VLMAX, else 0.
202            // vd may overlap vs2 for slidedown.
203            Self::VslidedownVx { vd, vs2, rs1, vm } => {
204                if !state.ext_state.vector_instructions_allowed() {
205                    Err(ExecutionError::IllegalInstruction {
206                        address: state
207                            .instruction_fetcher
208                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
209                    })?;
210                }
211                let vtype = state
212                    .ext_state
213                    .vtype()
214                    .ok_or(ExecutionError::IllegalInstruction {
215                        address: state
216                            .instruction_fetcher
217                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
218                    })?;
219                let group_regs = vtype.vlmul().register_count();
220                zve64x_perm_helpers::check_vreg_group_alignment(state, vd, group_regs)?;
221                zve64x_perm_helpers::check_vreg_group_alignment(state, vs2, group_regs)?;
222                if !vm && vd.bits() == 0 {
223                    Err(ExecutionError::IllegalInstruction {
224                        address: state
225                            .instruction_fetcher
226                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
227                    })?;
228                }
229                let sew = vtype.vsew();
230                let vl = state.ext_state.vl();
231                let vstart = u32::from(state.ext_state.vstart());
232                let vlmax = state.ext_state.vlmax_for_vtype(vtype);
233                let offset = state.regs.read(rs1).as_u64();
234                // SAFETY: alignment verified above; vl <= VLMAX; offset clamped in helper.
235                unsafe {
236                    zve64x_perm_helpers::execute_slidedown(
237                        state, vd, vs2, vm, vl, vstart, sew, vlmax, offset,
238                    );
239                }
240            }
241            // vslidedown.vi vd, vs2, uimm, vm
242            // Same as vslidedown.vx but offset is a 5-bit unsigned immediate.
243            Self::VslidedownVi { vd, vs2, uimm, vm } => {
244                if !state.ext_state.vector_instructions_allowed() {
245                    Err(ExecutionError::IllegalInstruction {
246                        address: state
247                            .instruction_fetcher
248                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
249                    })?;
250                }
251                let vtype = state
252                    .ext_state
253                    .vtype()
254                    .ok_or(ExecutionError::IllegalInstruction {
255                        address: state
256                            .instruction_fetcher
257                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
258                    })?;
259                let group_regs = vtype.vlmul().register_count();
260                zve64x_perm_helpers::check_vreg_group_alignment(state, vd, group_regs)?;
261                zve64x_perm_helpers::check_vreg_group_alignment(state, vs2, group_regs)?;
262                if !vm && vd.bits() == 0 {
263                    Err(ExecutionError::IllegalInstruction {
264                        address: state
265                            .instruction_fetcher
266                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
267                    })?;
268                }
269                let sew = vtype.vsew();
270                let vl = state.ext_state.vl();
271                let vstart = u32::from(state.ext_state.vstart());
272                let vlmax = state.ext_state.vlmax_for_vtype(vtype);
273                let offset = u64::from(uimm);
274                // SAFETY: same as VslidedownVx.
275                unsafe {
276                    zve64x_perm_helpers::execute_slidedown(
277                        state, vd, vs2, vm, vl, vstart, sew, vlmax, offset,
278                    );
279                }
280            }
281            // vslide1up.vx vd, vs2, rs1, vm
282            // Element 0 of vd gets the scalar value rs1 (written at SEW width).
283            // Elements vd[i] for 1 <= i < vl get vs2[i - 1].
284            // vd must not overlap vs2.
285            Self::Vslide1upVx { vd, vs2, rs1, vm } => {
286                if !state.ext_state.vector_instructions_allowed() {
287                    Err(ExecutionError::IllegalInstruction {
288                        address: state
289                            .instruction_fetcher
290                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
291                    })?;
292                }
293                let vtype = state
294                    .ext_state
295                    .vtype()
296                    .ok_or(ExecutionError::IllegalInstruction {
297                        address: state
298                            .instruction_fetcher
299                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
300                    })?;
301                let group_regs = vtype.vlmul().register_count();
302                zve64x_perm_helpers::check_vreg_group_alignment(state, vd, group_regs)?;
303                zve64x_perm_helpers::check_vreg_group_alignment(state, vs2, group_regs)?;
304                zve64x_perm_helpers::check_no_overlap(state, vd, vs2, group_regs)?;
305                if !vm && vd.bits() == 0 {
306                    Err(ExecutionError::IllegalInstruction {
307                        address: state
308                            .instruction_fetcher
309                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
310                    })?;
311                }
312                let sew = vtype.vsew();
313                let vl = state.ext_state.vl();
314                let vstart = u32::from(state.ext_state.vstart());
315                let scalar = state.regs.read(rs1).as_u64();
316                // SAFETY: alignment and no-overlap verified; vl <= VLMAX.
317                unsafe {
318                    zve64x_perm_helpers::execute_slide1up(
319                        state, vd, vs2, vm, vl, vstart, sew, scalar,
320                    );
321                }
322            }
323            // vslide1down.vx vd, vs2, rs1, vm
324            // Element vd[i] = vs2[i + 1] for 0 <= i < vl - 1.
325            // Element vd[vl - 1] gets the scalar value rs1.
326            // vd may overlap vs2 for slide1down.
327            Self::Vslide1downVx { vd, vs2, rs1, vm } => {
328                if !state.ext_state.vector_instructions_allowed() {
329                    Err(ExecutionError::IllegalInstruction {
330                        address: state
331                            .instruction_fetcher
332                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
333                    })?;
334                }
335                let vtype = state
336                    .ext_state
337                    .vtype()
338                    .ok_or(ExecutionError::IllegalInstruction {
339                        address: state
340                            .instruction_fetcher
341                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
342                    })?;
343                let group_regs = vtype.vlmul().register_count();
344                zve64x_perm_helpers::check_vreg_group_alignment(state, vd, group_regs)?;
345                zve64x_perm_helpers::check_vreg_group_alignment(state, vs2, group_regs)?;
346                if !vm && vd.bits() == 0 {
347                    Err(ExecutionError::IllegalInstruction {
348                        address: state
349                            .instruction_fetcher
350                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
351                    })?;
352                }
353                let sew = vtype.vsew();
354                let vl = state.ext_state.vl();
355                let vstart = u32::from(state.ext_state.vstart());
356                let scalar = state.regs.read(rs1).as_u64();
357                // SAFETY: alignment verified; vl <= VLMAX; overlap permitted by spec.
358                unsafe {
359                    zve64x_perm_helpers::execute_slide1down(
360                        state, vd, vs2, vm, vl, vstart, sew, scalar,
361                    );
362                }
363            }
364            // vrgather.vv vd, vs2, vs1, vm
365            // vd[i] = (vs1[i] < VLMAX) ? vs2[vs1[i]] : 0
366            // vd must not overlap vs1 or vs2.
367            Self::VrgatherVv { vd, vs2, vs1, vm } => {
368                if !state.ext_state.vector_instructions_allowed() {
369                    Err(ExecutionError::IllegalInstruction {
370                        address: state
371                            .instruction_fetcher
372                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
373                    })?;
374                }
375                let vtype = state
376                    .ext_state
377                    .vtype()
378                    .ok_or(ExecutionError::IllegalInstruction {
379                        address: state
380                            .instruction_fetcher
381                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
382                    })?;
383                let group_regs = vtype.vlmul().register_count();
384                zve64x_perm_helpers::check_vreg_group_alignment(state, vd, group_regs)?;
385                zve64x_perm_helpers::check_vreg_group_alignment(state, vs2, group_regs)?;
386                zve64x_perm_helpers::check_vreg_group_alignment(state, vs1, group_regs)?;
387                zve64x_perm_helpers::check_no_overlap(state, vd, vs2, group_regs)?;
388                zve64x_perm_helpers::check_no_overlap(state, vd, vs1, group_regs)?;
389                if !vm && vd.bits() == 0 {
390                    Err(ExecutionError::IllegalInstruction {
391                        address: state
392                            .instruction_fetcher
393                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
394                    })?;
395                }
396                let sew = vtype.vsew();
397                let vl = state.ext_state.vl();
398                let vstart = u32::from(state.ext_state.vstart());
399                let vlmax = state.ext_state.vlmax_for_vtype(vtype);
400                // SAFETY: all alignment and overlap constraints verified above; vl <= VLMAX.
401                unsafe {
402                    zve64x_perm_helpers::execute_rgather_vv(
403                        state, vd, vs2, vs1, vm, vl, vstart, sew, vlmax,
404                    );
405                }
406            }
407            // vrgather.vx vd, vs2, rs1, vm
408            // All active elements of vd get vs2[rs1] if rs1 < VLMAX, else 0.
409            // vd must not overlap vs2.
410            Self::VrgatherVx { vd, vs2, rs1, vm } => {
411                if !state.ext_state.vector_instructions_allowed() {
412                    Err(ExecutionError::IllegalInstruction {
413                        address: state
414                            .instruction_fetcher
415                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
416                    })?;
417                }
418                let vtype = state
419                    .ext_state
420                    .vtype()
421                    .ok_or(ExecutionError::IllegalInstruction {
422                        address: state
423                            .instruction_fetcher
424                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
425                    })?;
426                let group_regs = vtype.vlmul().register_count();
427                zve64x_perm_helpers::check_vreg_group_alignment(state, vd, group_regs)?;
428                zve64x_perm_helpers::check_vreg_group_alignment(state, vs2, group_regs)?;
429                zve64x_perm_helpers::check_no_overlap(state, vd, vs2, group_regs)?;
430                if !vm && vd.bits() == 0 {
431                    Err(ExecutionError::IllegalInstruction {
432                        address: state
433                            .instruction_fetcher
434                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
435                    })?;
436                }
437                let sew = vtype.vsew();
438                let vl = state.ext_state.vl();
439                let vstart = u32::from(state.ext_state.vstart());
440                let vlmax = state.ext_state.vlmax_for_vtype(vtype);
441                let index = state.regs.read(rs1).as_u64();
442                // SAFETY: alignment and no-overlap verified; vl <= VLMAX.
443                unsafe {
444                    zve64x_perm_helpers::execute_rgather_scalar(
445                        state, vd, vs2, vm, vl, vstart, sew, vlmax, index,
446                    );
447                }
448            }
449            // vrgather.vi vd, vs2, uimm, vm
450            // Same as vrgather.vx but index is a 5-bit unsigned immediate.
451            Self::VrgatherVi { vd, vs2, uimm, vm } => {
452                if !state.ext_state.vector_instructions_allowed() {
453                    Err(ExecutionError::IllegalInstruction {
454                        address: state
455                            .instruction_fetcher
456                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
457                    })?;
458                }
459                let vtype = state
460                    .ext_state
461                    .vtype()
462                    .ok_or(ExecutionError::IllegalInstruction {
463                        address: state
464                            .instruction_fetcher
465                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
466                    })?;
467                let group_regs = vtype.vlmul().register_count();
468                zve64x_perm_helpers::check_vreg_group_alignment(state, vd, group_regs)?;
469                zve64x_perm_helpers::check_vreg_group_alignment(state, vs2, group_regs)?;
470                zve64x_perm_helpers::check_no_overlap(state, vd, vs2, group_regs)?;
471                if !vm && vd.bits() == 0 {
472                    Err(ExecutionError::IllegalInstruction {
473                        address: state
474                            .instruction_fetcher
475                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
476                    })?;
477                }
478                let sew = vtype.vsew();
479                let vl = state.ext_state.vl();
480                let vstart = u32::from(state.ext_state.vstart());
481                let vlmax = state.ext_state.vlmax_for_vtype(vtype);
482                let index = u64::from(uimm);
483                // SAFETY: same as VrgatherVx.
484                unsafe {
485                    zve64x_perm_helpers::execute_rgather_scalar(
486                        state, vd, vs2, vm, vl, vstart, sew, vlmax, index,
487                    );
488                }
489            }
490            // vrgatherei16.vv vd, vs2, vs1, vm
491            // Like vrgather.vv but vs1 always uses EEW=16 (regardless of SEW).
492            // EMUL_vs1 = (16 / SEW) * LMUL; must be in [1/8, 8] else illegal.
493            // vd must not overlap vs1 or vs2.
494            Self::Vrgatherei16Vv { vd, vs2, vs1, vm } => {
495                if !state.ext_state.vector_instructions_allowed() {
496                    Err(ExecutionError::IllegalInstruction {
497                        address: state
498                            .instruction_fetcher
499                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
500                    })?;
501                }
502                let vtype = state
503                    .ext_state
504                    .vtype()
505                    .ok_or(ExecutionError::IllegalInstruction {
506                        address: state
507                            .instruction_fetcher
508                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
509                    })?;
510                let group_regs = vtype.vlmul().register_count();
511                zve64x_perm_helpers::check_vreg_group_alignment(state, vd, group_regs)?;
512                zve64x_perm_helpers::check_vreg_group_alignment(state, vs2, group_regs)?;
513                // Compute EMUL for vs1 index register (EEW=16).
514                let index_group_regs = vtype
515                    .vlmul()
516                    .index_register_count(
517                        ab_riscv_primitives::instructions::v::Eew::E16,
518                        vtype.vsew(),
519                    )
520                    .ok_or(ExecutionError::IllegalInstruction {
521                        address: state
522                            .instruction_fetcher
523                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
524                    })?;
525                zve64x_perm_helpers::check_vreg_group_alignment(state, vs1, index_group_regs)?;
526                zve64x_perm_helpers::check_no_overlap(state, vd, vs2, group_regs)?;
527                // vd and vs1 have different group sizes (group_regs vs index_group_regs),
528                // so the symmetric helper would use the wrong size for one of the intervals.
529                zve64x_perm_helpers::check_no_overlap_asymmetric(
530                    state,
531                    vd,
532                    group_regs,
533                    vs1,
534                    index_group_regs,
535                )?;
536                if !vm && vd.bits() == 0 {
537                    Err(ExecutionError::IllegalInstruction {
538                        address: state
539                            .instruction_fetcher
540                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
541                    })?;
542                }
543                let sew = vtype.vsew();
544                let vl = state.ext_state.vl();
545                let vstart = u32::from(state.ext_state.vstart());
546                let vlmax = state.ext_state.vlmax_for_vtype(vtype);
547                // SAFETY: all alignment and overlap constraints verified; vl <= VLMAX;
548                // vs1 uses EEW=16 with computed index_group_regs.
549                unsafe {
550                    zve64x_perm_helpers::execute_rgatherei16(
551                        state,
552                        vd,
553                        vs2,
554                        vs1,
555                        vm,
556                        vl,
557                        vstart,
558                        sew,
559                        vlmax,
560                        index_group_regs,
561                    );
562                }
563            }
564            // vmerge.vvm / vmv.v.v
565            // When vm=true: vmv.v.v vd, vs1 - broadcast all active elements from vs1.
566            //   vs2 is ignored; no overlap restriction on vd/vs2.
567            // When vm=false: vmerge.vvm vd, vs2, vs1, v0
568            //   vd[i] = v0[i] ? vs1[i] : vs2[i]
569            //   vd must not overlap v0 (mask source).
570            Self::VmergeVvm { vd, vs2, vs1, vm } => {
571                if !state.ext_state.vector_instructions_allowed() {
572                    Err(ExecutionError::IllegalInstruction {
573                        address: state
574                            .instruction_fetcher
575                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
576                    })?;
577                }
578                let vtype = state
579                    .ext_state
580                    .vtype()
581                    .ok_or(ExecutionError::IllegalInstruction {
582                        address: state
583                            .instruction_fetcher
584                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
585                    })?;
586                let group_regs = vtype.vlmul().register_count();
587                zve64x_perm_helpers::check_vreg_group_alignment(state, vd, group_regs)?;
588                zve64x_perm_helpers::check_vreg_group_alignment(state, vs1, group_regs)?;
589                if !vm {
590                    // vmerge: vs2 is read, vd must not overlap v0
591                    zve64x_perm_helpers::check_vreg_group_alignment(state, vs2, group_regs)?;
592                    zve64x_perm_helpers::check_no_overlap(state, vd, VReg::V0, group_regs)?;
593                }
594                let sew = vtype.vsew();
595                let vl = state.ext_state.vl();
596                let vstart = u32::from(state.ext_state.vstart());
597                // SAFETY: alignment and overlap verified above; vl <= VLMAX.
598                unsafe {
599                    zve64x_perm_helpers::execute_merge_vv(state, vd, vs2, vs1, vm, vl, vstart, sew);
600                }
601            }
602            // vmerge.vxm / vmv.v.x
603            // When vm=true: vmv.v.x vd, rs1 - broadcast scalar to all active elements.
604            // When vm=false: vmerge.vxm - vd[i] = v0[i] ? rs1 : vs2[i]
605            Self::VmergeVxm { vd, vs2, rs1, vm } => {
606                if !state.ext_state.vector_instructions_allowed() {
607                    Err(ExecutionError::IllegalInstruction {
608                        address: state
609                            .instruction_fetcher
610                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
611                    })?;
612                }
613                let vtype = state
614                    .ext_state
615                    .vtype()
616                    .ok_or(ExecutionError::IllegalInstruction {
617                        address: state
618                            .instruction_fetcher
619                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
620                    })?;
621                let group_regs = vtype.vlmul().register_count();
622                zve64x_perm_helpers::check_vreg_group_alignment(state, vd, group_regs)?;
623                if !vm {
624                    zve64x_perm_helpers::check_vreg_group_alignment(state, vs2, group_regs)?;
625                    zve64x_perm_helpers::check_no_overlap(state, vd, VReg::V0, group_regs)?;
626                }
627                let sew = vtype.vsew();
628                let vl = state.ext_state.vl();
629                let vstart = u32::from(state.ext_state.vstart());
630                let scalar = state.regs.read(rs1).as_u64();
631                // SAFETY: alignment and overlap verified above; vl <= VLMAX.
632                unsafe {
633                    zve64x_perm_helpers::execute_merge_scalar(
634                        state, vd, vs2, vm, vl, vstart, sew, scalar,
635                    );
636                }
637            }
638            // vmerge.vim / vmv.v.i
639            // When vm=true: vmv.v.i vd, simm5 - broadcast sign-extended immediate.
640            // When vm=false: vmerge.vim - vd[i] = v0[i] ? simm5 : vs2[i]
641            Self::VmergeVim { vd, vs2, simm5, vm } => {
642                if !state.ext_state.vector_instructions_allowed() {
643                    Err(ExecutionError::IllegalInstruction {
644                        address: state
645                            .instruction_fetcher
646                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
647                    })?;
648                }
649                let vtype = state
650                    .ext_state
651                    .vtype()
652                    .ok_or(ExecutionError::IllegalInstruction {
653                        address: state
654                            .instruction_fetcher
655                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
656                    })?;
657                let group_regs = vtype.vlmul().register_count();
658                zve64x_perm_helpers::check_vreg_group_alignment(state, vd, group_regs)?;
659                if !vm {
660                    zve64x_perm_helpers::check_vreg_group_alignment(state, vs2, group_regs)?;
661                    zve64x_perm_helpers::check_no_overlap(state, vd, VReg::V0, group_regs)?;
662                }
663                let sew = vtype.vsew();
664                let vl = state.ext_state.vl();
665                let vstart = u32::from(state.ext_state.vstart());
666                // Sign-extend imm to u64 so the low sew_bytes are correct for all SEW.
667                let scalar = i64::from(simm5).cast_unsigned();
668                // SAFETY: alignment and overlap verified above; vl <= VLMAX.
669                unsafe {
670                    zve64x_perm_helpers::execute_merge_scalar(
671                        state, vd, vs2, vm, vl, vstart, sew, scalar,
672                    );
673                }
674            }
            // vcompress.vm vd, vs2, vs1
            // Packs the elements of vs2 whose corresponding mask bit in vs1 is set
            // into vd, densely starting at element 0. Always unmasked in the
            // encoding (vm=1); vs1 is the explicit mask operand and is a single
            // register regardless of LMUL. The spec forbids vd overlapping vs2 or vs1.
            // NOTE(review): the spec makes vcompress illegal when vstart != 0; here
            // vstart is merely forwarded to execute_compress — confirm the helper
            // enforces (or correctly handles) that constraint.
            Self::VcompressVm { vd, vs2, vs1 } => {
                // Vector unit must be enabled; otherwise illegal instruction.
                if !state.ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                // A configured vtype is required (supplies SEW and LMUL).
                let vtype = state
                    .ext_state
                    .vtype()
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                let group_regs = vtype.vlmul().register_count();
                // Both the source and destination register groups must be aligned
                // to the LMUL group size.
                zve64x_perm_helpers::check_vreg_group_alignment(state, vd, group_regs)?;
                zve64x_perm_helpers::check_vreg_group_alignment(state, vs2, group_regs)?;
                // Destination group must not overlap the source group...
                zve64x_perm_helpers::check_no_overlap(state, vd, vs2, group_regs)?;
                // ...nor the single mask register vs1.
                // NOTE(review): the vmerge arms pass `group_regs` when checking vd
                // against the single mask register v0, but this call passes 1 —
                // confirm check_no_overlap's 4th parameter semantics; if it is vd's
                // group width, a 1 here would miss vs1 falling inside vd's group.
                zve64x_perm_helpers::check_no_overlap(state, vd, vs1, 1)?;
                let sew = vtype.vsew();
                let vl = state.ext_state.vl();
                let vstart = u32::from(state.ext_state.vstart());
                // SAFETY: all alignment and overlap constraints verified; vl <= VLMAX.
                unsafe {
                    zve64x_perm_helpers::execute_compress(state, vd, vs2, vs1, vl, vstart, sew);
                }
            }
710            // vmv1r.v vd, vs2
711            // Whole register move: copies 1 register.
712            // No masking, no vtype/vl dependency.
713            Self::Vmv1rV { vd, vs2 } => {
714                if !state.ext_state.vector_instructions_allowed() {
715                    Err(ExecutionError::IllegalInstruction {
716                        address: state
717                            .instruction_fetcher
718                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
719                    })?;
720                }
721                // SAFETY: both vd.bits() and vs2.bits() are always in [0, 32) by VReg invariant;
722                // copying 1 register always fits.
723                unsafe {
724                    zve64x_perm_helpers::execute_whole_reg_move(
725                        state.ext_state.write_vreg(),
726                        vd.bits(),
727                        vs2.bits(),
728                        1,
729                    );
730                }
731                state.ext_state.mark_vs_dirty();
732                state.ext_state.reset_vstart();
733            }
734            // vmv2r.v vd, vs2
735            // Whole register move: copies 2 registers.
736            // vd and vs2 must be aligned to 2 (checked here per spec §17.6).
737            Self::Vmv2rV { vd, vs2 } => {
738                if !state.ext_state.vector_instructions_allowed() {
739                    Err(ExecutionError::IllegalInstruction {
740                        address: state
741                            .instruction_fetcher
742                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
743                    })?;
744                }
745                if !vd.bits().is_multiple_of(2) || !vs2.bits().is_multiple_of(2) {
746                    Err(ExecutionError::IllegalInstruction {
747                        address: state
748                            .instruction_fetcher
749                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
750                    })?;
751                }
752                // SAFETY: alignment verified; 2 registers from aligned base always stay in [0, 32).
753                unsafe {
754                    zve64x_perm_helpers::execute_whole_reg_move(
755                        state.ext_state.write_vreg(),
756                        vd.bits(),
757                        vs2.bits(),
758                        2,
759                    );
760                }
761                state.ext_state.mark_vs_dirty();
762                state.ext_state.reset_vstart();
763            }
764            // vmv4r.v vd, vs2
765            // Whole register move: copies 4 registers.
766            Self::Vmv4rV { vd, vs2 } => {
767                if !state.ext_state.vector_instructions_allowed() {
768                    Err(ExecutionError::IllegalInstruction {
769                        address: state
770                            .instruction_fetcher
771                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
772                    })?;
773                }
774                if !vd.bits().is_multiple_of(4) || !vs2.bits().is_multiple_of(4) {
775                    Err(ExecutionError::IllegalInstruction {
776                        address: state
777                            .instruction_fetcher
778                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
779                    })?;
780                }
781                // SAFETY: alignment verified; 4 registers from aligned base always stay in [0, 32).
782                unsafe {
783                    zve64x_perm_helpers::execute_whole_reg_move(
784                        state.ext_state.write_vreg(),
785                        vd.bits(),
786                        vs2.bits(),
787                        4,
788                    );
789                }
790                state.ext_state.mark_vs_dirty();
791                state.ext_state.reset_vstart();
792            }
793            // vmv8r.v vd, vs2
794            // Whole register move: copies 8 registers.
795            Self::Vmv8rV { vd, vs2 } => {
796                if !state.ext_state.vector_instructions_allowed() {
797                    Err(ExecutionError::IllegalInstruction {
798                        address: state
799                            .instruction_fetcher
800                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
801                    })?;
802                }
803                if !vd.bits().is_multiple_of(8) || !vs2.bits().is_multiple_of(8) {
804                    Err(ExecutionError::IllegalInstruction {
805                        address: state
806                            .instruction_fetcher
807                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
808                    })?;
809                }
810                // SAFETY: alignment verified; 8 registers from aligned base always stay in [0, 32).
811                unsafe {
812                    zve64x_perm_helpers::execute_whole_reg_move(
813                        state.ext_state.write_vreg(),
814                        vd.bits(),
815                        vs2.bits(),
816                        8,
817                    );
818                }
819                state.ext_state.mark_vs_dirty();
820                state.ext_state.reset_vstart();
821            }
822        }
823        Ok(ControlFlow::Continue(()))
824    }
825}