Skip to main content

ab_riscv_interpreter/v/zvexx/
load.rs

1//! ZveXx vector load instructions
2
3#[cfg(test)]
4mod tests;
5pub mod zvexx_load_helpers;
6
7use crate::v::vector_registers::VectorRegistersExt;
8use crate::v::zvexx::zvexx_helpers;
9use crate::{
10    ExecutableInstruction, ExecutableInstructionCsr, ExecutableInstructionOperands, ExecutionError,
11    ProgramCounter, RegisterFile, Rs1Rs2OperandValues, Rs1Rs2Operands, VirtualMemory,
12};
13use ab_riscv_macros::instruction_execution;
14use ab_riscv_primitives::prelude::*;
15use core::fmt;
16use core::ops::ControlFlow;
17
// Operand-trait impl for the ZveXx vector load instructions.
//
// The impl body is intentionally empty: nothing is overridden by hand here, so every item of
// `ExecutableInstructionOperands` comes either from the trait's defaults or from whatever the
// `#[instruction_execution]` attribute macro generates for this impl.
// NOTE(review): the macro expansion is not visible from this file - confirm which of the two
// supplies the operand types (the `execute` impl below destructures `Rs1Rs2OperandValues`).
//
// The only bound is `Reg: Register`, the general-purpose register type the instruction enum is
// generic over.
18#[instruction_execution]
19impl<Reg> ExecutableInstructionOperands for ZveXxLoadInstruction<Reg> where Reg: Register {}
20
// CSR-trait impl for the ZveXx vector load instructions.
//
// The impl is generic over `ExtState` and `CustomError` with no bounds on either, and its body is
// empty: no item of `ExecutableInstructionCsr` is overridden by hand for vector loads.
// NOTE(review): presumably this means the trait's default behaviour (or code generated by
// `#[instruction_execution]`) is sufficient because these instructions are not CSR
// instructions - confirm against the trait definition and the macro expansion.
21#[instruction_execution]
22impl<Reg, ExtState, CustomError> ExecutableInstructionCsr<ExtState, CustomError>
23    for ZveXxLoadInstruction<Reg>
24where
25    Reg: Register,
26{
27}
28
29#[instruction_execution]
30impl<Reg, Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
31    ExecutableInstruction<Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
32    for ZveXxLoadInstruction<Reg>
33where
34    Reg: Register,
35    Regs: RegisterFile<Reg>,
36    ExtState: VectorRegistersExt<Reg, CustomError>,
37    [(); ExtState::ELEN as usize]:,
38    [(); ExtState::VLEN as usize]:,
39    [(); ExtState::VLENB as usize]:,
40    Memory: VirtualMemory,
41    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
42    CustomError: fmt::Debug,
43{
44    #[inline(always)]
45    fn execute(
46        self,
47        Rs1Rs2OperandValues {
48            rs1_value,
49            rs2_value,
50        }: Rs1Rs2OperandValues<<Self::Reg as Register>::Type>,
51        _regs: &mut Regs,
52        ext_state: &mut ExtState,
53        memory: &mut Memory,
54        program_counter: &mut PC,
55        _system_instruction_handler: &mut InstructionHandler,
56    ) -> Result<
57        ControlFlow<(), (Self::Reg, <Self::Reg as Register>::Type)>,
58        ExecutionError<Reg::Type, CustomError>,
59    > {
60        match self {
61            // Whole-register load: loads `nreg` consecutive registers starting at `vd` directly
62            // from memory. `vd` must be aligned to `nreg`. Ignores vtype, vl, vstart, masking.
63            Self::Vlr {
64                vd,
65                rs1: _,
66                nreg,
67                eew: _,
68            } => {
69                let nreg = nreg.num_registers();
70                if !ext_state.vector_instructions_allowed() {
71                    ::core::hint::cold_path();
72                    return Err(ExecutionError::IllegalInstruction {
73                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
74                    });
75                }
76                if vd.to_bits() % nreg != 0 {
77                    ::core::hint::cold_path();
78                    return Err(ExecutionError::IllegalInstruction {
79                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
80                    });
81                }
82                let base = rs1_value.as_u64();
83                let vlenb = u64::from(ExtState::VLENB);
84                for reg_off in 0..nreg {
85                    // SAFETY: the decoder guarantees nreg in {1,2,4,8} and vd is nreg-aligned
86                    // (checked above), so vd.to_bits() + nreg - 1 <= 31.
87                    let reg = unsafe { VReg::from_bits(vd.to_bits() + reg_off).unwrap_unchecked() };
88                    let bytes = memory
89                        .read_slice(base + u64::from(reg_off) * vlenb, ExtState::VLENB)
90                        .inspect_err(|_error| {
91                            if reg_off > 0 {
92                                ext_state.mark_vs_dirty();
93                                ext_state.reset_vstart();
94                            }
95                        })?;
96                    ext_state.write_vregs().get_mut(reg).copy_from_slice(bytes);
97                }
98                ext_state.mark_vs_dirty();
99                ext_state.reset_vstart();
100            }
101
102            // Mask load: loads ceil(vl / 8) bytes from base into vd with no masking applied.
103            // Does not require a valid vtype: when vill is set vl is 0, so zero bytes are read.
104            Self::Vlm { vd, rs1: _ } => {
105                if !ext_state.vector_instructions_allowed() {
106                    ::core::hint::cold_path();
107                    return Err(ExecutionError::IllegalInstruction {
108                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
109                    });
110                }
111                let vl = ext_state.vl();
112                let byte_count = vl.div_ceil(u8::BITS);
113                if byte_count > 0 {
114                    let base = rs1_value.as_u64();
115                    let bytes = memory.read_slice(base, byte_count)?;
116                    // SAFETY: `bytes.len() == byte_count = vl.div_ceil(8) <= VLEN / 8 = VLENB`
117                    // because `vl <= VLMAX <= VLEN`, so `..bytes.len()` is in bounds within the
118                    // `VLENB`-byte destination register.
119                    unsafe {
120                        ext_state
121                            .write_vregs()
122                            .get_mut(vd)
123                            .get_unchecked_mut(..bytes.len())
124                            .copy_from_slice(bytes);
125                    }
126                }
127                ext_state.mark_vs_dirty();
128                ext_state.reset_vstart();
129            }
130
131            // Unit-stride load.
132            //
133            // Destination EMUL = EEW/SEW * LMUL, computed via `index_register_count`. This
134            // gives `group_regs` such that `VLMAX = group_regs * VLENB / eew.bytes()` matches
135            // the architectural `vl`.
136            Self::Vle {
137                vd,
138                rs1: _,
139                vm,
140                eew,
141            } => {
142                if !ext_state.vector_instructions_allowed() {
143                    ::core::hint::cold_path();
144                    return Err(ExecutionError::IllegalInstruction {
145                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
146                    });
147                }
148                let Some(vtype) = ext_state.vtype() else {
149                    ::core::hint::cold_path();
150                    return Err(ExecutionError::IllegalInstruction {
151                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
152                    });
153                };
154                let group_regs = vtype
155                    .vlmul()
156                    .index_register_count(eew, vtype.vsew())
157                    .ok_or(ExecutionError::IllegalInstruction {
158                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
159                    })?;
160                zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
161                    program_counter,
162                    vd,
163                    group_regs,
164                )?;
165                if !vm && zvexx_load_helpers::groups_overlap(vd, group_regs, VReg::V0, 1) {
166                    ::core::hint::cold_path();
167                    return Err(ExecutionError::IllegalInstruction {
168                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
169                    });
170                }
171                // SAFETY:
172                // - alignment: `check_register_group_alignment` verified `vd % group_regs == 0` and
173                //   `vd + group_regs <= 32`, satisfying both the alignment and nf=1 bounds
174                //   preconditions
175                // - `vl <= group_regs * VLENB / eew.bytes()`: `group_regs` is the EMUL computed for
176                //   this `eew` and `vtype`, so this VLMAX equals the architectural VLMAX that
177                //   bounds `vl`
178                // - mask overlap: checked above via `groups_overlap`
179                unsafe {
180                    zvexx_load_helpers::execute_unit_stride_load::<false, _, _, _, _>(
181                        ext_state,
182                        memory,
183                        vd,
184                        vm,
185                        rs1_value.as_u64(),
186                        eew,
187                        group_regs,
188                        Nf::N1,
189                    )?;
190                }
191            }
192
193            // Fault-only-first unit-stride load. Preconditions identical to `Vle`.
194            Self::Vleff {
195                vd,
196                rs1: _,
197                vm,
198                eew,
199            } => {
200                if !ext_state.vector_instructions_allowed() {
201                    ::core::hint::cold_path();
202                    return Err(ExecutionError::IllegalInstruction {
203                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
204                    });
205                }
206                let Some(vtype) = ext_state.vtype() else {
207                    ::core::hint::cold_path();
208                    return Err(ExecutionError::IllegalInstruction {
209                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
210                    });
211                };
212                let group_regs = vtype
213                    .vlmul()
214                    .index_register_count(eew, vtype.vsew())
215                    .ok_or(ExecutionError::IllegalInstruction {
216                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
217                    })?;
218                zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
219                    program_counter,
220                    vd,
221                    group_regs,
222                )?;
223                if !vm && zvexx_load_helpers::groups_overlap(vd, group_regs, VReg::V0, 1) {
224                    ::core::hint::cold_path();
225                    return Err(ExecutionError::IllegalInstruction {
226                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
227                    });
228                }
229                // SAFETY: preconditions identical to `Vle`; see that arm for the full argument.
230                unsafe {
231                    zvexx_load_helpers::execute_unit_stride_load::<true, _, _, _, _>(
232                        ext_state,
233                        memory,
234                        vd,
235                        vm,
236                        rs1_value.as_u64(),
237                        eew,
238                        group_regs,
239                        Nf::N1,
240                    )?;
241                }
242            }
243
244            // Strided load. Destination EMUL = EEW/SEW * LMUL as for unit-stride.
245            Self::Vlse {
246                vd,
247                rs1: _,
248                rs2: _,
249                vm,
250                eew,
251            } => {
252                if !ext_state.vector_instructions_allowed() {
253                    ::core::hint::cold_path();
254                    return Err(ExecutionError::IllegalInstruction {
255                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
256                    });
257                }
258                let Some(vtype) = ext_state.vtype() else {
259                    ::core::hint::cold_path();
260                    return Err(ExecutionError::IllegalInstruction {
261                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
262                    });
263                };
264                let group_regs = vtype
265                    .vlmul()
266                    .index_register_count(eew, vtype.vsew())
267                    .ok_or(ExecutionError::IllegalInstruction {
268                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
269                    })?;
270                zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
271                    program_counter,
272                    vd,
273                    group_regs,
274                )?;
275                if !vm && zvexx_load_helpers::groups_overlap(vd, group_regs, VReg::V0, 1) {
276                    ::core::hint::cold_path();
277                    return Err(ExecutionError::IllegalInstruction {
278                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
279                    });
280                }
281                // rs2 holds a signed stride; reinterpret the register value as signed
282                let stride = rs2_value.as_i64();
283                // SAFETY:
284                // - alignment and nf=1 bounds: `check_register_group_alignment` verified `vd %
285                //   group_regs == 0` and `vd + group_regs <= 32`
286                // - `vl <= group_regs * VLENB / eew.bytes()`: `group_regs` is the EMUL for this
287                //   `eew` and `vtype`, so this VLMAX equals the architectural VLMAX bounding `vl`
288                // - mask overlap: checked above via `groups_overlap`
289                unsafe {
290                    zvexx_load_helpers::execute_strided_load(
291                        ext_state,
292                        memory,
293                        vd,
294                        vm,
295                        rs1_value.as_u64(),
296                        stride,
297                        eew,
298                        group_regs,
299                        Nf::N1,
300                    )?;
301                }
302            }
303
304            // Indexed-unordered load: eew is the index EEW; data EEW comes from vtype.vsew().
305            // The data destination uses the base LMUL (data EEW = SEW for indexed loads).
306            Self::Vluxei {
307                vd,
308                rs1: _,
309                vs2,
310                vm,
311                eew: index_eew,
312            } => {
313                if !ext_state.vector_instructions_allowed() {
314                    ::core::hint::cold_path();
315                    return Err(ExecutionError::IllegalInstruction {
316                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
317                    });
318                }
319                let Some(vtype) = ext_state.vtype() else {
320                    ::core::hint::cold_path();
321                    return Err(ExecutionError::IllegalInstruction {
322                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
323                    });
324                };
325                let data_group_regs = vtype.vlmul().register_count();
326                let index_group_regs = vtype
327                    .vlmul()
328                    .index_register_count(index_eew, vtype.vsew())
329                    .ok_or(ExecutionError::IllegalInstruction {
330                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
331                    })?;
332                zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
333                    program_counter,
334                    vd,
335                    data_group_regs,
336                )?;
337                zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
338                    program_counter,
339                    vs2,
340                    index_group_regs,
341                )?;
342                // Non-segment indexed loads permit `vd`/`vs2` overlap under the general
343                // EEW-relative overlap rule (e.g. when the data and index EEW match); only
344                // disallowed overlaps are reserved.
345                if !zvexx_load_helpers::indexed_load_overlap_allowed(
346                    vd,
347                    data_group_regs,
348                    vs2,
349                    index_group_regs,
350                    index_eew,
351                    vtype.vsew(),
352                    vtype.vlmul(),
353                ) {
354                    ::core::hint::cold_path();
355                    return Err(ExecutionError::IllegalInstruction {
356                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
357                    });
358                }
359                if !vm && zvexx_load_helpers::groups_overlap(vd, data_group_regs, VReg::V0, 1) {
360                    ::core::hint::cold_path();
361                    return Err(ExecutionError::IllegalInstruction {
362                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
363                    });
364                }
365                // SAFETY:
366                // - data alignment/nf=1 bounds: `check_register_group_alignment` on `vd`
367                // - index alignment/bounds: `check_register_group_alignment` on `vs2`
368                // - `vl <= data_group_regs * VLENB / data_eew.bytes()`: data EEW = SEW and
369                //   `data_group_regs = LMUL`, so VLMAX = LMUL * VLEN / SEW, which bounds `vl`
370                // - `vl <= index_group_regs * VLENB / index_eew.bytes()`: `index_group_regs` is
371                //   EMUL_index defined so this VLMAX_index equals the architectural VLMAX
372                // - `vd`/`vs2` overlap (if any) satisfies the general EEW overlap rule, checked
373                //   above; the in-order element loop reads index element `i` before writing data
374                //   element `i`, and that rule guarantees a data write never clobbers an index
375                //   element that has not yet been consumed
376                // - mask overlap: checked above via `groups_overlap`
377                unsafe {
378                    zvexx_load_helpers::execute_indexed_load(
379                        ext_state,
380                        memory,
381                        vd,
382                        vs2,
383                        vm,
384                        rs1_value.as_u64(),
385                        vtype.vsew().as_eew(),
386                        index_eew,
387                        data_group_regs,
388                        Nf::N1,
389                    )?;
390                }
391            }
392
393            // Indexed-ordered load: functionally identical to `Vluxei` for a software
394            // interpreter; memory access ordering has no observable effect here.
395            Self::Vloxei {
396                vd,
397                rs1: _,
398                vs2,
399                vm,
400                eew: index_eew,
401            } => {
402                if !ext_state.vector_instructions_allowed() {
403                    ::core::hint::cold_path();
404                    return Err(ExecutionError::IllegalInstruction {
405                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
406                    });
407                }
408                let Some(vtype) = ext_state.vtype() else {
409                    ::core::hint::cold_path();
410                    return Err(ExecutionError::IllegalInstruction {
411                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
412                    });
413                };
414                let data_group_regs = vtype.vlmul().register_count();
415                let index_group_regs = vtype
416                    .vlmul()
417                    .index_register_count(index_eew, vtype.vsew())
418                    .ok_or(ExecutionError::IllegalInstruction {
419                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
420                    })?;
421                zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
422                    program_counter,
423                    vd,
424                    data_group_regs,
425                )?;
426                zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
427                    program_counter,
428                    vs2,
429                    index_group_regs,
430                )?;
431                // Non-segment indexed loads permit `vd`/`vs2` overlap under the general
432                // EEW-relative overlap rule; see the `Vluxei` arm for details.
433                if !zvexx_load_helpers::indexed_load_overlap_allowed(
434                    vd,
435                    data_group_regs,
436                    vs2,
437                    index_group_regs,
438                    index_eew,
439                    vtype.vsew(),
440                    vtype.vlmul(),
441                ) {
442                    ::core::hint::cold_path();
443                    return Err(ExecutionError::IllegalInstruction {
444                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
445                    });
446                }
447                if !vm && zvexx_load_helpers::groups_overlap(vd, data_group_regs, VReg::V0, 1) {
448                    ::core::hint::cold_path();
449                    return Err(ExecutionError::IllegalInstruction {
450                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
451                    });
452                }
453                // SAFETY: preconditions identical to `Vluxei`; see that arm for the full
454                // argument.
455                unsafe {
456                    zvexx_load_helpers::execute_indexed_load(
457                        ext_state,
458                        memory,
459                        vd,
460                        vs2,
461                        vm,
462                        rs1_value.as_u64(),
463                        vtype.vsew().as_eew(),
464                        index_eew,
465                        data_group_regs,
466                        Nf::N1,
467                    )?;
468                }
469            }
470
471            // Unit-stride segment load. EMUL = EEW/SEW * LMUL per field group.
472            Self::Vlseg {
473                vd,
474                rs1: _,
475                eew,
476                vm_nf,
477            } => {
478                let vm = vm_nf.vm();
479                let nf = vm_nf.nf();
480                if !ext_state.vector_instructions_allowed() {
481                    ::core::hint::cold_path();
482                    return Err(ExecutionError::IllegalInstruction {
483                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
484                    });
485                }
486                let Some(vtype) = ext_state.vtype() else {
487                    ::core::hint::cold_path();
488                    return Err(ExecutionError::IllegalInstruction {
489                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
490                    });
491                };
492                let group_regs = vtype
493                    .vlmul()
494                    .index_register_count(eew, vtype.vsew())
495                    .ok_or(ExecutionError::IllegalInstruction {
496                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
497                    })?;
498                zvexx_load_helpers::validate_segment_registers::<Reg, _, _, _>(
499                    program_counter,
500                    vd,
501                    vm,
502                    group_regs,
503                    nf,
504                )?;
505                // SAFETY:
506                // - alignment and nf-group bounds: `validate_segment_registers` verified `vd %
507                //   group_regs == 0` and `vd + nf * group_regs <= 32`
508                // - `vl <= group_regs * VLENB / eew.bytes()`: `group_regs` is the EMUL for this
509                //   `eew` and `vtype`, so this VLMAX equals the architectural VLMAX bounding `vl`
510                // - mask overlap with v0: `validate_segment_registers` checked `vd.to_bits() != 0`
511                //   when `vm=false`, ensuring no field group contains v0
512                unsafe {
513                    zvexx_load_helpers::execute_unit_stride_load::<false, _, _, _, _>(
514                        ext_state,
515                        memory,
516                        vd,
517                        vm,
518                        rs1_value.as_u64(),
519                        eew,
520                        group_regs,
521                        nf,
522                    )?;
523                }
524            }
525
526            // Fault-only-first segment load. Preconditions identical to `Vlseg`.
527            Self::Vlsegff {
528                vd,
529                rs1: _,
530                eew,
531                vm_nf,
532            } => {
533                let vm = vm_nf.vm();
534                let nf = vm_nf.nf();
535                if !ext_state.vector_instructions_allowed() {
536                    ::core::hint::cold_path();
537                    return Err(ExecutionError::IllegalInstruction {
538                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
539                    });
540                }
541                let Some(vtype) = ext_state.vtype() else {
542                    ::core::hint::cold_path();
543                    return Err(ExecutionError::IllegalInstruction {
544                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
545                    });
546                };
547                let group_regs = vtype
548                    .vlmul()
549                    .index_register_count(eew, vtype.vsew())
550                    .ok_or(ExecutionError::IllegalInstruction {
551                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
552                    })?;
553                zvexx_load_helpers::validate_segment_registers::<Reg, _, _, _>(
554                    program_counter,
555                    vd,
556                    vm,
557                    group_regs,
558                    nf,
559                )?;
560                // SAFETY: preconditions identical to `Vlseg`; see that arm for the full argument.
561                unsafe {
562                    zvexx_load_helpers::execute_unit_stride_load::<true, _, _, _, _>(
563                        ext_state,
564                        memory,
565                        vd,
566                        vm,
567                        rs1_value.as_u64(),
568                        eew,
569                        group_regs,
570                        nf,
571                    )?;
572                }
573            }
574
575            // Strided segment load. EMUL = EEW/SEW * LMUL as for `Vlse`.
576            Self::Vlsseg {
577                vd,
578                rs1: _,
579                rs2: _,
580                eew,
581                vm_nf,
582            } => {
583                let vm = vm_nf.vm();
584                let nf = vm_nf.nf();
585                if !ext_state.vector_instructions_allowed() {
586                    ::core::hint::cold_path();
587                    return Err(ExecutionError::IllegalInstruction {
588                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
589                    });
590                }
591                let Some(vtype) = ext_state.vtype() else {
592                    ::core::hint::cold_path();
593                    return Err(ExecutionError::IllegalInstruction {
594                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
595                    });
596                };
597                let group_regs = vtype
598                    .vlmul()
599                    .index_register_count(eew, vtype.vsew())
600                    .ok_or(ExecutionError::IllegalInstruction {
601                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
602                    })?;
603                zvexx_load_helpers::validate_segment_registers::<Reg, _, _, _>(
604                    program_counter,
605                    vd,
606                    vm,
607                    group_regs,
608                    nf,
609                )?;
610                let stride = rs2_value.as_i64();
611                // SAFETY:
612                // - alignment and nf-group bounds: `validate_segment_registers` verified `vd %
613                //   group_regs == 0` and `vd + nf * group_regs <= 32`
614                // - `vl <= group_regs * VLENB / eew.bytes()`: `group_regs` is EMUL for this `eew`
615                //   and `vtype`
616                // - mask overlap: `validate_segment_registers` checked `vd.to_bits() != 0` when
617                //   `vm=false`
618                unsafe {
619                    zvexx_load_helpers::execute_strided_load(
620                        ext_state,
621                        memory,
622                        vd,
623                        vm,
624                        rs1_value.as_u64(),
625                        stride,
626                        eew,
627                        group_regs,
628                        nf,
629                    )?;
630                }
631            }
632
633            // Indexed-unordered segment load
634            Self::Vluxseg {
635                vd,
636                rs1: _,
637                vs2,
638                eew: index_eew,
639                vm_nf,
640            } => {
641                let vm = vm_nf.vm();
642                let nf = vm_nf.nf();
643                if !ext_state.vector_instructions_allowed() {
644                    ::core::hint::cold_path();
645                    return Err(ExecutionError::IllegalInstruction {
646                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
647                    });
648                }
649                let Some(vtype) = ext_state.vtype() else {
650                    ::core::hint::cold_path();
651                    return Err(ExecutionError::IllegalInstruction {
652                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
653                    });
654                };
655                let data_group_regs = vtype.vlmul().register_count();
656                let index_group_regs = vtype
657                    .vlmul()
658                    .index_register_count(index_eew, vtype.vsew())
659                    .ok_or(ExecutionError::IllegalInstruction {
660                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
661                    })?;
662                // `validate_segment_registers` is called before the per-field overlap loop so
663                // that `vd.to_bits() + f * data_group_regs < 32` is established for all `f < nf`,
664                // which is required by the `VReg::from_bits` call inside the loop.
665                zvexx_load_helpers::validate_segment_registers::<Reg, _, _, _>(
666                    program_counter,
667                    vd,
668                    vm,
669                    data_group_regs,
670                    nf,
671                )?;
672                zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
673                    program_counter,
674                    vs2,
675                    index_group_regs,
676                )?;
677                for f in 0..nf.fields_per_segment() {
678                    // SAFETY: `vd.to_bits() + f * data_group_regs < 32` because
679                    // `validate_segment_registers` established `vd.to_bits() + nf * data_group_regs
680                    // <= 32` and `f < nf`. The value is in [0, 31], so it is a valid `VReg`
681                    // encoding.
682                    let field_vd = unsafe {
683                        VReg::from_bits(vd.to_bits() + f * data_group_regs).unwrap_unchecked()
684                    };
685                    if zvexx_load_helpers::groups_overlap(
686                        field_vd,
687                        data_group_regs,
688                        vs2,
689                        index_group_regs,
690                    ) {
691                        ::core::hint::cold_path();
692                        return Err(ExecutionError::IllegalInstruction {
693                            address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
694                        });
695                    }
696                }
697                // SAFETY:
698                // - data alignment/nf-group bounds: `validate_segment_registers` verified `vd %
699                //   data_group_regs == 0` and `vd + nf * data_group_regs <= 32`
700                // - index alignment/bounds: `check_register_group_alignment` verified `vs2 %
701                //   EMUL_index == 0` and `vs2 + EMUL_index <= 32`
702                // - no field/index group overlap: verified by the loop above
703                // - `vl <= data_group_regs * VLENB / data_eew.bytes()`: data EEW = SEW and
704                //   `data_group_regs = LMUL`, so VLMAX = LMUL * VLEN / SEW bounds `vl`
705                // - `vl <= EMUL_index * VLENB / index_eew.bytes()`: `index_group_regs` (EMUL_index)
706                //   is defined so this VLMAX_index equals the architectural VLMAX
707                // - mask overlap: `validate_segment_registers` checked `vd.to_bits() != 0` when
708                //   `vm=false`, and no field group starts at 0 since groups are contiguous from
709                //   `vd` which is nonzero
710                unsafe {
711                    zvexx_load_helpers::execute_indexed_load(
712                        ext_state,
713                        memory,
714                        vd,
715                        vs2,
716                        vm,
717                        rs1_value.as_u64(),
718                        vtype.vsew().as_eew(),
719                        index_eew,
720                        data_group_regs,
721                        nf,
722                    )?;
723                }
724            }
725
726            // Indexed-ordered segment load: functionally identical to `Vluxseg` for a software
727            // interpreter
728            Self::Vloxseg {
729                vd,
730                rs1: _,
731                vs2,
732                eew: index_eew,
733                vm_nf,
734            } => {
735                let vm = vm_nf.vm();
736                let nf = vm_nf.nf();
737                if !ext_state.vector_instructions_allowed() {
738                    ::core::hint::cold_path();
739                    return Err(ExecutionError::IllegalInstruction {
740                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
741                    });
742                }
743                let Some(vtype) = ext_state.vtype() else {
744                    ::core::hint::cold_path();
745                    return Err(ExecutionError::IllegalInstruction {
746                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
747                    });
748                };
749                let data_group_regs = vtype.vlmul().register_count();
750                let index_group_regs = vtype
751                    .vlmul()
752                    .index_register_count(index_eew, vtype.vsew())
753                    .ok_or(ExecutionError::IllegalInstruction {
754                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
755                    })?;
756                zvexx_load_helpers::validate_segment_registers::<Reg, _, _, _>(
757                    program_counter,
758                    vd,
759                    vm,
760                    data_group_regs,
761                    nf,
762                )?;
763                zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
764                    program_counter,
765                    vs2,
766                    index_group_regs,
767                )?;
768                for f in 0..nf.fields_per_segment() {
769                    // SAFETY: `vd.to_bits() + f * data_group_regs < 32` because
770                    // `validate_segment_registers` established `vd.to_bits() + nf * data_group_regs
771                    // <= 32` and `f < nf`. The value is in [0, 31], so it is a valid `VReg`
772                    // encoding.
773                    let field_vd = unsafe {
774                        VReg::from_bits(vd.to_bits() + f * data_group_regs).unwrap_unchecked()
775                    };
776                    if zvexx_load_helpers::groups_overlap(
777                        field_vd,
778                        data_group_regs,
779                        vs2,
780                        index_group_regs,
781                    ) {
782                        ::core::hint::cold_path();
783                        return Err(ExecutionError::IllegalInstruction {
784                            address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
785                        });
786                    }
787                }
788                // SAFETY: preconditions identical to `Vluxseg`; see that arm for the full
789                // argument
790                unsafe {
791                    zvexx_load_helpers::execute_indexed_load(
792                        ext_state,
793                        memory,
794                        vd,
795                        vs2,
796                        vm,
797                        rs1_value.as_u64(),
798                        vtype.vsew().as_eew(),
799                        index_eew,
800                        data_group_regs,
801                        nf,
802                    )?;
803                }
804            }
805        }
806
807        Ok(ControlFlow::Continue(Default::default()))
808    }
809}