Skip to main content

ab_riscv_interpreter/v/zvexx/
load.rs

1//! ZveXx vector load instructions
2
3#[cfg(test)]
4mod tests;
5pub mod zvexx_load_helpers;
6
7use crate::v::vector_registers::VectorRegistersExt;
8use crate::v::zvexx::zvexx_helpers;
9use crate::{
10    ExecutableInstruction, ExecutableInstructionCsr, ExecutableInstructionOperands, ExecutionError,
11    ProgramCounter, RegisterFile, Rs1Rs2OperandValues, Rs1Rs2Operands, VirtualMemory,
12};
13use ab_riscv_macros::instruction_execution;
14use ab_riscv_primitives::prelude::*;
15use core::fmt;
16use core::ops::ControlFlow;
17
// Implements `ExecutableInstructionOperands` for `ZveXxLoadInstruction<Reg>`, for any register
// type `Reg: Register`. The impl body is empty: no trait items are written out by hand here.
// NOTE(review): presumably the `#[instruction_execution]` attribute and/or the trait's default
// items supply the actual behaviour — confirm against `ab_riscv_macros`.
18#[instruction_execution]
19impl<Reg> ExecutableInstructionOperands for ZveXxLoadInstruction<Reg> where Reg: Register {}
20
// Implements `ExecutableInstructionCsr<ExtState, CustomError>` for `ZveXxLoadInstruction<Reg>`.
// `ExtState` and `CustomError` are left unconstrained by this impl; the only bound is
// `Reg: Register`. The impl body is empty: no trait items are written out by hand here.
// NOTE(review): presumably the `#[instruction_execution]` attribute and/or the trait's default
// items supply the actual behaviour — confirm against `ab_riscv_macros`.
21#[instruction_execution]
22impl<Reg, ExtState, CustomError> ExecutableInstructionCsr<ExtState, CustomError>
23    for ZveXxLoadInstruction<Reg>
24where
25    Reg: Register,
26{
27}
28
29#[instruction_execution]
30impl<Reg, Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
31    ExecutableInstruction<Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
32    for ZveXxLoadInstruction<Reg>
33where
34    Reg: Register,
35    Regs: RegisterFile<Reg>,
36    ExtState: VectorRegistersExt<Reg, CustomError>,
37    [(); ExtState::ELEN as usize]:,
38    [(); ExtState::VLEN as usize]:,
39    [(); ExtState::VLENB as usize]:,
40    Memory: VirtualMemory,
41    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
42    CustomError: fmt::Debug,
43{
44    #[inline(always)]
45    fn execute(
46        self,
47        Rs1Rs2OperandValues {
48            rs1_value,
49            rs2_value,
50        }: Rs1Rs2OperandValues<<Self::Reg as Register>::Type>,
51        _regs: &mut Regs,
52        ext_state: &mut ExtState,
53        memory: &mut Memory,
54        program_counter: &mut PC,
55        _system_instruction_handler: &mut InstructionHandler,
56    ) -> Result<
57        ControlFlow<(), (Self::Reg, <Self::Reg as Register>::Type)>,
58        ExecutionError<Reg::Type, CustomError>,
59    > {
60        match self {
61            // Whole-register load: loads `nreg` consecutive registers starting at `vd` directly
62            // from memory. `vd` must be aligned to `nreg`. Ignores vtype, vl, vstart, masking.
63            Self::Vlr {
64                vd,
65                rs1: _,
66                nreg,
67                eew: _,
68            } => {
69                let nreg = nreg.num_registers();
70                if !ext_state.vector_instructions_allowed() {
71                    return Err(ExecutionError::IllegalInstruction {
72                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
73                    });
74                }
75                if vd.to_bits() % nreg != 0 {
76                    return Err(ExecutionError::IllegalInstruction {
77                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
78                    });
79                }
80                let base = rs1_value.as_u64();
81                let vlenb = u64::from(ExtState::VLENB);
82                for reg_off in 0..nreg {
83                    // SAFETY: the decoder guarantees nreg in {1,2,4,8} and vd is nreg-aligned
84                    // (checked above), so vd.to_bits() + nreg - 1 <= 31.
85                    let reg = unsafe { VReg::from_bits(vd.to_bits() + reg_off).unwrap_unchecked() };
86                    let bytes = memory
87                        .read_slice(base + u64::from(reg_off) * vlenb, ExtState::VLENB)
88                        .inspect_err(|_error| {
89                            if reg_off > 0 {
90                                ext_state.mark_vs_dirty();
91                                ext_state.reset_vstart();
92                            }
93                        })?;
94                    ext_state.write_vregs().get_mut(reg).copy_from_slice(bytes);
95                }
96                ext_state.mark_vs_dirty();
97                ext_state.reset_vstart();
98            }
99
100            // Mask load: loads ceil(vl / 8) bytes from base into vd with no masking applied.
101            // Does not require a valid vtype: when vill is set vl is 0, so zero bytes are read.
102            Self::Vlm { vd, rs1: _ } => {
103                if !ext_state.vector_instructions_allowed() {
104                    return Err(ExecutionError::IllegalInstruction {
105                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
106                    });
107                }
108                let vl = ext_state.vl();
109                let byte_count = vl.div_ceil(u8::BITS);
110                if byte_count > 0 {
111                    let base = rs1_value.as_u64();
112                    let bytes = memory.read_slice(base, byte_count)?;
113                    // SAFETY: `bytes.len() == byte_count = vl.div_ceil(8) <= VLEN / 8 = VLENB`
114                    // because `vl <= VLMAX <= VLEN`, so `..bytes.len()` is in bounds within the
115                    // `VLENB`-byte destination register.
116                    unsafe {
117                        ext_state
118                            .write_vregs()
119                            .get_mut(vd)
120                            .get_unchecked_mut(..bytes.len())
121                            .copy_from_slice(bytes);
122                    }
123                }
124                ext_state.mark_vs_dirty();
125                ext_state.reset_vstart();
126            }
127
128            // Unit-stride load.
129            //
130            // Destination EMUL = EEW/SEW * LMUL, computed via `index_register_count`. This
131            // gives `group_regs` such that `VLMAX = group_regs * VLENB / eew.bytes()` matches
132            // the architectural VLMAX that bounds `vl`.
133            Self::Vle {
134                vd,
135                rs1: _,
136                vm,
137                eew,
138            } => {
139                if !ext_state.vector_instructions_allowed() {
140                    return Err(ExecutionError::IllegalInstruction {
141                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
142                    });
143                }
144                let vtype = ext_state
145                    .vtype()
146                    .ok_or(ExecutionError::IllegalInstruction {
147                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
148                    })?;
149                let group_regs = vtype
150                    .vlmul()
151                    .index_register_count(eew, vtype.vsew())
152                    .ok_or(ExecutionError::IllegalInstruction {
153                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
154                    })?;
155                zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
156                    program_counter,
157                    vd,
158                    group_regs,
159                )?;
160                if !vm && zvexx_load_helpers::groups_overlap(vd, group_regs, VReg::V0, 1) {
161                    return Err(ExecutionError::IllegalInstruction {
162                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
163                    });
164                }
165                // SAFETY:
166                // - alignment: `check_register_group_alignment` verified `vd % group_regs == 0` and
167                //   `vd + group_regs <= 32`, satisfying both the alignment and nf=1 bounds
168                //   preconditions
169                // - `vl <= group_regs * VLENB / eew.bytes()`: `group_regs` is the EMUL computed for
170                //   this `eew` and `vtype`, so this VLMAX equals the architectural VLMAX that
171                //   bounds `vl`
172                // - mask overlap: checked above via `groups_overlap`
173                unsafe {
174                    zvexx_load_helpers::execute_unit_stride_load(
175                        ext_state,
176                        memory,
177                        vd,
178                        vm,
179                        rs1_value.as_u64(),
180                        eew,
181                        group_regs,
182                        Nf::N1,
183                        false,
184                    )?;
185                }
186            }
187
188            // Fault-only-first unit-stride load. Preconditions identical to `Vle`.
189            Self::Vleff {
190                vd,
191                rs1: _,
192                vm,
193                eew,
194            } => {
195                if !ext_state.vector_instructions_allowed() {
196                    return Err(ExecutionError::IllegalInstruction {
197                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
198                    });
199                }
200                let vtype = ext_state
201                    .vtype()
202                    .ok_or(ExecutionError::IllegalInstruction {
203                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
204                    })?;
205                let group_regs = vtype
206                    .vlmul()
207                    .index_register_count(eew, vtype.vsew())
208                    .ok_or(ExecutionError::IllegalInstruction {
209                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
210                    })?;
211                zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
212                    program_counter,
213                    vd,
214                    group_regs,
215                )?;
216                if !vm && zvexx_load_helpers::groups_overlap(vd, group_regs, VReg::V0, 1) {
217                    return Err(ExecutionError::IllegalInstruction {
218                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
219                    });
220                }
221                // SAFETY: preconditions identical to `Vle`; see that arm for the full argument.
222                unsafe {
223                    zvexx_load_helpers::execute_unit_stride_load(
224                        ext_state,
225                        memory,
226                        vd,
227                        vm,
228                        rs1_value.as_u64(),
229                        eew,
230                        group_regs,
231                        Nf::N1,
232                        true,
233                    )?;
234                }
235            }
236
237            // Strided load. Destination EMUL = EEW/SEW * LMUL as for unit-stride.
238            Self::Vlse {
239                vd,
240                rs1: _,
241                rs2: _,
242                vm,
243                eew,
244            } => {
245                if !ext_state.vector_instructions_allowed() {
246                    return Err(ExecutionError::IllegalInstruction {
247                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
248                    });
249                }
250                let vtype = ext_state
251                    .vtype()
252                    .ok_or(ExecutionError::IllegalInstruction {
253                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
254                    })?;
255                let group_regs = vtype
256                    .vlmul()
257                    .index_register_count(eew, vtype.vsew())
258                    .ok_or(ExecutionError::IllegalInstruction {
259                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
260                    })?;
261                zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
262                    program_counter,
263                    vd,
264                    group_regs,
265                )?;
266                if !vm && zvexx_load_helpers::groups_overlap(vd, group_regs, VReg::V0, 1) {
267                    return Err(ExecutionError::IllegalInstruction {
268                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
269                    });
270                }
271                // rs2 holds a signed stride; reinterpret the register value as signed
272                let stride = rs2_value.as_i64();
273                // SAFETY:
274                // - alignment and nf=1 bounds: `check_register_group_alignment` verified `vd %
275                //   group_regs == 0` and `vd + group_regs <= 32`
276                // - `vl <= group_regs * VLENB / eew.bytes()`: `group_regs` is the EMUL for this
277                //   `eew` and `vtype`, so this VLMAX equals the architectural VLMAX bounding `vl`
278                // - mask overlap: checked above via `groups_overlap`
279                unsafe {
280                    zvexx_load_helpers::execute_strided_load(
281                        ext_state,
282                        memory,
283                        vd,
284                        vm,
285                        rs1_value.as_u64(),
286                        stride,
287                        eew,
288                        group_regs,
289                        Nf::N1,
290                    )?;
291                }
292            }
293
294            // Indexed-unordered load: eew is the index EEW; data EEW comes from vtype.vsew().
295            // The data destination uses the base LMUL (data EEW = SEW for indexed loads).
296            Self::Vluxei {
297                vd,
298                rs1: _,
299                vs2,
300                vm,
301                eew: index_eew,
302            } => {
303                if !ext_state.vector_instructions_allowed() {
304                    return Err(ExecutionError::IllegalInstruction {
305                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
306                    });
307                }
308                let vtype = ext_state
309                    .vtype()
310                    .ok_or(ExecutionError::IllegalInstruction {
311                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
312                    })?;
313                let data_group_regs = vtype.vlmul().register_count();
314                let index_group_regs = vtype
315                    .vlmul()
316                    .index_register_count(index_eew, vtype.vsew())
317                    .ok_or(ExecutionError::IllegalInstruction {
318                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
319                    })?;
320                zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
321                    program_counter,
322                    vd,
323                    data_group_regs,
324                )?;
325                zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
326                    program_counter,
327                    vs2,
328                    index_group_regs,
329                )?;
330                // Non-segment indexed loads permit `vd`/`vs2` overlap under the general
331                // EEW-relative overlap rule (e.g. when the data and index EEW match); only
332                // disallowed overlaps are reserved.
333                if !zvexx_load_helpers::indexed_load_overlap_allowed(
334                    vd,
335                    data_group_regs,
336                    vs2,
337                    index_group_regs,
338                    index_eew,
339                    vtype.vsew(),
340                    vtype.vlmul(),
341                ) {
342                    return Err(ExecutionError::IllegalInstruction {
343                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
344                    });
345                }
346                if !vm && zvexx_load_helpers::groups_overlap(vd, data_group_regs, VReg::V0, 1) {
347                    return Err(ExecutionError::IllegalInstruction {
348                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
349                    });
350                }
351                // SAFETY:
352                // - data alignment/nf=1 bounds: `check_register_group_alignment` on `vd`
353                // - index alignment/bounds: `check_register_group_alignment` on `vs2`
354                // - `vl <= data_group_regs * VLENB / data_eew.bytes()`: data EEW = SEW and
355                //   `data_group_regs = LMUL`, so VLMAX = LMUL * VLEN / SEW, which bounds `vl`
356                // - `vl <= index_group_regs * VLENB / index_eew.bytes()`: `index_group_regs` is
357                //   EMUL_index defined so this VLMAX_index equals the architectural VLMAX
358                // - `vd`/`vs2` overlap (if any) satisfies the general EEW overlap rule, checked
359                //   above; the in-order element loop reads index element `i` before writing data
360                //   element `i`, and that rule guarantees a data write never clobbers an index
361                //   element that has not yet been consumed
362                // - mask overlap: checked above via `groups_overlap`
363                unsafe {
364                    zvexx_load_helpers::execute_indexed_load(
365                        ext_state,
366                        memory,
367                        vd,
368                        vs2,
369                        vm,
370                        rs1_value.as_u64(),
371                        vtype.vsew().as_eew(),
372                        index_eew,
373                        data_group_regs,
374                        Nf::N1,
375                    )?;
376                }
377            }
378
379            // Indexed-ordered load: functionally identical to `Vluxei` for a software
380            // interpreter; memory access ordering has no observable effect here.
381            Self::Vloxei {
382                vd,
383                rs1: _,
384                vs2,
385                vm,
386                eew: index_eew,
387            } => {
388                if !ext_state.vector_instructions_allowed() {
389                    return Err(ExecutionError::IllegalInstruction {
390                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
391                    });
392                }
393                let vtype = ext_state
394                    .vtype()
395                    .ok_or(ExecutionError::IllegalInstruction {
396                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
397                    })?;
398                let data_group_regs = vtype.vlmul().register_count();
399                let index_group_regs = vtype
400                    .vlmul()
401                    .index_register_count(index_eew, vtype.vsew())
402                    .ok_or(ExecutionError::IllegalInstruction {
403                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
404                    })?;
405                zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
406                    program_counter,
407                    vd,
408                    data_group_regs,
409                )?;
410                zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
411                    program_counter,
412                    vs2,
413                    index_group_regs,
414                )?;
415                // Non-segment indexed loads permit `vd`/`vs2` overlap under the general
416                // EEW-relative overlap rule; see the `Vluxei` arm for details.
417                if !zvexx_load_helpers::indexed_load_overlap_allowed(
418                    vd,
419                    data_group_regs,
420                    vs2,
421                    index_group_regs,
422                    index_eew,
423                    vtype.vsew(),
424                    vtype.vlmul(),
425                ) {
426                    return Err(ExecutionError::IllegalInstruction {
427                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
428                    });
429                }
430                if !vm && zvexx_load_helpers::groups_overlap(vd, data_group_regs, VReg::V0, 1) {
431                    return Err(ExecutionError::IllegalInstruction {
432                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
433                    });
434                }
435                // SAFETY: preconditions identical to `Vluxei`; see that arm for the full
436                // argument.
437                unsafe {
438                    zvexx_load_helpers::execute_indexed_load(
439                        ext_state,
440                        memory,
441                        vd,
442                        vs2,
443                        vm,
444                        rs1_value.as_u64(),
445                        vtype.vsew().as_eew(),
446                        index_eew,
447                        data_group_regs,
448                        Nf::N1,
449                    )?;
450                }
451            }
452
453            // Unit-stride segment load. EMUL = EEW/SEW * LMUL per field group.
454            Self::Vlseg {
455                vd,
456                rs1: _,
457                eew,
458                vm_nf,
459            } => {
460                let vm = vm_nf.vm();
461                let nf = vm_nf.nf();
462                if !ext_state.vector_instructions_allowed() {
463                    return Err(ExecutionError::IllegalInstruction {
464                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
465                    });
466                }
467                let vtype = ext_state
468                    .vtype()
469                    .ok_or(ExecutionError::IllegalInstruction {
470                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
471                    })?;
472                let group_regs = vtype
473                    .vlmul()
474                    .index_register_count(eew, vtype.vsew())
475                    .ok_or(ExecutionError::IllegalInstruction {
476                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
477                    })?;
478                zvexx_load_helpers::validate_segment_registers::<Reg, _, _, _>(
479                    program_counter,
480                    vd,
481                    vm,
482                    group_regs,
483                    nf,
484                )?;
485                // SAFETY:
486                // - alignment and nf-group bounds: `validate_segment_registers` verified `vd %
487                //   group_regs == 0` and `vd + nf * group_regs <= 32`
488                // - `vl <= group_regs * VLENB / eew.bytes()`: `group_regs` is the EMUL for this
489                //   `eew` and `vtype`, so this VLMAX equals the architectural VLMAX bounding `vl`
490                // - mask overlap with v0: `validate_segment_registers` checked `vd.to_bits() != 0`
491                //   when `vm=false`, ensuring no field group contains v0
492                unsafe {
493                    zvexx_load_helpers::execute_unit_stride_load(
494                        ext_state,
495                        memory,
496                        vd,
497                        vm,
498                        rs1_value.as_u64(),
499                        eew,
500                        group_regs,
501                        nf,
502                        false,
503                    )?;
504                }
505            }
506
507            // Fault-only-first segment load. Preconditions identical to `Vlseg`.
508            Self::Vlsegff {
509                vd,
510                rs1: _,
511                eew,
512                vm_nf,
513            } => {
514                let vm = vm_nf.vm();
515                let nf = vm_nf.nf();
516                if !ext_state.vector_instructions_allowed() {
517                    return Err(ExecutionError::IllegalInstruction {
518                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
519                    });
520                }
521                let vtype = ext_state
522                    .vtype()
523                    .ok_or(ExecutionError::IllegalInstruction {
524                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
525                    })?;
526                let group_regs = vtype
527                    .vlmul()
528                    .index_register_count(eew, vtype.vsew())
529                    .ok_or(ExecutionError::IllegalInstruction {
530                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
531                    })?;
532                zvexx_load_helpers::validate_segment_registers::<Reg, _, _, _>(
533                    program_counter,
534                    vd,
535                    vm,
536                    group_regs,
537                    nf,
538                )?;
539                // SAFETY: preconditions identical to `Vlseg`; see that arm for the full argument.
540                unsafe {
541                    zvexx_load_helpers::execute_unit_stride_load(
542                        ext_state,
543                        memory,
544                        vd,
545                        vm,
546                        rs1_value.as_u64(),
547                        eew,
548                        group_regs,
549                        nf,
550                        true,
551                    )?;
552                }
553            }
554
555            // Strided segment load. EMUL = EEW/SEW * LMUL as for `Vlse`.
556            Self::Vlsseg {
557                vd,
558                rs1: _,
559                rs2: _,
560                eew,
561                vm_nf,
562            } => {
563                let vm = vm_nf.vm();
564                let nf = vm_nf.nf();
565                if !ext_state.vector_instructions_allowed() {
566                    return Err(ExecutionError::IllegalInstruction {
567                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
568                    });
569                }
570                let vtype = ext_state
571                    .vtype()
572                    .ok_or(ExecutionError::IllegalInstruction {
573                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
574                    })?;
575                let group_regs = vtype
576                    .vlmul()
577                    .index_register_count(eew, vtype.vsew())
578                    .ok_or(ExecutionError::IllegalInstruction {
579                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
580                    })?;
581                zvexx_load_helpers::validate_segment_registers::<Reg, _, _, _>(
582                    program_counter,
583                    vd,
584                    vm,
585                    group_regs,
586                    nf,
587                )?;
588                let stride = rs2_value.as_i64();
589                // SAFETY:
590                // - alignment and nf-group bounds: `validate_segment_registers` verified `vd %
591                //   group_regs == 0` and `vd + nf * group_regs <= 32`
592                // - `vl <= group_regs * VLENB / eew.bytes()`: `group_regs` is EMUL for this `eew`
593                //   and `vtype`
594                // - mask overlap: `validate_segment_registers` checked `vd.to_bits() != 0` when
595                //   `vm=false`
596                unsafe {
597                    zvexx_load_helpers::execute_strided_load(
598                        ext_state,
599                        memory,
600                        vd,
601                        vm,
602                        rs1_value.as_u64(),
603                        stride,
604                        eew,
605                        group_regs,
606                        nf,
607                    )?;
608                }
609            }
610
611            // Indexed-unordered segment load
612            Self::Vluxseg {
613                vd,
614                rs1: _,
615                vs2,
616                eew: index_eew,
617                vm_nf,
618            } => {
619                let vm = vm_nf.vm();
620                let nf = vm_nf.nf();
621                if !ext_state.vector_instructions_allowed() {
622                    return Err(ExecutionError::IllegalInstruction {
623                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
624                    });
625                }
626                let vtype = ext_state
627                    .vtype()
628                    .ok_or(ExecutionError::IllegalInstruction {
629                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
630                    })?;
631                let data_group_regs = vtype.vlmul().register_count();
632                let index_group_regs = vtype
633                    .vlmul()
634                    .index_register_count(index_eew, vtype.vsew())
635                    .ok_or(ExecutionError::IllegalInstruction {
636                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
637                    })?;
638                // `validate_segment_registers` is called before the per-field overlap loop so
639                // that `vd.to_bits() + f * data_group_regs < 32` is established for all `f < nf`,
640                // which is required by the `VReg::from_bits` call inside the loop.
641                zvexx_load_helpers::validate_segment_registers::<Reg, _, _, _>(
642                    program_counter,
643                    vd,
644                    vm,
645                    data_group_regs,
646                    nf,
647                )?;
648                zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
649                    program_counter,
650                    vs2,
651                    index_group_regs,
652                )?;
653                for f in 0..nf.fields_per_segment() {
654                    // SAFETY: `vd.to_bits() + f * data_group_regs < 32` because
655                    // `validate_segment_registers` established `vd.to_bits() + nf * data_group_regs
656                    // <= 32` and `f < nf`. The value is in [0, 31], so it is a valid `VReg`
657                    // encoding.
658                    let field_vd = unsafe {
659                        VReg::from_bits(vd.to_bits() + f * data_group_regs).unwrap_unchecked()
660                    };
661                    if zvexx_load_helpers::groups_overlap(
662                        field_vd,
663                        data_group_regs,
664                        vs2,
665                        index_group_regs,
666                    ) {
667                        return Err(ExecutionError::IllegalInstruction {
668                            address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
669                        });
670                    }
671                }
672                // SAFETY:
673                // - data alignment/nf-group bounds: `validate_segment_registers` verified `vd %
674                //   data_group_regs == 0` and `vd + nf * data_group_regs <= 32`
675                // - index alignment/bounds: `check_register_group_alignment` verified `vs2 %
676                //   EMUL_index == 0` and `vs2 + EMUL_index <= 32`
677                // - no field/index group overlap: verified by the loop above
678                // - `vl <= data_group_regs * VLENB / data_eew.bytes()`: data EEW = SEW and
679                //   `data_group_regs = LMUL`, so VLMAX = LMUL * VLEN / SEW bounds `vl`
680                // - `vl <= EMUL_index * VLENB / index_eew.bytes()`: `index_group_regs` (EMUL_index)
681                //   is defined so this VLMAX_index equals the architectural VLMAX
682                // - mask overlap: `validate_segment_registers` checked `vd.to_bits() != 0` when
683                //   `vm=false`, and no field group starts at 0 since groups are contiguous from
684                //   `vd` which is nonzero
685                unsafe {
686                    zvexx_load_helpers::execute_indexed_load(
687                        ext_state,
688                        memory,
689                        vd,
690                        vs2,
691                        vm,
692                        rs1_value.as_u64(),
693                        vtype.vsew().as_eew(),
694                        index_eew,
695                        data_group_regs,
696                        nf,
697                    )?;
698                }
699            }
700
701            // Indexed-ordered segment load: functionally identical to `Vluxseg` for a software
702            // interpreter
703            Self::Vloxseg {
704                vd,
705                rs1: _,
706                vs2,
707                eew: index_eew,
708                vm_nf,
709            } => {
710                let vm = vm_nf.vm();
711                let nf = vm_nf.nf();
712                if !ext_state.vector_instructions_allowed() {
713                    return Err(ExecutionError::IllegalInstruction {
714                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
715                    });
716                }
717                let vtype = ext_state
718                    .vtype()
719                    .ok_or(ExecutionError::IllegalInstruction {
720                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
721                    })?;
722                let data_group_regs = vtype.vlmul().register_count();
723                let index_group_regs = vtype
724                    .vlmul()
725                    .index_register_count(index_eew, vtype.vsew())
726                    .ok_or(ExecutionError::IllegalInstruction {
727                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
728                    })?;
729                zvexx_load_helpers::validate_segment_registers::<Reg, _, _, _>(
730                    program_counter,
731                    vd,
732                    vm,
733                    data_group_regs,
734                    nf,
735                )?;
736                zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
737                    program_counter,
738                    vs2,
739                    index_group_regs,
740                )?;
741                for f in 0..nf.fields_per_segment() {
742                    // SAFETY: `vd.to_bits() + f * data_group_regs < 32` because
743                    // `validate_segment_registers` established `vd.to_bits() + nf * data_group_regs
744                    // <= 32` and `f < nf`. The value is in [0, 31], so it is a valid `VReg`
745                    // encoding.
746                    let field_vd = unsafe {
747                        VReg::from_bits(vd.to_bits() + f * data_group_regs).unwrap_unchecked()
748                    };
749                    if zvexx_load_helpers::groups_overlap(
750                        field_vd,
751                        data_group_regs,
752                        vs2,
753                        index_group_regs,
754                    ) {
755                        return Err(ExecutionError::IllegalInstruction {
756                            address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
757                        });
758                    }
759                }
760                // SAFETY: preconditions identical to `Vluxseg`; see that arm for the full
761                // argument
762                unsafe {
763                    zvexx_load_helpers::execute_indexed_load(
764                        ext_state,
765                        memory,
766                        vd,
767                        vs2,
768                        vm,
769                        rs1_value.as_u64(),
770                        vtype.vsew().as_eew(),
771                        index_eew,
772                        data_group_regs,
773                        nf,
774                    )?;
775                }
776            }
777        }
778
779        Ok(ControlFlow::Continue(Default::default()))
780    }
781}