// ab_riscv_interpreter/v/zve64x/load.rs
//! Zve64x vector load instructions

3#[cfg(test)]
4mod tests;
5pub mod zve64x_load_helpers;
6
7use crate::v::vector_registers::VectorRegistersExt;
8use crate::v::zve64x::zve64x_helpers;
9use crate::{
10    ExecutableInstruction, ExecutionError, InterpreterState, ProgramCounter, VirtualMemory,
11};
12use ab_riscv_macros::instruction_execution;
13use ab_riscv_primitives::instructions::v::zve64x::load::Zve64xLoadInstruction;
14use ab_riscv_primitives::registers::general_purpose::{RegType, Register};
15use ab_riscv_primitives::registers::vector::VReg;
16use core::fmt;
17use core::ops::ControlFlow;
18
19#[instruction_execution]
20impl<Reg, ExtState, Memory, PC, InstructionHandler, CustomError>
21    ExecutableInstruction<
22        InterpreterState<Reg, ExtState, Memory, PC, InstructionHandler, CustomError>,
23        CustomError,
24    > for Zve64xLoadInstruction<Reg>
25where
26    Reg: Register,
27    [(); Reg::N]:,
28    ExtState: VectorRegistersExt<Reg, CustomError>,
29    [(); ExtState::ELEN as usize]:,
30    [(); ExtState::VLEN as usize]:,
31    [(); ExtState::VLENB as usize]:,
32    Memory: VirtualMemory,
33    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
34    CustomError: fmt::Debug,
35{
36    #[inline(always)]
37    fn execute(
38        self,
39        state: &mut InterpreterState<Reg, ExtState, Memory, PC, InstructionHandler, CustomError>,
40    ) -> Result<ControlFlow<()>, ExecutionError<Reg::Type, CustomError>> {
41        match self {
42            // Whole-register load: loads `nreg` consecutive registers starting at `vd` directly
43            // from memory. `vd` must be aligned to `nreg`. Ignores vtype, vl, vstart, masking.
44            Self::Vlr {
45                vd,
46                rs1,
47                nreg,
48                eew: _,
49            } => {
50                if !state.ext_state.vector_instructions_allowed() {
51                    Err(ExecutionError::IllegalInstruction {
52                        address: state
53                            .instruction_fetcher
54                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
55                    })?;
56                }
57                if u32::from(vd.bits()) % u32::from(nreg) != 0 {
58                    Err(ExecutionError::IllegalInstruction {
59                        address: state
60                            .instruction_fetcher
61                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
62                    })?;
63                }
64                let base = state.regs.read(rs1).as_u64();
65                let vlenb = u64::from(ExtState::VLENB);
66                for reg_off in 0..u64::from(nreg) {
67                    let reg_idx = u64::from(vd.bits()) + reg_off;
68                    let bytes = match state
69                        .memory
70                        .read_slice(base + reg_off * vlenb, ExtState::VLENB)
71                    {
72                        Ok(bytes) => bytes,
73                        Err(error) => {
74                            if reg_off > 0 {
75                                state.ext_state.mark_vs_dirty();
76                                state.ext_state.reset_vstart();
77                            }
78                            Err(ExecutionError::MemoryAccess(error))?
79                        }
80                    };
81                    // SAFETY: `reg_idx < 32` because the decoder guarantees nreg in {1,2,4,8}
82                    // and vd is nreg-aligned (checked above), so vd.bits() + nreg - 1 <= 31.
83                    // `read_slice` returns a slice of exactly `ExtState::VLENB` bytes on success,
84                    // matching `dst`'s length, so `copy_from_slice` cannot panic.
85                    let dst = unsafe {
86                        state
87                            .ext_state
88                            .write_vreg()
89                            .get_unchecked_mut(reg_idx as usize)
90                    };
91                    dst.copy_from_slice(bytes);
92                }
93                state.ext_state.mark_vs_dirty();
94                state.ext_state.reset_vstart();
95            }
96
97            // Mask load: loads ceil(vl / 8) bytes from base into vd with no masking applied.
98            // Does not require a valid vtype: when vill is set vl is 0, so zero bytes are read.
99            Self::Vlm { vd, rs1 } => {
100                if !state.ext_state.vector_instructions_allowed() {
101                    Err(ExecutionError::IllegalInstruction {
102                        address: state
103                            .instruction_fetcher
104                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
105                    })?;
106                }
107                let vl = state.ext_state.vl();
108                let byte_count = vl.div_ceil(u8::BITS);
109                if byte_count > 0 {
110                    let base = state.regs.read(rs1).as_u64();
111                    let bytes = state.memory.read_slice(base, byte_count)?;
112                    // SAFETY: `vd.bits() < 32` is guaranteed by the `VReg` type.
113                    // `bytes.len() == byte_count = vl.div_ceil(8) <= VLEN / 8 = VLENB` because
114                    // `vl <= VLMAX <= VLEN`, so `..bytes.len()` is in bounds within the
115                    // `VLENB`-byte destination register.
116                    unsafe {
117                        state
118                            .ext_state
119                            .write_vreg()
120                            .get_unchecked_mut(usize::from(vd.bits()))
121                            .get_unchecked_mut(..bytes.len())
122                            .copy_from_slice(bytes);
123                    }
124                }
125                state.ext_state.mark_vs_dirty();
126                state.ext_state.reset_vstart();
127            }
128
129            // Unit-stride load.
130            //
131            // Destination EMUL = EEW/SEW * LMUL, computed via `index_register_count`. This
132            // gives `group_regs` such that `VLMAX = group_regs * VLENB / eew.bytes()` matches
133            // the architectural `vl`.
134            Self::Vle { vd, rs1, vm, eew } => {
135                if !state.ext_state.vector_instructions_allowed() {
136                    Err(ExecutionError::IllegalInstruction {
137                        address: state
138                            .instruction_fetcher
139                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
140                    })?;
141                }
142                let vtype = state
143                    .ext_state
144                    .vtype()
145                    .ok_or(ExecutionError::IllegalInstruction {
146                        address: state
147                            .instruction_fetcher
148                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
149                    })?;
150                let group_regs = vtype
151                    .vlmul()
152                    .index_register_count(eew, vtype.vsew())
153                    .ok_or(ExecutionError::IllegalInstruction {
154                        address: state
155                            .instruction_fetcher
156                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
157                    })?;
158                zve64x_load_helpers::check_register_group_alignment(state, vd, group_regs)?;
159                if !vm && zve64x_load_helpers::groups_overlap(vd, group_regs, VReg::V0, 1) {
160                    Err(ExecutionError::IllegalInstruction {
161                        address: state
162                            .instruction_fetcher
163                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
164                    })?;
165                }
166                // SAFETY:
167                // - 1 <= MAX_NF
168                // - alignment: `check_register_group_alignment` verified `vd % group_regs == 0` and
169                //   `vd + group_regs <= 32`, satisfying both the alignment and nf=1 bounds
170                //   preconditions
171                // - `vl <= group_regs * VLENB / eew.bytes()`: `group_regs` is the EMUL computed for
172                //   this `eew` and `vtype`, so this VLMAX equals the architectural VLMAX that
173                //   bounds `vl`
174                // - mask overlap: checked above via `groups_overlap`
175                unsafe {
176                    zve64x_load_helpers::execute_unit_stride_load(
177                        state,
178                        vd,
179                        vm,
180                        state.ext_state.vl(),
181                        u32::from(state.ext_state.vstart()),
182                        state.regs.read(rs1).as_u64(),
183                        eew,
184                        group_regs,
185                        1,
186                        false,
187                    )?;
188                }
189            }
190
191            // Fault-only-first unit-stride load. Preconditions identical to `Vle`.
192            Self::Vleff { vd, rs1, vm, eew } => {
193                if !state.ext_state.vector_instructions_allowed() {
194                    Err(ExecutionError::IllegalInstruction {
195                        address: state
196                            .instruction_fetcher
197                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
198                    })?;
199                }
200                let vtype = state
201                    .ext_state
202                    .vtype()
203                    .ok_or(ExecutionError::IllegalInstruction {
204                        address: state
205                            .instruction_fetcher
206                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
207                    })?;
208                let group_regs = vtype
209                    .vlmul()
210                    .index_register_count(eew, vtype.vsew())
211                    .ok_or(ExecutionError::IllegalInstruction {
212                        address: state
213                            .instruction_fetcher
214                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
215                    })?;
216                zve64x_load_helpers::check_register_group_alignment(state, vd, group_regs)?;
217                if !vm && zve64x_load_helpers::groups_overlap(vd, group_regs, VReg::V0, 1) {
218                    Err(ExecutionError::IllegalInstruction {
219                        address: state
220                            .instruction_fetcher
221                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
222                    })?;
223                }
224                // SAFETY: preconditions identical to `Vle`; see that arm for the full argument.
225                unsafe {
226                    zve64x_load_helpers::execute_unit_stride_load(
227                        state,
228                        vd,
229                        vm,
230                        state.ext_state.vl(),
231                        u32::from(state.ext_state.vstart()),
232                        state.regs.read(rs1).as_u64(),
233                        eew,
234                        group_regs,
235                        1,
236                        true,
237                    )?;
238                }
239            }
240
241            // Strided load. Destination EMUL = EEW/SEW * LMUL as for unit-stride.
242            Self::Vlse {
243                vd,
244                rs1,
245                rs2,
246                vm,
247                eew,
248            } => {
249                if !state.ext_state.vector_instructions_allowed() {
250                    Err(ExecutionError::IllegalInstruction {
251                        address: state
252                            .instruction_fetcher
253                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
254                    })?;
255                }
256                let vtype = state
257                    .ext_state
258                    .vtype()
259                    .ok_or(ExecutionError::IllegalInstruction {
260                        address: state
261                            .instruction_fetcher
262                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
263                    })?;
264                let group_regs = vtype
265                    .vlmul()
266                    .index_register_count(eew, vtype.vsew())
267                    .ok_or(ExecutionError::IllegalInstruction {
268                        address: state
269                            .instruction_fetcher
270                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
271                    })?;
272                zve64x_load_helpers::check_register_group_alignment(state, vd, group_regs)?;
273                if !vm && zve64x_load_helpers::groups_overlap(vd, group_regs, VReg::V0, 1) {
274                    Err(ExecutionError::IllegalInstruction {
275                        address: state
276                            .instruction_fetcher
277                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
278                    })?;
279                }
280                // rs2 holds a signed stride; reinterpret the register value as signed.
281                let stride = state.regs.read(rs2).as_u64().cast_signed();
282                // SAFETY:
283                // - alignment and nf=1 bounds: `check_register_group_alignment` verified `vd %
284                //   group_regs == 0` and `vd + group_regs <= 32`
285                // - `vl <= group_regs * VLENB / eew.bytes()`: `group_regs` is the EMUL for this
286                //   `eew` and `vtype`, so this VLMAX equals the architectural VLMAX bounding `vl`
287                // - mask overlap: checked above via `groups_overlap`
288                unsafe {
289                    zve64x_load_helpers::execute_strided_load(
290                        state,
291                        vd,
292                        vm,
293                        state.ext_state.vl(),
294                        u32::from(state.ext_state.vstart()),
295                        state.regs.read(rs1).as_u64(),
296                        stride,
297                        eew,
298                        group_regs,
299                        1,
300                    )?;
301                }
302            }
303
304            // Indexed-unordered load: eew is the index EEW; data EEW comes from vtype.vsew().
305            // The data destination uses the base LMUL (data EEW = SEW for indexed loads).
306            Self::Vluxei {
307                vd,
308                rs1,
309                vs2,
310                vm,
311                eew: index_eew,
312            } => {
313                if !state.ext_state.vector_instructions_allowed() {
314                    Err(ExecutionError::IllegalInstruction {
315                        address: state
316                            .instruction_fetcher
317                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
318                    })?;
319                }
320                let vtype = state
321                    .ext_state
322                    .vtype()
323                    .ok_or(ExecutionError::IllegalInstruction {
324                        address: state
325                            .instruction_fetcher
326                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
327                    })?;
328                let data_group_regs = vtype.vlmul().register_count();
329                let index_group_regs = vtype
330                    .vlmul()
331                    .index_register_count(index_eew, vtype.vsew())
332                    .ok_or(ExecutionError::IllegalInstruction {
333                        address: state
334                            .instruction_fetcher
335                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
336                    })?;
337                zve64x_load_helpers::check_register_group_alignment(state, vd, data_group_regs)?;
338                zve64x_load_helpers::check_register_group_alignment(state, vs2, index_group_regs)?;
339                if zve64x_load_helpers::groups_overlap(vd, data_group_regs, vs2, index_group_regs) {
340                    Err(ExecutionError::IllegalInstruction {
341                        address: state
342                            .instruction_fetcher
343                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
344                    })?;
345                }
346                if !vm && zve64x_load_helpers::groups_overlap(vd, data_group_regs, VReg::V0, 1) {
347                    Err(ExecutionError::IllegalInstruction {
348                        address: state
349                            .instruction_fetcher
350                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
351                    })?;
352                }
353                // SAFETY:
354                // - data alignment/nf=1 bounds: `check_register_group_alignment` on `vd`
355                // - index alignment/bounds: `check_register_group_alignment` on `vs2`
356                // - `vl <= data_group_regs * VLENB / data_eew.bytes()`: data EEW = SEW and
357                //   `data_group_regs = LMUL`, so VLMAX = LMUL * VLEN / SEW, which bounds `vl`
358                // - `vl <= index_group_regs * VLENB / index_eew.bytes()`: `index_group_regs` is
359                //   EMUL_index defined so this VLMAX_index equals the architectural VLMAX
360                // - no overlap between data and index groups: checked above
361                // - mask overlap: checked above via `groups_overlap`
362                unsafe {
363                    zve64x_load_helpers::execute_indexed_load(
364                        state,
365                        vd,
366                        vs2,
367                        vm,
368                        state.ext_state.vl(),
369                        u32::from(state.ext_state.vstart()),
370                        state.regs.read(rs1).as_u64(),
371                        vtype.vsew().as_eew(),
372                        index_eew,
373                        data_group_regs,
374                        1,
375                    )?;
376                }
377            }
378
379            // Indexed-ordered load: functionally identical to `Vluxei` for a software
380            // interpreter; memory access ordering has no observable effect here.
381            Self::Vloxei {
382                vd,
383                rs1,
384                vs2,
385                vm,
386                eew: index_eew,
387            } => {
388                if !state.ext_state.vector_instructions_allowed() {
389                    Err(ExecutionError::IllegalInstruction {
390                        address: state
391                            .instruction_fetcher
392                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
393                    })?;
394                }
395                let vtype = state
396                    .ext_state
397                    .vtype()
398                    .ok_or(ExecutionError::IllegalInstruction {
399                        address: state
400                            .instruction_fetcher
401                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
402                    })?;
403                let data_group_regs = vtype.vlmul().register_count();
404                let index_group_regs = vtype
405                    .vlmul()
406                    .index_register_count(index_eew, vtype.vsew())
407                    .ok_or(ExecutionError::IllegalInstruction {
408                        address: state
409                            .instruction_fetcher
410                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
411                    })?;
412                zve64x_load_helpers::check_register_group_alignment(state, vd, data_group_regs)?;
413                zve64x_load_helpers::check_register_group_alignment(state, vs2, index_group_regs)?;
414                if zve64x_load_helpers::groups_overlap(vd, data_group_regs, vs2, index_group_regs) {
415                    Err(ExecutionError::IllegalInstruction {
416                        address: state
417                            .instruction_fetcher
418                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
419                    })?;
420                }
421                if !vm && zve64x_load_helpers::groups_overlap(vd, data_group_regs, VReg::V0, 1) {
422                    Err(ExecutionError::IllegalInstruction {
423                        address: state
424                            .instruction_fetcher
425                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
426                    })?;
427                }
428                // SAFETY: preconditions identical to `Vluxei`; see that arm for the full
429                // argument.
430                unsafe {
431                    zve64x_load_helpers::execute_indexed_load(
432                        state,
433                        vd,
434                        vs2,
435                        vm,
436                        state.ext_state.vl(),
437                        u32::from(state.ext_state.vstart()),
438                        state.regs.read(rs1).as_u64(),
439                        vtype.vsew().as_eew(),
440                        index_eew,
441                        data_group_regs,
442                        1,
443                    )?;
444                }
445            }
446
447            // Unit-stride segment load. EMUL = EEW/SEW * LMUL per field group.
448            Self::Vlseg {
449                vd,
450                rs1,
451                vm,
452                eew,
453                nf,
454            } => {
455                if !state.ext_state.vector_instructions_allowed() {
456                    Err(ExecutionError::IllegalInstruction {
457                        address: state
458                            .instruction_fetcher
459                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
460                    })?;
461                }
462                let vtype = state
463                    .ext_state
464                    .vtype()
465                    .ok_or(ExecutionError::IllegalInstruction {
466                        address: state
467                            .instruction_fetcher
468                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
469                    })?;
470                let group_regs = vtype
471                    .vlmul()
472                    .index_register_count(eew, vtype.vsew())
473                    .ok_or(ExecutionError::IllegalInstruction {
474                        address: state
475                            .instruction_fetcher
476                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
477                    })?;
478                zve64x_load_helpers::validate_segment_registers(state, vd, vm, group_regs, nf)?;
479                if nf > zve64x_load_helpers::MAX_NF {
480                    Err(ExecutionError::IllegalInstruction {
481                        address: state
482                            .instruction_fetcher
483                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
484                    })?;
485                }
486                // SAFETY:
487                // - `nf <= MAX_NF` checked above
488                // - alignment and nf-group bounds: `validate_segment_registers` verified `vd %
489                //   group_regs == 0` and `vd + nf * group_regs <= 32`
490                // - `vl <= group_regs * VLENB / eew.bytes()`: `group_regs` is the EMUL for this
491                //   `eew` and `vtype`, so this VLMAX equals the architectural VLMAX bounding `vl`
492                // - mask overlap with v0: `validate_segment_registers` checked `vd.bits() != 0`
493                //   when `vm=false`, ensuring no field group contains v0
494                unsafe {
495                    zve64x_load_helpers::execute_unit_stride_load(
496                        state,
497                        vd,
498                        vm,
499                        state.ext_state.vl(),
500                        u32::from(state.ext_state.vstart()),
501                        state.regs.read(rs1).as_u64(),
502                        eew,
503                        group_regs,
504                        nf,
505                        false,
506                    )?;
507                }
508            }
509
510            // Fault-only-first segment load. Preconditions identical to `Vlseg`.
511            Self::Vlsegff {
512                vd,
513                rs1,
514                vm,
515                eew,
516                nf,
517            } => {
518                if !state.ext_state.vector_instructions_allowed() {
519                    Err(ExecutionError::IllegalInstruction {
520                        address: state
521                            .instruction_fetcher
522                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
523                    })?;
524                }
525                let vtype = state
526                    .ext_state
527                    .vtype()
528                    .ok_or(ExecutionError::IllegalInstruction {
529                        address: state
530                            .instruction_fetcher
531                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
532                    })?;
533                let group_regs = vtype
534                    .vlmul()
535                    .index_register_count(eew, vtype.vsew())
536                    .ok_or(ExecutionError::IllegalInstruction {
537                        address: state
538                            .instruction_fetcher
539                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
540                    })?;
541                zve64x_load_helpers::validate_segment_registers(state, vd, vm, group_regs, nf)?;
542                if nf > zve64x_load_helpers::MAX_NF {
543                    Err(ExecutionError::IllegalInstruction {
544                        address: state
545                            .instruction_fetcher
546                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
547                    })?;
548                }
549                // SAFETY: preconditions identical to `Vlseg`; see that arm for the full argument.
550                unsafe {
551                    zve64x_load_helpers::execute_unit_stride_load(
552                        state,
553                        vd,
554                        vm,
555                        state.ext_state.vl(),
556                        u32::from(state.ext_state.vstart()),
557                        state.regs.read(rs1).as_u64(),
558                        eew,
559                        group_regs,
560                        nf,
561                        true,
562                    )?;
563                }
564            }
565
566            // Strided segment load. EMUL = EEW/SEW * LMUL as for `Vlse`.
567            Self::Vlsseg {
568                vd,
569                rs1,
570                rs2,
571                vm,
572                eew,
573                nf,
574            } => {
575                if !state.ext_state.vector_instructions_allowed() {
576                    Err(ExecutionError::IllegalInstruction {
577                        address: state
578                            .instruction_fetcher
579                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
580                    })?;
581                }
582                let vtype = state
583                    .ext_state
584                    .vtype()
585                    .ok_or(ExecutionError::IllegalInstruction {
586                        address: state
587                            .instruction_fetcher
588                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
589                    })?;
590                let group_regs = vtype
591                    .vlmul()
592                    .index_register_count(eew, vtype.vsew())
593                    .ok_or(ExecutionError::IllegalInstruction {
594                        address: state
595                            .instruction_fetcher
596                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
597                    })?;
598                zve64x_load_helpers::validate_segment_registers(state, vd, vm, group_regs, nf)?;
599                let stride = state.regs.read(rs2).as_u64().cast_signed();
600                // SAFETY:
601                // - alignment and nf-group bounds: `validate_segment_registers` verified `vd %
602                //   group_regs == 0` and `vd + nf * group_regs <= 32`
603                // - `vl <= group_regs * VLENB / eew.bytes()`: `group_regs` is EMUL for this `eew`
604                //   and `vtype`
605                // - mask overlap: `validate_segment_registers` checked `vd.bits() != 0` when
606                //   `vm=false`
607                unsafe {
608                    zve64x_load_helpers::execute_strided_load(
609                        state,
610                        vd,
611                        vm,
612                        state.ext_state.vl(),
613                        u32::from(state.ext_state.vstart()),
614                        state.regs.read(rs1).as_u64(),
615                        stride,
616                        eew,
617                        group_regs,
618                        nf,
619                    )?;
620                }
621            }
622
623            // Indexed-unordered segment load
624            Self::Vluxseg {
625                vd,
626                rs1,
627                vs2,
628                vm,
                eew: index_eew,
                nf,
            } => {
                // Vector instructions must currently be permitted; otherwise raise an
                // illegal-instruction error at this instruction's own address.
                if !state.ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                // A `None` vtype (invalid/unset vector configuration) makes this load illegal
                let vtype = state
                    .ext_state
                    .vtype()
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                // For indexed loads the data EEW equals SEW, so each data field group spans
                // exactly LMUL registers (see the SAFETY argument below)
                let data_group_regs = vtype.vlmul().register_count();
                // EMUL of the index group is derived from (index EEW, SEW, LMUL); `None`
                // means the combination has no representable register count and is illegal
                let index_group_regs = vtype
                    .vlmul()
                    .index_register_count(index_eew, vtype.vsew())
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                // `validate_segment_registers` is called before the per-field overlap loop so
                // that `vd.bits() + f * data_group_regs < 32` is established for all `f < nf`,
                // which is required by the `VReg::from_bits` call inside the loop.
                zve64x_load_helpers::validate_segment_registers(
                    state,
                    vd,
                    vm,
                    data_group_regs,
                    nf,
                )?;
                zve64x_load_helpers::check_register_group_alignment(state, vs2, index_group_regs)?;
                // Per-field check: no destination field group may overlap the index source
                // group `vs2..vs2 + index_group_regs`
                for f in 0..nf {
                    // SAFETY: `vd.bits() + f * data_group_regs < 32` because
                    // `validate_segment_registers` established `vd.bits() + nf * data_group_regs
                    // <= 32` and `f < nf`. The value is in [0, 31], so it is a valid `VReg`
                    // encoding.
                    let field_vd = unsafe {
                        VReg::from_bits(vd.bits() + f * data_group_regs).unwrap_unchecked()
                    };
                    if zve64x_load_helpers::groups_overlap(
                        field_vd,
                        data_group_regs,
                        vs2,
                        index_group_regs,
                    ) {
                        Err(ExecutionError::IllegalInstruction {
                            address: state
                                .instruction_fetcher
                                .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                        })?;
                    }
                }
                // SAFETY:
                // - data alignment/nf-group bounds: `validate_segment_registers` verified `vd %
                //   data_group_regs == 0` and `vd + nf * data_group_regs <= 32`
                // - index alignment/bounds: `check_register_group_alignment` verified `vs2 %
                //   EMUL_index == 0` and `vs2 + EMUL_index <= 32`
                // - no field/index group overlap: verified by the loop above
                // - `vl <= data_group_regs * VLENB / data_eew.bytes()`: data EEW = SEW and
                //   `data_group_regs = LMUL`, so VLMAX = LMUL * VLEN / SEW bounds `vl`
                // - `vl <= EMUL_index * VLENB / index_eew.bytes()`: `index_group_regs` (EMUL_index)
                //   is defined so this VLMAX_index equals the architectural VLMAX
                // - mask overlap: `validate_segment_registers` checked `vd.bits() != 0` when
                //   `vm=false`, and no field group starts at 0 since groups are contiguous from
                //   `vd` which is nonzero
                unsafe {
                    zve64x_load_helpers::execute_indexed_load(
                        state,
                        vd,
                        vs2,
                        vm,
                        state.ext_state.vl(),
                        u32::from(state.ext_state.vstart()),
                        state.regs.read(rs1).as_u64(), // scalar base address
                        vtype.vsew().as_eew(),
                        index_eew,
                        data_group_regs,
                        nf,
                    )?;
                }
            }
717
            // Indexed-ordered segment load: functionally identical to `Vluxseg` for a software
            // interpreter (ordering only constrains hardware memory-access order)
            Self::Vloxseg {
                vd,
                rs1,
                vs2,
                vm,
                eew: index_eew,
                nf,
            } => {
                // Vector instructions must currently be permitted; otherwise raise an
                // illegal-instruction error at this instruction's own address.
                if !state.ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                // A `None` vtype (invalid/unset vector configuration) makes this load illegal
                let vtype = state
                    .ext_state
                    .vtype()
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                // For indexed loads the data EEW equals SEW, so each data field group spans
                // exactly LMUL registers
                let data_group_regs = vtype.vlmul().register_count();
                // EMUL of the index group is derived from (index EEW, SEW, LMUL); `None`
                // means the combination has no representable register count and is illegal
                let index_group_regs = vtype
                    .vlmul()
                    .index_register_count(index_eew, vtype.vsew())
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                // `validate_segment_registers` is called before the per-field overlap loop so
                // that `vd.bits() + f * data_group_regs < 32` is established for all `f < nf`,
                // which is required by the `VReg::from_bits` call inside the loop.
                zve64x_load_helpers::validate_segment_registers(
                    state,
                    vd,
                    vm,
                    data_group_regs,
                    nf,
                )?;
                zve64x_load_helpers::check_register_group_alignment(state, vs2, index_group_regs)?;
                // Per-field check: no destination field group may overlap the index source
                // group `vs2..vs2 + index_group_regs`
                for f in 0..nf {
                    // SAFETY: `vd.bits() + f * data_group_regs < 32` because
                    // `validate_segment_registers` established `vd.bits() + nf * data_group_regs
                    // <= 32` and `f < nf`. The value is in [0, 31], so it is a valid `VReg`
                    // encoding.
                    let field_vd = unsafe {
                        VReg::from_bits(vd.bits() + f * data_group_regs).unwrap_unchecked()
                    };
                    if zve64x_load_helpers::groups_overlap(
                        field_vd,
                        data_group_regs,
                        vs2,
                        index_group_regs,
                    ) {
                        Err(ExecutionError::IllegalInstruction {
                            address: state
                                .instruction_fetcher
                                .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                        })?;
                    }
                }
                // SAFETY: preconditions identical to `Vluxseg`; see that arm for the full
                // argument
                unsafe {
                    zve64x_load_helpers::execute_indexed_load(
                        state,
                        vd,
                        vs2,
                        vm,
                        state.ext_state.vl(),
                        u32::from(state.ext_state.vstart()),
                        state.regs.read(rs1).as_u64(), // scalar base address
                        vtype.vsew().as_eew(),
                        index_eew,
                        data_group_regs,
                        nf,
                    )?;
                }
            }
799        }
800
801        Ok(ControlFlow::Continue(()))
802    }
803}