ab_riscv_interpreter/v/zvexx/load.rs
1//! ZveXx vector load instructions
2
3#[cfg(test)]
4mod tests;
5pub mod zvexx_load_helpers;
6
7use crate::v::vector_registers::VectorRegistersExt;
8use crate::v::zvexx::zvexx_helpers;
9use crate::{
10 ExecutableInstruction, ExecutableInstructionCsr, ExecutableInstructionOperands, ExecutionError,
11 ProgramCounter, RegisterFile, Rs1Rs2OperandValues, Rs1Rs2Operands, VirtualMemory,
12};
13use ab_riscv_macros::instruction_execution;
14use ab_riscv_primitives::prelude::*;
15use core::fmt;
16use core::ops::ControlFlow;
17
// Operand-extraction impl for the vector load instructions. The body is intentionally empty:
// nothing is overridden here.
// NOTE(review): presumably the trait's items are supplied by defaults and/or by the
// `#[instruction_execution]` attribute macro - confirm against the macro's documentation.
#[instruction_execution]
impl<Reg> ExecutableInstructionOperands for ZveXxLoadInstruction<Reg> where Reg: Register {}
20
// CSR-related impl for the vector load instructions. The body is intentionally empty: nothing is
// overridden here for any `ExtState`/`CustomError` combination.
// NOTE(review): presumably the trait's items are supplied by defaults and/or by the
// `#[instruction_execution]` attribute macro - confirm against the macro's documentation.
#[instruction_execution]
impl<Reg, ExtState, CustomError> ExecutableInstructionCsr<ExtState, CustomError>
    for ZveXxLoadInstruction<Reg>
where
    Reg: Register,
{
}
28
29#[instruction_execution]
30impl<Reg, Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
31 ExecutableInstruction<Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
32 for ZveXxLoadInstruction<Reg>
33where
34 Reg: Register,
35 Regs: RegisterFile<Reg>,
36 ExtState: VectorRegistersExt<Reg, CustomError>,
37 [(); ExtState::ELEN as usize]:,
38 [(); ExtState::VLEN as usize]:,
39 [(); ExtState::VLENB as usize]:,
40 Memory: VirtualMemory,
41 PC: ProgramCounter<Reg::Type, Memory, CustomError>,
42 CustomError: fmt::Debug,
43{
44 #[inline(always)]
45 fn execute(
46 self,
47 Rs1Rs2OperandValues {
48 rs1_value,
49 rs2_value,
50 }: Rs1Rs2OperandValues<<Self::Reg as Register>::Type>,
51 _regs: &mut Regs,
52 ext_state: &mut ExtState,
53 memory: &mut Memory,
54 program_counter: &mut PC,
55 _system_instruction_handler: &mut InstructionHandler,
56 ) -> Result<
57 ControlFlow<(), (Self::Reg, <Self::Reg as Register>::Type)>,
58 ExecutionError<Reg::Type, CustomError>,
59 > {
60 match self {
61 // Whole-register load: loads `nreg` consecutive registers starting at `vd` directly
62 // from memory. `vd` must be aligned to `nreg`. Ignores vtype, vl, vstart, masking.
63 Self::Vlr {
64 vd,
65 rs1: _,
66 nreg,
67 eew: _,
68 } => {
69 let nreg = nreg.num_registers();
70 if !ext_state.vector_instructions_allowed() {
71 ::core::hint::cold_path();
72 return Err(ExecutionError::IllegalInstruction {
73 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
74 });
75 }
76 if vd.to_bits() % nreg != 0 {
77 ::core::hint::cold_path();
78 return Err(ExecutionError::IllegalInstruction {
79 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
80 });
81 }
82 let base = rs1_value.as_u64();
83 let vlenb = u64::from(ExtState::VLENB);
84 for reg_off in 0..nreg {
85 // SAFETY: the decoder guarantees nreg in {1,2,4,8} and vd is nreg-aligned
86 // (checked above), so vd.to_bits() + nreg - 1 <= 31.
87 let reg = unsafe { VReg::from_bits(vd.to_bits() + reg_off).unwrap_unchecked() };
88 let bytes = memory
89 .read_slice(base + u64::from(reg_off) * vlenb, ExtState::VLENB)
90 .inspect_err(|_error| {
91 if reg_off > 0 {
92 ext_state.mark_vs_dirty();
93 ext_state.reset_vstart();
94 }
95 })?;
96 ext_state.write_vregs().get_mut(reg).copy_from_slice(bytes);
97 }
98 ext_state.mark_vs_dirty();
99 ext_state.reset_vstart();
100 }
101
102 // Mask load: loads ceil(vl / 8) bytes from base into vd with no masking applied.
103 // Does not require a valid vtype: when vill is set vl is 0, so zero bytes are read.
104 Self::Vlm { vd, rs1: _ } => {
105 if !ext_state.vector_instructions_allowed() {
106 ::core::hint::cold_path();
107 return Err(ExecutionError::IllegalInstruction {
108 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
109 });
110 }
111 let vl = ext_state.vl();
112 let byte_count = vl.div_ceil(u8::BITS);
113 if byte_count > 0 {
114 let base = rs1_value.as_u64();
115 let bytes = memory.read_slice(base, byte_count)?;
116 // SAFETY: `bytes.len() == byte_count = vl.div_ceil(8) <= VLEN / 8 = VLENB`
117 // because `vl <= VLMAX <= VLEN`, so `..bytes.len()` is in bounds within the
118 // `VLENB`-byte destination register.
119 unsafe {
120 ext_state
121 .write_vregs()
122 .get_mut(vd)
123 .get_unchecked_mut(..bytes.len())
124 .copy_from_slice(bytes);
125 }
126 }
127 ext_state.mark_vs_dirty();
128 ext_state.reset_vstart();
129 }
130
131 // Unit-stride load.
132 //
133 // Destination EMUL = EEW/SEW * LMUL, computed via `index_register_count`. This
134 // gives `group_regs` such that `VLMAX = group_regs * VLENB / eew.bytes()` matches
135 // the architectural `vl`.
136 Self::Vle {
137 vd,
138 rs1: _,
139 vm,
140 eew,
141 } => {
142 if !ext_state.vector_instructions_allowed() {
143 ::core::hint::cold_path();
144 return Err(ExecutionError::IllegalInstruction {
145 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
146 });
147 }
148 let Some(vtype) = ext_state.vtype() else {
149 ::core::hint::cold_path();
150 return Err(ExecutionError::IllegalInstruction {
151 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
152 });
153 };
154 let group_regs = vtype
155 .vlmul()
156 .index_register_count(eew, vtype.vsew())
157 .ok_or(ExecutionError::IllegalInstruction {
158 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
159 })?;
160 zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
161 program_counter,
162 vd,
163 group_regs,
164 )?;
165 if !vm && zvexx_load_helpers::groups_overlap(vd, group_regs, VReg::V0, 1) {
166 ::core::hint::cold_path();
167 return Err(ExecutionError::IllegalInstruction {
168 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
169 });
170 }
171 // SAFETY:
172 // - alignment: `check_register_group_alignment` verified `vd % group_regs == 0` and
173 // `vd + group_regs <= 32`, satisfying both the alignment and nf=1 bounds
174 // preconditions
175 // - `vl <= group_regs * VLENB / eew.bytes()`: `group_regs` is the EMUL computed for
176 // this `eew` and `vtype`, so this VLMAX equals the architectural VLMAX that
177 // bounds `vl`
178 // - mask overlap: checked above via `groups_overlap`
179 unsafe {
180 zvexx_load_helpers::execute_unit_stride_load::<false, _, _, _, _>(
181 ext_state,
182 memory,
183 vd,
184 vm,
185 rs1_value.as_u64(),
186 eew,
187 group_regs,
188 Nf::N1,
189 )?;
190 }
191 }
192
193 // Fault-only-first unit-stride load. Preconditions identical to `Vle`.
194 Self::Vleff {
195 vd,
196 rs1: _,
197 vm,
198 eew,
199 } => {
200 if !ext_state.vector_instructions_allowed() {
201 ::core::hint::cold_path();
202 return Err(ExecutionError::IllegalInstruction {
203 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
204 });
205 }
206 let Some(vtype) = ext_state.vtype() else {
207 ::core::hint::cold_path();
208 return Err(ExecutionError::IllegalInstruction {
209 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
210 });
211 };
212 let group_regs = vtype
213 .vlmul()
214 .index_register_count(eew, vtype.vsew())
215 .ok_or(ExecutionError::IllegalInstruction {
216 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
217 })?;
218 zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
219 program_counter,
220 vd,
221 group_regs,
222 )?;
223 if !vm && zvexx_load_helpers::groups_overlap(vd, group_regs, VReg::V0, 1) {
224 ::core::hint::cold_path();
225 return Err(ExecutionError::IllegalInstruction {
226 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
227 });
228 }
229 // SAFETY: preconditions identical to `Vle`; see that arm for the full argument.
230 unsafe {
231 zvexx_load_helpers::execute_unit_stride_load::<true, _, _, _, _>(
232 ext_state,
233 memory,
234 vd,
235 vm,
236 rs1_value.as_u64(),
237 eew,
238 group_regs,
239 Nf::N1,
240 )?;
241 }
242 }
243
244 // Strided load. Destination EMUL = EEW/SEW * LMUL as for unit-stride.
245 Self::Vlse {
246 vd,
247 rs1: _,
248 rs2: _,
249 vm,
250 eew,
251 } => {
252 if !ext_state.vector_instructions_allowed() {
253 ::core::hint::cold_path();
254 return Err(ExecutionError::IllegalInstruction {
255 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
256 });
257 }
258 let Some(vtype) = ext_state.vtype() else {
259 ::core::hint::cold_path();
260 return Err(ExecutionError::IllegalInstruction {
261 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
262 });
263 };
264 let group_regs = vtype
265 .vlmul()
266 .index_register_count(eew, vtype.vsew())
267 .ok_or(ExecutionError::IllegalInstruction {
268 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
269 })?;
270 zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
271 program_counter,
272 vd,
273 group_regs,
274 )?;
275 if !vm && zvexx_load_helpers::groups_overlap(vd, group_regs, VReg::V0, 1) {
276 ::core::hint::cold_path();
277 return Err(ExecutionError::IllegalInstruction {
278 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
279 });
280 }
281 // rs2 holds a signed stride; reinterpret the register value as signed
282 let stride = rs2_value.as_i64();
283 // SAFETY:
284 // - alignment and nf=1 bounds: `check_register_group_alignment` verified `vd %
285 // group_regs == 0` and `vd + group_regs <= 32`
286 // - `vl <= group_regs * VLENB / eew.bytes()`: `group_regs` is the EMUL for this
287 // `eew` and `vtype`, so this VLMAX equals the architectural VLMAX bounding `vl`
288 // - mask overlap: checked above via `groups_overlap`
289 unsafe {
290 zvexx_load_helpers::execute_strided_load(
291 ext_state,
292 memory,
293 vd,
294 vm,
295 rs1_value.as_u64(),
296 stride,
297 eew,
298 group_regs,
299 Nf::N1,
300 )?;
301 }
302 }
303
304 // Indexed-unordered load: eew is the index EEW; data EEW comes from vtype.vsew().
305 // The data destination uses the base LMUL (data EEW = SEW for indexed loads).
306 Self::Vluxei {
307 vd,
308 rs1: _,
309 vs2,
310 vm,
311 eew: index_eew,
312 } => {
313 if !ext_state.vector_instructions_allowed() {
314 ::core::hint::cold_path();
315 return Err(ExecutionError::IllegalInstruction {
316 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
317 });
318 }
319 let Some(vtype) = ext_state.vtype() else {
320 ::core::hint::cold_path();
321 return Err(ExecutionError::IllegalInstruction {
322 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
323 });
324 };
325 let data_group_regs = vtype.vlmul().register_count();
326 let index_group_regs = vtype
327 .vlmul()
328 .index_register_count(index_eew, vtype.vsew())
329 .ok_or(ExecutionError::IllegalInstruction {
330 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
331 })?;
332 zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
333 program_counter,
334 vd,
335 data_group_regs,
336 )?;
337 zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
338 program_counter,
339 vs2,
340 index_group_regs,
341 )?;
342 // Non-segment indexed loads permit `vd`/`vs2` overlap under the general
343 // EEW-relative overlap rule (e.g. when the data and index EEW match); only
344 // disallowed overlaps are reserved.
345 if !zvexx_load_helpers::indexed_load_overlap_allowed(
346 vd,
347 data_group_regs,
348 vs2,
349 index_group_regs,
350 index_eew,
351 vtype.vsew(),
352 vtype.vlmul(),
353 ) {
354 ::core::hint::cold_path();
355 return Err(ExecutionError::IllegalInstruction {
356 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
357 });
358 }
359 if !vm && zvexx_load_helpers::groups_overlap(vd, data_group_regs, VReg::V0, 1) {
360 ::core::hint::cold_path();
361 return Err(ExecutionError::IllegalInstruction {
362 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
363 });
364 }
365 // SAFETY:
366 // - data alignment/nf=1 bounds: `check_register_group_alignment` on `vd`
367 // - index alignment/bounds: `check_register_group_alignment` on `vs2`
368 // - `vl <= data_group_regs * VLENB / data_eew.bytes()`: data EEW = SEW and
369 // `data_group_regs = LMUL`, so VLMAX = LMUL * VLEN / SEW, which bounds `vl`
370 // - `vl <= index_group_regs * VLENB / index_eew.bytes()`: `index_group_regs` is
371 // EMUL_index defined so this VLMAX_index equals the architectural VLMAX
372 // - `vd`/`vs2` overlap (if any) satisfies the general EEW overlap rule, checked
373 // above; the in-order element loop reads index element `i` before writing data
374 // element `i`, and that rule guarantees a data write never clobbers an index
375 // element that has not yet been consumed
376 // - mask overlap: checked above via `groups_overlap`
377 unsafe {
378 zvexx_load_helpers::execute_indexed_load(
379 ext_state,
380 memory,
381 vd,
382 vs2,
383 vm,
384 rs1_value.as_u64(),
385 vtype.vsew().as_eew(),
386 index_eew,
387 data_group_regs,
388 Nf::N1,
389 )?;
390 }
391 }
392
393 // Indexed-ordered load: functionally identical to `Vluxei` for a software
394 // interpreter; memory access ordering has no observable effect here.
395 Self::Vloxei {
396 vd,
397 rs1: _,
398 vs2,
399 vm,
400 eew: index_eew,
401 } => {
402 if !ext_state.vector_instructions_allowed() {
403 ::core::hint::cold_path();
404 return Err(ExecutionError::IllegalInstruction {
405 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
406 });
407 }
408 let Some(vtype) = ext_state.vtype() else {
409 ::core::hint::cold_path();
410 return Err(ExecutionError::IllegalInstruction {
411 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
412 });
413 };
414 let data_group_regs = vtype.vlmul().register_count();
415 let index_group_regs = vtype
416 .vlmul()
417 .index_register_count(index_eew, vtype.vsew())
418 .ok_or(ExecutionError::IllegalInstruction {
419 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
420 })?;
421 zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
422 program_counter,
423 vd,
424 data_group_regs,
425 )?;
426 zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
427 program_counter,
428 vs2,
429 index_group_regs,
430 )?;
431 // Non-segment indexed loads permit `vd`/`vs2` overlap under the general
432 // EEW-relative overlap rule; see the `Vluxei` arm for details.
433 if !zvexx_load_helpers::indexed_load_overlap_allowed(
434 vd,
435 data_group_regs,
436 vs2,
437 index_group_regs,
438 index_eew,
439 vtype.vsew(),
440 vtype.vlmul(),
441 ) {
442 ::core::hint::cold_path();
443 return Err(ExecutionError::IllegalInstruction {
444 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
445 });
446 }
447 if !vm && zvexx_load_helpers::groups_overlap(vd, data_group_regs, VReg::V0, 1) {
448 ::core::hint::cold_path();
449 return Err(ExecutionError::IllegalInstruction {
450 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
451 });
452 }
453 // SAFETY: preconditions identical to `Vluxei`; see that arm for the full
454 // argument.
455 unsafe {
456 zvexx_load_helpers::execute_indexed_load(
457 ext_state,
458 memory,
459 vd,
460 vs2,
461 vm,
462 rs1_value.as_u64(),
463 vtype.vsew().as_eew(),
464 index_eew,
465 data_group_regs,
466 Nf::N1,
467 )?;
468 }
469 }
470
471 // Unit-stride segment load. EMUL = EEW/SEW * LMUL per field group.
472 Self::Vlseg {
473 vd,
474 rs1: _,
475 eew,
476 vm_nf,
477 } => {
478 let vm = vm_nf.vm();
479 let nf = vm_nf.nf();
480 if !ext_state.vector_instructions_allowed() {
481 ::core::hint::cold_path();
482 return Err(ExecutionError::IllegalInstruction {
483 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
484 });
485 }
486 let Some(vtype) = ext_state.vtype() else {
487 ::core::hint::cold_path();
488 return Err(ExecutionError::IllegalInstruction {
489 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
490 });
491 };
492 let group_regs = vtype
493 .vlmul()
494 .index_register_count(eew, vtype.vsew())
495 .ok_or(ExecutionError::IllegalInstruction {
496 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
497 })?;
498 zvexx_load_helpers::validate_segment_registers::<Reg, _, _, _>(
499 program_counter,
500 vd,
501 vm,
502 group_regs,
503 nf,
504 )?;
505 // SAFETY:
506 // - alignment and nf-group bounds: `validate_segment_registers` verified `vd %
507 // group_regs == 0` and `vd + nf * group_regs <= 32`
508 // - `vl <= group_regs * VLENB / eew.bytes()`: `group_regs` is the EMUL for this
509 // `eew` and `vtype`, so this VLMAX equals the architectural VLMAX bounding `vl`
510 // - mask overlap with v0: `validate_segment_registers` checked `vd.to_bits() != 0`
511 // when `vm=false`, ensuring no field group contains v0
512 unsafe {
513 zvexx_load_helpers::execute_unit_stride_load::<false, _, _, _, _>(
514 ext_state,
515 memory,
516 vd,
517 vm,
518 rs1_value.as_u64(),
519 eew,
520 group_regs,
521 nf,
522 )?;
523 }
524 }
525
526 // Fault-only-first segment load. Preconditions identical to `Vlseg`.
527 Self::Vlsegff {
528 vd,
529 rs1: _,
530 eew,
531 vm_nf,
532 } => {
533 let vm = vm_nf.vm();
534 let nf = vm_nf.nf();
535 if !ext_state.vector_instructions_allowed() {
536 ::core::hint::cold_path();
537 return Err(ExecutionError::IllegalInstruction {
538 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
539 });
540 }
541 let Some(vtype) = ext_state.vtype() else {
542 ::core::hint::cold_path();
543 return Err(ExecutionError::IllegalInstruction {
544 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
545 });
546 };
547 let group_regs = vtype
548 .vlmul()
549 .index_register_count(eew, vtype.vsew())
550 .ok_or(ExecutionError::IllegalInstruction {
551 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
552 })?;
553 zvexx_load_helpers::validate_segment_registers::<Reg, _, _, _>(
554 program_counter,
555 vd,
556 vm,
557 group_regs,
558 nf,
559 )?;
560 // SAFETY: preconditions identical to `Vlseg`; see that arm for the full argument.
561 unsafe {
562 zvexx_load_helpers::execute_unit_stride_load::<true, _, _, _, _>(
563 ext_state,
564 memory,
565 vd,
566 vm,
567 rs1_value.as_u64(),
568 eew,
569 group_regs,
570 nf,
571 )?;
572 }
573 }
574
575 // Strided segment load. EMUL = EEW/SEW * LMUL as for `Vlse`.
576 Self::Vlsseg {
577 vd,
578 rs1: _,
579 rs2: _,
580 eew,
581 vm_nf,
582 } => {
583 let vm = vm_nf.vm();
584 let nf = vm_nf.nf();
585 if !ext_state.vector_instructions_allowed() {
586 ::core::hint::cold_path();
587 return Err(ExecutionError::IllegalInstruction {
588 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
589 });
590 }
591 let Some(vtype) = ext_state.vtype() else {
592 ::core::hint::cold_path();
593 return Err(ExecutionError::IllegalInstruction {
594 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
595 });
596 };
597 let group_regs = vtype
598 .vlmul()
599 .index_register_count(eew, vtype.vsew())
600 .ok_or(ExecutionError::IllegalInstruction {
601 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
602 })?;
603 zvexx_load_helpers::validate_segment_registers::<Reg, _, _, _>(
604 program_counter,
605 vd,
606 vm,
607 group_regs,
608 nf,
609 )?;
610 let stride = rs2_value.as_i64();
611 // SAFETY:
612 // - alignment and nf-group bounds: `validate_segment_registers` verified `vd %
613 // group_regs == 0` and `vd + nf * group_regs <= 32`
614 // - `vl <= group_regs * VLENB / eew.bytes()`: `group_regs` is EMUL for this `eew`
615 // and `vtype`
616 // - mask overlap: `validate_segment_registers` checked `vd.to_bits() != 0` when
617 // `vm=false`
618 unsafe {
619 zvexx_load_helpers::execute_strided_load(
620 ext_state,
621 memory,
622 vd,
623 vm,
624 rs1_value.as_u64(),
625 stride,
626 eew,
627 group_regs,
628 nf,
629 )?;
630 }
631 }
632
633 // Indexed-unordered segment load
634 Self::Vluxseg {
635 vd,
636 rs1: _,
637 vs2,
638 eew: index_eew,
639 vm_nf,
640 } => {
641 let vm = vm_nf.vm();
642 let nf = vm_nf.nf();
643 if !ext_state.vector_instructions_allowed() {
644 ::core::hint::cold_path();
645 return Err(ExecutionError::IllegalInstruction {
646 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
647 });
648 }
649 let Some(vtype) = ext_state.vtype() else {
650 ::core::hint::cold_path();
651 return Err(ExecutionError::IllegalInstruction {
652 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
653 });
654 };
655 let data_group_regs = vtype.vlmul().register_count();
656 let index_group_regs = vtype
657 .vlmul()
658 .index_register_count(index_eew, vtype.vsew())
659 .ok_or(ExecutionError::IllegalInstruction {
660 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
661 })?;
662 // `validate_segment_registers` is called before the per-field overlap loop so
663 // that `vd.to_bits() + f * data_group_regs < 32` is established for all `f < nf`,
664 // which is required by the `VReg::from_bits` call inside the loop.
665 zvexx_load_helpers::validate_segment_registers::<Reg, _, _, _>(
666 program_counter,
667 vd,
668 vm,
669 data_group_regs,
670 nf,
671 )?;
672 zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
673 program_counter,
674 vs2,
675 index_group_regs,
676 )?;
677 for f in 0..nf.fields_per_segment() {
678 // SAFETY: `vd.to_bits() + f * data_group_regs < 32` because
679 // `validate_segment_registers` established `vd.to_bits() + nf * data_group_regs
680 // <= 32` and `f < nf`. The value is in [0, 31], so it is a valid `VReg`
681 // encoding.
682 let field_vd = unsafe {
683 VReg::from_bits(vd.to_bits() + f * data_group_regs).unwrap_unchecked()
684 };
685 if zvexx_load_helpers::groups_overlap(
686 field_vd,
687 data_group_regs,
688 vs2,
689 index_group_regs,
690 ) {
691 ::core::hint::cold_path();
692 return Err(ExecutionError::IllegalInstruction {
693 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
694 });
695 }
696 }
697 // SAFETY:
698 // - data alignment/nf-group bounds: `validate_segment_registers` verified `vd %
699 // data_group_regs == 0` and `vd + nf * data_group_regs <= 32`
700 // - index alignment/bounds: `check_register_group_alignment` verified `vs2 %
701 // EMUL_index == 0` and `vs2 + EMUL_index <= 32`
702 // - no field/index group overlap: verified by the loop above
703 // - `vl <= data_group_regs * VLENB / data_eew.bytes()`: data EEW = SEW and
704 // `data_group_regs = LMUL`, so VLMAX = LMUL * VLEN / SEW bounds `vl`
705 // - `vl <= EMUL_index * VLENB / index_eew.bytes()`: `index_group_regs` (EMUL_index)
706 // is defined so this VLMAX_index equals the architectural VLMAX
707 // - mask overlap: `validate_segment_registers` checked `vd.to_bits() != 0` when
708 // `vm=false`, and no field group starts at 0 since groups are contiguous from
709 // `vd` which is nonzero
710 unsafe {
711 zvexx_load_helpers::execute_indexed_load(
712 ext_state,
713 memory,
714 vd,
715 vs2,
716 vm,
717 rs1_value.as_u64(),
718 vtype.vsew().as_eew(),
719 index_eew,
720 data_group_regs,
721 nf,
722 )?;
723 }
724 }
725
726 // Indexed-ordered segment load: functionally identical to `Vluxseg` for a software
727 // interpreter
728 Self::Vloxseg {
729 vd,
730 rs1: _,
731 vs2,
732 eew: index_eew,
733 vm_nf,
734 } => {
735 let vm = vm_nf.vm();
736 let nf = vm_nf.nf();
737 if !ext_state.vector_instructions_allowed() {
738 ::core::hint::cold_path();
739 return Err(ExecutionError::IllegalInstruction {
740 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
741 });
742 }
743 let Some(vtype) = ext_state.vtype() else {
744 ::core::hint::cold_path();
745 return Err(ExecutionError::IllegalInstruction {
746 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
747 });
748 };
749 let data_group_regs = vtype.vlmul().register_count();
750 let index_group_regs = vtype
751 .vlmul()
752 .index_register_count(index_eew, vtype.vsew())
753 .ok_or(ExecutionError::IllegalInstruction {
754 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
755 })?;
756 zvexx_load_helpers::validate_segment_registers::<Reg, _, _, _>(
757 program_counter,
758 vd,
759 vm,
760 data_group_regs,
761 nf,
762 )?;
763 zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
764 program_counter,
765 vs2,
766 index_group_regs,
767 )?;
768 for f in 0..nf.fields_per_segment() {
769 // SAFETY: `vd.to_bits() + f * data_group_regs < 32` because
770 // `validate_segment_registers` established `vd.to_bits() + nf * data_group_regs
771 // <= 32` and `f < nf`. The value is in [0, 31], so it is a valid `VReg`
772 // encoding.
773 let field_vd = unsafe {
774 VReg::from_bits(vd.to_bits() + f * data_group_regs).unwrap_unchecked()
775 };
776 if zvexx_load_helpers::groups_overlap(
777 field_vd,
778 data_group_regs,
779 vs2,
780 index_group_regs,
781 ) {
782 ::core::hint::cold_path();
783 return Err(ExecutionError::IllegalInstruction {
784 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
785 });
786 }
787 }
788 // SAFETY: preconditions identical to `Vluxseg`; see that arm for the full
789 // argument
790 unsafe {
791 zvexx_load_helpers::execute_indexed_load(
792 ext_state,
793 memory,
794 vd,
795 vs2,
796 vm,
797 rs1_value.as_u64(),
798 vtype.vsew().as_eew(),
799 index_eew,
800 data_group_regs,
801 nf,
802 )?;
803 }
804 }
805 }
806
807 Ok(ControlFlow::Continue(Default::default()))
808 }
809}