// ab_riscv_interpreter/v/zvexx/load.rs
//! ZveXx vector load instructions

3#[cfg(test)]
4mod tests;
5pub mod zvexx_load_helpers;
6
7use crate::v::vector_registers::VectorRegistersExt;
8use crate::v::zvexx::zvexx_helpers;
9use crate::{
10 ExecutableInstruction, ExecutableInstructionCsr, ExecutableInstructionOperands, ExecutionError,
11 ProgramCounter, RegisterFile, Rs1Rs2OperandValues, Rs1Rs2Operands, VirtualMemory,
12};
13use ab_riscv_macros::instruction_execution;
14use ab_riscv_primitives::prelude::*;
15use core::fmt;
16use core::ops::ControlFlow;
17
18#[instruction_execution]
19impl<Reg> ExecutableInstructionOperands for ZveXxLoadInstruction<Reg> where Reg: Register {}
20
21#[instruction_execution]
22impl<Reg, ExtState, CustomError> ExecutableInstructionCsr<ExtState, CustomError>
23 for ZveXxLoadInstruction<Reg>
24where
25 Reg: Register,
26{
27}
28
29#[instruction_execution]
30impl<Reg, Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
31 ExecutableInstruction<Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
32 for ZveXxLoadInstruction<Reg>
33where
34 Reg: Register,
35 Regs: RegisterFile<Reg>,
36 ExtState: VectorRegistersExt<Reg, CustomError>,
37 [(); ExtState::ELEN as usize]:,
38 [(); ExtState::VLEN as usize]:,
39 [(); ExtState::VLENB as usize]:,
40 Memory: VirtualMemory,
41 PC: ProgramCounter<Reg::Type, Memory, CustomError>,
42 CustomError: fmt::Debug,
43{
44 #[inline(always)]
45 fn execute(
46 self,
47 Rs1Rs2OperandValues {
48 rs1_value,
49 rs2_value,
50 }: Rs1Rs2OperandValues<<Self::Reg as Register>::Type>,
51 _regs: &mut Regs,
52 ext_state: &mut ExtState,
53 memory: &mut Memory,
54 program_counter: &mut PC,
55 _system_instruction_handler: &mut InstructionHandler,
56 ) -> Result<
57 ControlFlow<(), (Self::Reg, <Self::Reg as Register>::Type)>,
58 ExecutionError<Reg::Type, CustomError>,
59 > {
60 match self {
61 // Whole-register load: loads `nreg` consecutive registers starting at `vd` directly
62 // from memory. `vd` must be aligned to `nreg`. Ignores vtype, vl, vstart, masking.
63 Self::Vlr {
64 vd,
65 rs1: _,
66 nreg,
67 eew: _,
68 } => {
69 let nreg = nreg.num_registers();
70 if !ext_state.vector_instructions_allowed() {
71 return Err(ExecutionError::IllegalInstruction {
72 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
73 });
74 }
75 if vd.to_bits() % nreg != 0 {
76 return Err(ExecutionError::IllegalInstruction {
77 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
78 });
79 }
80 let base = rs1_value.as_u64();
81 let vlenb = u64::from(ExtState::VLENB);
82 for reg_off in 0..nreg {
83 // SAFETY: the decoder guarantees nreg in {1,2,4,8} and vd is nreg-aligned
84 // (checked above), so vd.to_bits() + nreg - 1 <= 31.
85 let reg = unsafe { VReg::from_bits(vd.to_bits() + reg_off).unwrap_unchecked() };
86 let bytes = memory
87 .read_slice(base + u64::from(reg_off) * vlenb, ExtState::VLENB)
88 .inspect_err(|_error| {
89 if reg_off > 0 {
90 ext_state.mark_vs_dirty();
91 ext_state.reset_vstart();
92 }
93 })?;
94 ext_state.write_vregs().get_mut(reg).copy_from_slice(bytes);
95 }
96 ext_state.mark_vs_dirty();
97 ext_state.reset_vstart();
98 }
99
100 // Mask load: loads ceil(vl / 8) bytes from base into vd with no masking applied.
101 // Does not require a valid vtype: when vill is set vl is 0, so zero bytes are read.
102 Self::Vlm { vd, rs1: _ } => {
103 if !ext_state.vector_instructions_allowed() {
104 return Err(ExecutionError::IllegalInstruction {
105 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
106 });
107 }
108 let vl = ext_state.vl();
109 let byte_count = vl.div_ceil(u8::BITS);
110 if byte_count > 0 {
111 let base = rs1_value.as_u64();
112 let bytes = memory.read_slice(base, byte_count)?;
113 // SAFETY: `bytes.len() == byte_count = vl.div_ceil(8) <= VLEN / 8 = VLENB`
114 // because `vl <= VLMAX <= VLEN`, so `..bytes.len()` is in bounds within the
115 // `VLENB`-byte destination register.
116 unsafe {
117 ext_state
118 .write_vregs()
119 .get_mut(vd)
120 .get_unchecked_mut(..bytes.len())
121 .copy_from_slice(bytes);
122 }
123 }
124 ext_state.mark_vs_dirty();
125 ext_state.reset_vstart();
126 }
127
128 // Unit-stride load.
129 //
130 // Destination EMUL = EEW/SEW * LMUL, computed via `index_register_count`. This
131 // gives `group_regs` such that `VLMAX = group_regs * VLENB / eew.bytes()` matches
132 // the architectural `vl`.
133 Self::Vle {
134 vd,
135 rs1: _,
136 vm,
137 eew,
138 } => {
139 if !ext_state.vector_instructions_allowed() {
140 return Err(ExecutionError::IllegalInstruction {
141 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
142 });
143 }
144 let vtype = ext_state
145 .vtype()
146 .ok_or(ExecutionError::IllegalInstruction {
147 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
148 })?;
149 let group_regs = vtype
150 .vlmul()
151 .index_register_count(eew, vtype.vsew())
152 .ok_or(ExecutionError::IllegalInstruction {
153 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
154 })?;
155 zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
156 program_counter,
157 vd,
158 group_regs,
159 )?;
160 if !vm && zvexx_load_helpers::groups_overlap(vd, group_regs, VReg::V0, 1) {
161 return Err(ExecutionError::IllegalInstruction {
162 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
163 });
164 }
165 // SAFETY:
166 // - alignment: `check_register_group_alignment` verified `vd % group_regs == 0` and
167 // `vd + group_regs <= 32`, satisfying both the alignment and nf=1 bounds
168 // preconditions
169 // - `vl <= group_regs * VLENB / eew.bytes()`: `group_regs` is the EMUL computed for
170 // this `eew` and `vtype`, so this VLMAX equals the architectural VLMAX that
171 // bounds `vl`
172 // - mask overlap: checked above via `groups_overlap`
173 unsafe {
174 zvexx_load_helpers::execute_unit_stride_load(
175 ext_state,
176 memory,
177 vd,
178 vm,
179 rs1_value.as_u64(),
180 eew,
181 group_regs,
182 Nf::N1,
183 false,
184 )?;
185 }
186 }
187
188 // Fault-only-first unit-stride load. Preconditions identical to `Vle`.
189 Self::Vleff {
190 vd,
191 rs1: _,
192 vm,
193 eew,
194 } => {
195 if !ext_state.vector_instructions_allowed() {
196 return Err(ExecutionError::IllegalInstruction {
197 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
198 });
199 }
200 let vtype = ext_state
201 .vtype()
202 .ok_or(ExecutionError::IllegalInstruction {
203 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
204 })?;
205 let group_regs = vtype
206 .vlmul()
207 .index_register_count(eew, vtype.vsew())
208 .ok_or(ExecutionError::IllegalInstruction {
209 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
210 })?;
211 zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
212 program_counter,
213 vd,
214 group_regs,
215 )?;
216 if !vm && zvexx_load_helpers::groups_overlap(vd, group_regs, VReg::V0, 1) {
217 return Err(ExecutionError::IllegalInstruction {
218 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
219 });
220 }
221 // SAFETY: preconditions identical to `Vle`; see that arm for the full argument.
222 unsafe {
223 zvexx_load_helpers::execute_unit_stride_load(
224 ext_state,
225 memory,
226 vd,
227 vm,
228 rs1_value.as_u64(),
229 eew,
230 group_regs,
231 Nf::N1,
232 true,
233 )?;
234 }
235 }
236
237 // Strided load. Destination EMUL = EEW/SEW * LMUL as for unit-stride.
238 Self::Vlse {
239 vd,
240 rs1: _,
241 rs2: _,
242 vm,
243 eew,
244 } => {
245 if !ext_state.vector_instructions_allowed() {
246 return Err(ExecutionError::IllegalInstruction {
247 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
248 });
249 }
250 let vtype = ext_state
251 .vtype()
252 .ok_or(ExecutionError::IllegalInstruction {
253 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
254 })?;
255 let group_regs = vtype
256 .vlmul()
257 .index_register_count(eew, vtype.vsew())
258 .ok_or(ExecutionError::IllegalInstruction {
259 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
260 })?;
261 zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
262 program_counter,
263 vd,
264 group_regs,
265 )?;
266 if !vm && zvexx_load_helpers::groups_overlap(vd, group_regs, VReg::V0, 1) {
267 return Err(ExecutionError::IllegalInstruction {
268 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
269 });
270 }
271 // rs2 holds a signed stride; reinterpret the register value as signed
272 let stride = rs2_value.as_i64();
273 // SAFETY:
274 // - alignment and nf=1 bounds: `check_register_group_alignment` verified `vd %
275 // group_regs == 0` and `vd + group_regs <= 32`
276 // - `vl <= group_regs * VLENB / eew.bytes()`: `group_regs` is the EMUL for this
277 // `eew` and `vtype`, so this VLMAX equals the architectural VLMAX bounding `vl`
278 // - mask overlap: checked above via `groups_overlap`
279 unsafe {
280 zvexx_load_helpers::execute_strided_load(
281 ext_state,
282 memory,
283 vd,
284 vm,
285 rs1_value.as_u64(),
286 stride,
287 eew,
288 group_regs,
289 Nf::N1,
290 )?;
291 }
292 }
293
294 // Indexed-unordered load: eew is the index EEW; data EEW comes from vtype.vsew().
295 // The data destination uses the base LMUL (data EEW = SEW for indexed loads).
296 Self::Vluxei {
297 vd,
298 rs1: _,
299 vs2,
300 vm,
301 eew: index_eew,
302 } => {
303 if !ext_state.vector_instructions_allowed() {
304 return Err(ExecutionError::IllegalInstruction {
305 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
306 });
307 }
308 let vtype = ext_state
309 .vtype()
310 .ok_or(ExecutionError::IllegalInstruction {
311 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
312 })?;
313 let data_group_regs = vtype.vlmul().register_count();
314 let index_group_regs = vtype
315 .vlmul()
316 .index_register_count(index_eew, vtype.vsew())
317 .ok_or(ExecutionError::IllegalInstruction {
318 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
319 })?;
320 zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
321 program_counter,
322 vd,
323 data_group_regs,
324 )?;
325 zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
326 program_counter,
327 vs2,
328 index_group_regs,
329 )?;
330 // Non-segment indexed loads permit `vd`/`vs2` overlap under the general
331 // EEW-relative overlap rule (e.g. when the data and index EEW match); only
332 // disallowed overlaps are reserved.
333 if !zvexx_load_helpers::indexed_load_overlap_allowed(
334 vd,
335 data_group_regs,
336 vs2,
337 index_group_regs,
338 index_eew,
339 vtype.vsew(),
340 vtype.vlmul(),
341 ) {
342 return Err(ExecutionError::IllegalInstruction {
343 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
344 });
345 }
346 if !vm && zvexx_load_helpers::groups_overlap(vd, data_group_regs, VReg::V0, 1) {
347 return Err(ExecutionError::IllegalInstruction {
348 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
349 });
350 }
351 // SAFETY:
352 // - data alignment/nf=1 bounds: `check_register_group_alignment` on `vd`
353 // - index alignment/bounds: `check_register_group_alignment` on `vs2`
354 // - `vl <= data_group_regs * VLENB / data_eew.bytes()`: data EEW = SEW and
355 // `data_group_regs = LMUL`, so VLMAX = LMUL * VLEN / SEW, which bounds `vl`
356 // - `vl <= index_group_regs * VLENB / index_eew.bytes()`: `index_group_regs` is
357 // EMUL_index defined so this VLMAX_index equals the architectural VLMAX
358 // - `vd`/`vs2` overlap (if any) satisfies the general EEW overlap rule, checked
359 // above; the in-order element loop reads index element `i` before writing data
360 // element `i`, and that rule guarantees a data write never clobbers an index
361 // element that has not yet been consumed
362 // - mask overlap: checked above via `groups_overlap`
363 unsafe {
364 zvexx_load_helpers::execute_indexed_load(
365 ext_state,
366 memory,
367 vd,
368 vs2,
369 vm,
370 rs1_value.as_u64(),
371 vtype.vsew().as_eew(),
372 index_eew,
373 data_group_regs,
374 Nf::N1,
375 )?;
376 }
377 }
378
379 // Indexed-ordered load: functionally identical to `Vluxei` for a software
380 // interpreter; memory access ordering has no observable effect here.
381 Self::Vloxei {
382 vd,
383 rs1: _,
384 vs2,
385 vm,
386 eew: index_eew,
387 } => {
388 if !ext_state.vector_instructions_allowed() {
389 return Err(ExecutionError::IllegalInstruction {
390 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
391 });
392 }
393 let vtype = ext_state
394 .vtype()
395 .ok_or(ExecutionError::IllegalInstruction {
396 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
397 })?;
398 let data_group_regs = vtype.vlmul().register_count();
399 let index_group_regs = vtype
400 .vlmul()
401 .index_register_count(index_eew, vtype.vsew())
402 .ok_or(ExecutionError::IllegalInstruction {
403 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
404 })?;
405 zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
406 program_counter,
407 vd,
408 data_group_regs,
409 )?;
410 zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
411 program_counter,
412 vs2,
413 index_group_regs,
414 )?;
415 // Non-segment indexed loads permit `vd`/`vs2` overlap under the general
416 // EEW-relative overlap rule; see the `Vluxei` arm for details.
417 if !zvexx_load_helpers::indexed_load_overlap_allowed(
418 vd,
419 data_group_regs,
420 vs2,
421 index_group_regs,
422 index_eew,
423 vtype.vsew(),
424 vtype.vlmul(),
425 ) {
426 return Err(ExecutionError::IllegalInstruction {
427 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
428 });
429 }
430 if !vm && zvexx_load_helpers::groups_overlap(vd, data_group_regs, VReg::V0, 1) {
431 return Err(ExecutionError::IllegalInstruction {
432 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
433 });
434 }
435 // SAFETY: preconditions identical to `Vluxei`; see that arm for the full
436 // argument.
437 unsafe {
438 zvexx_load_helpers::execute_indexed_load(
439 ext_state,
440 memory,
441 vd,
442 vs2,
443 vm,
444 rs1_value.as_u64(),
445 vtype.vsew().as_eew(),
446 index_eew,
447 data_group_regs,
448 Nf::N1,
449 )?;
450 }
451 }
452
453 // Unit-stride segment load. EMUL = EEW/SEW * LMUL per field group.
454 Self::Vlseg {
455 vd,
456 rs1: _,
457 eew,
458 vm_nf,
459 } => {
460 let vm = vm_nf.vm();
461 let nf = vm_nf.nf();
462 if !ext_state.vector_instructions_allowed() {
463 return Err(ExecutionError::IllegalInstruction {
464 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
465 });
466 }
467 let vtype = ext_state
468 .vtype()
469 .ok_or(ExecutionError::IllegalInstruction {
470 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
471 })?;
472 let group_regs = vtype
473 .vlmul()
474 .index_register_count(eew, vtype.vsew())
475 .ok_or(ExecutionError::IllegalInstruction {
476 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
477 })?;
478 zvexx_load_helpers::validate_segment_registers::<Reg, _, _, _>(
479 program_counter,
480 vd,
481 vm,
482 group_regs,
483 nf,
484 )?;
485 // SAFETY:
486 // - alignment and nf-group bounds: `validate_segment_registers` verified `vd %
487 // group_regs == 0` and `vd + nf * group_regs <= 32`
488 // - `vl <= group_regs * VLENB / eew.bytes()`: `group_regs` is the EMUL for this
489 // `eew` and `vtype`, so this VLMAX equals the architectural VLMAX bounding `vl`
490 // - mask overlap with v0: `validate_segment_registers` checked `vd.to_bits() != 0`
491 // when `vm=false`, ensuring no field group contains v0
492 unsafe {
493 zvexx_load_helpers::execute_unit_stride_load(
494 ext_state,
495 memory,
496 vd,
497 vm,
498 rs1_value.as_u64(),
499 eew,
500 group_regs,
501 nf,
502 false,
503 )?;
504 }
505 }
506
507 // Fault-only-first segment load. Preconditions identical to `Vlseg`.
508 Self::Vlsegff {
509 vd,
510 rs1: _,
511 eew,
512 vm_nf,
513 } => {
514 let vm = vm_nf.vm();
515 let nf = vm_nf.nf();
516 if !ext_state.vector_instructions_allowed() {
517 return Err(ExecutionError::IllegalInstruction {
518 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
519 });
520 }
521 let vtype = ext_state
522 .vtype()
523 .ok_or(ExecutionError::IllegalInstruction {
524 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
525 })?;
526 let group_regs = vtype
527 .vlmul()
528 .index_register_count(eew, vtype.vsew())
529 .ok_or(ExecutionError::IllegalInstruction {
530 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
531 })?;
532 zvexx_load_helpers::validate_segment_registers::<Reg, _, _, _>(
533 program_counter,
534 vd,
535 vm,
536 group_regs,
537 nf,
538 )?;
539 // SAFETY: preconditions identical to `Vlseg`; see that arm for the full argument.
540 unsafe {
541 zvexx_load_helpers::execute_unit_stride_load(
542 ext_state,
543 memory,
544 vd,
545 vm,
546 rs1_value.as_u64(),
547 eew,
548 group_regs,
549 nf,
550 true,
551 )?;
552 }
553 }
554
555 // Strided segment load. EMUL = EEW/SEW * LMUL as for `Vlse`.
556 Self::Vlsseg {
557 vd,
558 rs1: _,
559 rs2: _,
560 eew,
561 vm_nf,
562 } => {
563 let vm = vm_nf.vm();
564 let nf = vm_nf.nf();
565 if !ext_state.vector_instructions_allowed() {
566 return Err(ExecutionError::IllegalInstruction {
567 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
568 });
569 }
570 let vtype = ext_state
571 .vtype()
572 .ok_or(ExecutionError::IllegalInstruction {
573 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
574 })?;
575 let group_regs = vtype
576 .vlmul()
577 .index_register_count(eew, vtype.vsew())
578 .ok_or(ExecutionError::IllegalInstruction {
579 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
580 })?;
581 zvexx_load_helpers::validate_segment_registers::<Reg, _, _, _>(
582 program_counter,
583 vd,
584 vm,
585 group_regs,
586 nf,
587 )?;
588 let stride = rs2_value.as_i64();
589 // SAFETY:
590 // - alignment and nf-group bounds: `validate_segment_registers` verified `vd %
591 // group_regs == 0` and `vd + nf * group_regs <= 32`
592 // - `vl <= group_regs * VLENB / eew.bytes()`: `group_regs` is EMUL for this `eew`
593 // and `vtype`
594 // - mask overlap: `validate_segment_registers` checked `vd.to_bits() != 0` when
595 // `vm=false`
596 unsafe {
597 zvexx_load_helpers::execute_strided_load(
598 ext_state,
599 memory,
600 vd,
601 vm,
602 rs1_value.as_u64(),
603 stride,
604 eew,
605 group_regs,
606 nf,
607 )?;
608 }
609 }
610
611 // Indexed-unordered segment load
612 Self::Vluxseg {
613 vd,
614 rs1: _,
615 vs2,
616 eew: index_eew,
617 vm_nf,
618 } => {
619 let vm = vm_nf.vm();
620 let nf = vm_nf.nf();
621 if !ext_state.vector_instructions_allowed() {
622 return Err(ExecutionError::IllegalInstruction {
623 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
624 });
625 }
626 let vtype = ext_state
627 .vtype()
628 .ok_or(ExecutionError::IllegalInstruction {
629 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
630 })?;
631 let data_group_regs = vtype.vlmul().register_count();
632 let index_group_regs = vtype
633 .vlmul()
634 .index_register_count(index_eew, vtype.vsew())
635 .ok_or(ExecutionError::IllegalInstruction {
636 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
637 })?;
638 // `validate_segment_registers` is called before the per-field overlap loop so
639 // that `vd.to_bits() + f * data_group_regs < 32` is established for all `f < nf`,
640 // which is required by the `VReg::from_bits` call inside the loop.
641 zvexx_load_helpers::validate_segment_registers::<Reg, _, _, _>(
642 program_counter,
643 vd,
644 vm,
645 data_group_regs,
646 nf,
647 )?;
648 zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
649 program_counter,
650 vs2,
651 index_group_regs,
652 )?;
653 for f in 0..nf.fields_per_segment() {
654 // SAFETY: `vd.to_bits() + f * data_group_regs < 32` because
655 // `validate_segment_registers` established `vd.to_bits() + nf * data_group_regs
656 // <= 32` and `f < nf`. The value is in [0, 31], so it is a valid `VReg`
657 // encoding.
658 let field_vd = unsafe {
659 VReg::from_bits(vd.to_bits() + f * data_group_regs).unwrap_unchecked()
660 };
661 if zvexx_load_helpers::groups_overlap(
662 field_vd,
663 data_group_regs,
664 vs2,
665 index_group_regs,
666 ) {
667 return Err(ExecutionError::IllegalInstruction {
668 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
669 });
670 }
671 }
672 // SAFETY:
673 // - data alignment/nf-group bounds: `validate_segment_registers` verified `vd %
674 // data_group_regs == 0` and `vd + nf * data_group_regs <= 32`
675 // - index alignment/bounds: `check_register_group_alignment` verified `vs2 %
676 // EMUL_index == 0` and `vs2 + EMUL_index <= 32`
677 // - no field/index group overlap: verified by the loop above
678 // - `vl <= data_group_regs * VLENB / data_eew.bytes()`: data EEW = SEW and
679 // `data_group_regs = LMUL`, so VLMAX = LMUL * VLEN / SEW bounds `vl`
680 // - `vl <= EMUL_index * VLENB / index_eew.bytes()`: `index_group_regs` (EMUL_index)
681 // is defined so this VLMAX_index equals the architectural VLMAX
682 // - mask overlap: `validate_segment_registers` checked `vd.to_bits() != 0` when
683 // `vm=false`, and no field group starts at 0 since groups are contiguous from
684 // `vd` which is nonzero
685 unsafe {
686 zvexx_load_helpers::execute_indexed_load(
687 ext_state,
688 memory,
689 vd,
690 vs2,
691 vm,
692 rs1_value.as_u64(),
693 vtype.vsew().as_eew(),
694 index_eew,
695 data_group_regs,
696 nf,
697 )?;
698 }
699 }
700
701 // Indexed-ordered segment load: functionally identical to `Vluxseg` for a software
702 // interpreter
703 Self::Vloxseg {
704 vd,
705 rs1: _,
706 vs2,
707 eew: index_eew,
708 vm_nf,
709 } => {
710 let vm = vm_nf.vm();
711 let nf = vm_nf.nf();
712 if !ext_state.vector_instructions_allowed() {
713 return Err(ExecutionError::IllegalInstruction {
714 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
715 });
716 }
717 let vtype = ext_state
718 .vtype()
719 .ok_or(ExecutionError::IllegalInstruction {
720 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
721 })?;
722 let data_group_regs = vtype.vlmul().register_count();
723 let index_group_regs = vtype
724 .vlmul()
725 .index_register_count(index_eew, vtype.vsew())
726 .ok_or(ExecutionError::IllegalInstruction {
727 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
728 })?;
729 zvexx_load_helpers::validate_segment_registers::<Reg, _, _, _>(
730 program_counter,
731 vd,
732 vm,
733 data_group_regs,
734 nf,
735 )?;
736 zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
737 program_counter,
738 vs2,
739 index_group_regs,
740 )?;
741 for f in 0..nf.fields_per_segment() {
742 // SAFETY: `vd.to_bits() + f * data_group_regs < 32` because
743 // `validate_segment_registers` established `vd.to_bits() + nf * data_group_regs
744 // <= 32` and `f < nf`. The value is in [0, 31], so it is a valid `VReg`
745 // encoding.
746 let field_vd = unsafe {
747 VReg::from_bits(vd.to_bits() + f * data_group_regs).unwrap_unchecked()
748 };
749 if zvexx_load_helpers::groups_overlap(
750 field_vd,
751 data_group_regs,
752 vs2,
753 index_group_regs,
754 ) {
755 return Err(ExecutionError::IllegalInstruction {
756 address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
757 });
758 }
759 }
760 // SAFETY: preconditions identical to `Vluxseg`; see that arm for the full
761 // argument
762 unsafe {
763 zvexx_load_helpers::execute_indexed_load(
764 ext_state,
765 memory,
766 vd,
767 vs2,
768 vm,
769 rs1_value.as_u64(),
770 vtype.vsew().as_eew(),
771 index_eew,
772 data_group_regs,
773 nf,
774 )?;
775 }
776 }
777 }
778
779 Ok(ControlFlow::Continue(Default::default()))
780 }
781}