ab_riscv_interpreter/v/zve64x/store.rs
1//! Zve64x vector store instructions
2
3#[cfg(test)]
4mod tests;
5pub mod zve64x_store_helpers;
6
7use crate::v::vector_registers::VectorRegistersExt;
8use crate::v::zve64x::load::zve64x_load_helpers;
9use crate::v::zve64x::zve64x_helpers;
10use crate::{
11 ExecutableInstruction, ExecutionError, InterpreterState, ProgramCounter, VirtualMemory,
12};
13use ab_riscv_macros::instruction_execution;
14use ab_riscv_primitives::instructions::v::zve64x::store::Zve64xStoreInstruction;
15use ab_riscv_primitives::registers::general_purpose::{RegType, Register};
16use ab_riscv_primitives::registers::vector::VReg;
17use core::fmt;
18use core::ops::ControlFlow;
19
#[instruction_execution]
impl<Reg, ExtState, Memory, PC, InstructionHandler, CustomError>
    ExecutableInstruction<
        InterpreterState<Reg, ExtState, Memory, PC, InstructionHandler, CustomError>,
        CustomError,
    > for Zve64xStoreInstruction<Reg>
where
    Reg: Register,
    [(); Reg::N]:,
    ExtState: VectorRegistersExt<Reg, CustomError>,
    [(); ExtState::ELEN as usize]:,
    [(); ExtState::VLEN as usize]:,
    [(); ExtState::VLENB as usize]:,
    Memory: VirtualMemory,
    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
    CustomError: fmt::Debug,
{
    /// Executes one Zve64x vector store instruction against the interpreter `state`.
    ///
    /// Every variant first rejects with `IllegalInstruction` when vector instructions
    /// are disabled; the variants that depend on `vtype` additionally reject when
    /// `vtype()` returns `None` or when the EMUL computation
    /// (`index_register_count`) fails. Register-group alignment and v0-mask overlap
    /// are validated before any `unsafe` store helper is entered.
    ///
    /// `Vsr`/`Vsm` reset `vstart` explicitly here; the other arms delegate to the
    /// `execute_*` helpers, which presumably handle `vstart` themselves
    /// (NOTE(review): confirm against `zve64x_store_helpers`).
    ///
    /// # Errors
    ///
    /// - [`ExecutionError::IllegalInstruction`] for disabled vector state, invalid
    ///   `vtype`, impossible EMUL, misaligned register groups, or mask overlap.
    /// - [`ExecutionError::MemoryAccess`] when a memory write fails (propagated from
    ///   `write_slice` or the store helpers).
    #[inline(always)]
    fn execute(
        self,
        state: &mut InterpreterState<Reg, ExtState, Memory, PC, InstructionHandler, CustomError>,
    ) -> Result<ControlFlow<()>, ExecutionError<Reg::Type, CustomError>> {
        match self {
            // Whole-register store: stores `nreg` consecutive registers starting at `vs3` directly
            // to memory. `vs3` must be aligned to `nreg`. Ignores vtype, vl, vstart, masking.
            Self::Vsr { vs3, rs1, nreg } => {
                if !state.ext_state.vector_instructions_allowed() {
                    // `Err(..)?` in statement position is an unconditional early return.
                    // NOTE(review): `old_pc(INSTRUCTION_SIZE)` presumably yields the
                    // address of this instruction (PC before the post-fetch increment)
                    // — confirm against the `ProgramCounter` contract.
                    Err(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                // Whole-register stores require the source register number to be
                // `nreg`-aligned; widen to `u32` before the modulo to avoid any
                // narrow-type surprises.
                if u32::from(vs3.bits()) % u32::from(nreg) != 0 {
                    Err(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                let base = state.regs.read(rs1).as_u64();
                let vlenb = u64::from(ExtState::VLENB);
                // Write each of the `nreg` registers as one contiguous VLENB-byte slice.
                for reg_off in 0..u64::from(nreg) {
                    let reg_idx = u64::from(vs3.bits()) + reg_off;
                    // SAFETY: `reg_idx < 32` because the decoder guarantees `nreg` in {1,2,4,8}
                    // and `vs3` is `nreg`-aligned (checked above), so
                    // `vs3.bits() + nreg - 1 <= 31`.
                    let src =
                        unsafe { state.ext_state.read_vreg().get_unchecked(reg_idx as usize) };
                    // NOTE(review): `base + reg_off * vlenb` can overflow for base
                    // addresses near `u64::MAX` (panics in debug builds, wraps in
                    // release) — confirm whether address wraparound needs explicit
                    // handling or is excluded by the memory model.
                    state
                        .memory
                        .write_slice(base + reg_off * vlenb, src)
                        .map_err(ExecutionError::MemoryAccess)?;
                }
                // Completed vector instructions leave `vstart = 0`.
                state.ext_state.reset_vstart();
            }
            // Mask store: stores `ceil(vl / 8)` bytes from `vs3` to memory with no masking.
            // Does not require a valid vtype: when vill is set vl is 0, so zero bytes are written.
            Self::Vsm { vs3, rs1 } => {
                if !state.ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                let vl = state.ext_state.vl();
                // One mask bit per element => ceil(vl / 8) whole bytes.
                let byte_count = vl.div_ceil(u8::BITS) as usize;
                // Skip the memory write entirely when there is nothing to store
                // (vl == 0, e.g. because vill is set).
                if byte_count > 0 {
                    let base = state.regs.read(rs1).as_u64();
                    // SAFETY: `vs3.bits() < 32` is guaranteed by `VReg`.
                    // `byte_count = vl.div_ceil(8) <= VLEN / 8 = VLENB` because `vl <= VLMAX <=
                    // VLEN`, so `..byte_count` is in bounds within the
                    // `VLENB`-byte source register.
                    let src = unsafe {
                        state
                            .ext_state
                            .read_vreg()
                            .get_unchecked(usize::from(vs3.bits()))
                            .get_unchecked(..byte_count)
                    };
                    state
                        .memory
                        .write_slice(base, src)
                        .map_err(ExecutionError::MemoryAccess)?;
                }
                state.ext_state.reset_vstart();
            }
            // Unit-stride store.
            //
            // Source EMUL = EEW/SEW * LMUL, computed via `index_register_count`. This gives
            // `group_regs` such that `VLMAX = group_regs * VLENB / eew.bytes()` matches the
            // architectural `vl`.
            Self::Vse { vs3, rs1, vm, eew } => {
                if !state.ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                // `vtype()` returning `None` means the current vtype is invalid (vill);
                // element stores are illegal in that state.
                let vtype = state
                    .ext_state
                    .vtype()
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                // `None` here means the EEW/SEW/LMUL combination yields no valid EMUL.
                let group_regs = vtype
                    .vlmul()
                    .index_register_count(eew, vtype.vsew())
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                zve64x_load_helpers::check_register_group_alignment(state, vs3, group_regs)?;
                // A masked store may not have its source group overlap v0 (the mask).
                if !vm && zve64x_load_helpers::groups_overlap(vs3, group_regs, VReg::V0, 1) {
                    Err(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                // SAFETY:
                // - alignment: `check_register_group_alignment` verified `vs3 % group_regs == 0`
                //   and `vs3 + group_regs <= 32`
                // - `vl <= group_regs * VLENB / eew.bytes()`: `group_regs` is the EMUL computed for
                //   this `eew` and `vtype`, so this VLMAX equals the architectural VLMAX that
                //   bounds `vl`
                // - mask overlap: checked above via `groups_overlap`
                unsafe {
                    zve64x_store_helpers::execute_unit_stride_store(
                        state,
                        vs3,
                        vm,
                        state.ext_state.vl(),
                        state.ext_state.vstart(),
                        state.regs.read(rs1).as_u64(),
                        eew,
                        group_regs,
                        1,
                    )?;
                }
            }
            // Strided store: same validation as `Vse`, plus a byte stride taken from
            // `rs2` and interpreted as a signed offset between consecutive elements.
            Self::Vsse {
                vs3,
                rs1,
                rs2,
                vm,
                eew,
            } => {
                if !state.ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                let vtype = state
                    .ext_state
                    .vtype()
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                let group_regs = vtype
                    .vlmul()
                    .index_register_count(eew, vtype.vsew())
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                zve64x_load_helpers::check_register_group_alignment(state, vs3, group_regs)?;
                if !vm && zve64x_load_helpers::groups_overlap(vs3, group_regs, VReg::V0, 1) {
                    Err(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                // The stride register holds a signed byte offset; reinterpret the
                // unsigned register value without changing its bits.
                let stride = state.regs.read(rs2).as_u64().cast_signed();
                // SAFETY: same preconditions as `Vse`.
                unsafe {
                    zve64x_store_helpers::execute_strided_store(
                        state,
                        vs3,
                        vm,
                        state.ext_state.vl(),
                        state.ext_state.vstart(),
                        state.regs.read(rs1).as_u64(),
                        stride,
                        eew,
                        group_regs,
                        1,
                    )?;
                }
            }
            // Indexed-unordered store. Ordering between elements is not guaranteed.
            // Data elements use SEW (from vtype); indices come from `vs2` at the
            // instruction-encoded `index_eew`.
            Self::Vsuxei {
                vs3,
                rs1,
                vs2,
                vm,
                eew: index_eew,
            } => {
                if !state.ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                let vtype = state
                    .ext_state
                    .vtype()
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                // Data group uses plain LMUL; index group uses EMUL derived from the
                // index EEW relative to SEW.
                let data_eew = vtype.vsew().as_eew();
                let data_group_regs = vtype.vlmul().register_count();
                let index_group_regs = vtype
                    .vlmul()
                    .index_register_count(index_eew, vtype.vsew())
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                zve64x_load_helpers::check_register_group_alignment(state, vs3, data_group_regs)?;
                zve64x_load_helpers::check_register_group_alignment(state, vs2, index_group_regs)?;
                if !vm && zve64x_load_helpers::groups_overlap(vs3, data_group_regs, VReg::V0, 1) {
                    Err(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                // SAFETY:
                // - `vs3` alignment/bounds: `check_register_group_alignment` verified both
                // - `vs2` alignment/bounds: `check_register_group_alignment` verified both
                // - `vl <= data_group_regs * VLENB / data_eew.bytes()`: `data_group_regs` is the
                //   EMUL that bounds `vl`
                // - `vl <= index_group_regs * VLENB / index_eew.bytes()`: `index_register_count`
                //   returns the EMUL for the index group, which by the same argument bounds `vl`
                // - mask overlap: checked above
                unsafe {
                    zve64x_store_helpers::execute_indexed_store(
                        state,
                        vs3,
                        vs2,
                        vm,
                        state.ext_state.vl(),
                        u32::from(state.ext_state.vstart()),
                        state.regs.read(rs1).as_u64(),
                        data_eew,
                        index_eew,
                        data_group_regs,
                        1,
                    )?;
                }
            }
            // Indexed-ordered store. Elements must be written in element order.
            // The ordering constraint is visible only to other harts/devices; the implementation
            // here is already sequential, so no additional logic is needed and this arm is
            // intentionally identical to `Vsuxei`.
            Self::Vsoxei {
                vs3,
                rs1,
                vs2,
                vm,
                eew: index_eew,
            } => {
                if !state.ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                let vtype = state
                    .ext_state
                    .vtype()
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                let data_eew = vtype.vsew().as_eew();
                let data_group_regs = vtype.vlmul().register_count();
                let index_group_regs = vtype
                    .vlmul()
                    .index_register_count(index_eew, vtype.vsew())
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                zve64x_load_helpers::check_register_group_alignment(state, vs3, data_group_regs)?;
                zve64x_load_helpers::check_register_group_alignment(state, vs2, index_group_regs)?;
                if !vm && zve64x_load_helpers::groups_overlap(vs3, data_group_regs, VReg::V0, 1) {
                    Err(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                // SAFETY: identical precondition argument to `Vsuxei`
                unsafe {
                    zve64x_store_helpers::execute_indexed_store(
                        state,
                        vs3,
                        vs2,
                        vm,
                        state.ext_state.vl(),
                        u32::from(state.ext_state.vstart()),
                        state.regs.read(rs1).as_u64(),
                        data_eew,
                        index_eew,
                        data_group_regs,
                        1,
                    )?;
                }
            }
            // Unit-stride segment store: `nf` fields per element, stored contiguously.
            // `validate_segment_registers` replaces the separate alignment + mask-overlap
            // checks of the non-segment arms.
            Self::Vsseg {
                vs3,
                rs1,
                vm,
                eew,
                nf,
            } => {
                if !state.ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                let vtype = state
                    .ext_state
                    .vtype()
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                let group_regs = vtype
                    .vlmul()
                    .index_register_count(eew, vtype.vsew())
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                zve64x_load_helpers::validate_segment_registers(state, vs3, vm, group_regs, nf)?;
                // SAFETY:
                // - `validate_segment_registers` guarantees `vs3 % group_regs == 0` and `vs3 + nf *
                //   group_regs <= 32`
                // - when `vm=false`, `validate_segment_registers` ensures `vs3 != 0`, so `vs3` does
                //   not overlap `v0`
                // - `vl <= group_regs * VLENB / eew.bytes()`: same EMUL argument as `Vse`
                unsafe {
                    zve64x_store_helpers::execute_unit_stride_store(
                        state,
                        vs3,
                        vm,
                        state.ext_state.vl(),
                        state.ext_state.vstart(),
                        state.regs.read(rs1).as_u64(),
                        eew,
                        group_regs,
                        nf,
                    )?;
                }
            }
            // Strided segment store: `Vsseg` validation plus the signed byte stride
            // from `rs2`, as in `Vsse`.
            Self::Vssseg {
                vs3,
                rs1,
                rs2,
                vm,
                eew,
                nf,
            } => {
                if !state.ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                let vtype = state
                    .ext_state
                    .vtype()
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                let group_regs = vtype
                    .vlmul()
                    .index_register_count(eew, vtype.vsew())
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                zve64x_load_helpers::validate_segment_registers(state, vs3, vm, group_regs, nf)?;
                // Signed byte distance between consecutive segments (bit-preserving cast).
                let stride = state.regs.read(rs2).as_u64().cast_signed();
                // SAFETY: same as `Vsseg`; `validate_segment_registers` covers alignment/bounds.
                unsafe {
                    zve64x_store_helpers::execute_strided_store(
                        state,
                        vs3,
                        vm,
                        state.ext_state.vl(),
                        state.ext_state.vstart(),
                        state.regs.read(rs1).as_u64(),
                        stride,
                        eew,
                        group_regs,
                        nf,
                    )?;
                }
            }
            // Indexed-unordered segment store: `Vsuxei` with `nf` fields; the data
            // group is validated as a segment, the index group by plain alignment.
            Self::Vsuxseg {
                vs3,
                rs1,
                vs2,
                vm,
                eew: index_eew,
                nf,
            } => {
                if !state.ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                let vtype = state
                    .ext_state
                    .vtype()
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                let data_eew = vtype.vsew().as_eew();
                let data_group_regs = vtype.vlmul().register_count();
                let index_group_regs = vtype
                    .vlmul()
                    .index_register_count(index_eew, vtype.vsew())
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                zve64x_load_helpers::validate_segment_registers(
                    state,
                    vs3,
                    vm,
                    data_group_regs,
                    nf,
                )?;
                zve64x_load_helpers::check_register_group_alignment(state, vs2, index_group_regs)?;
                // SAFETY:
                // - `validate_segment_registers` covers `vs3` alignment/bounds/mask-overlap
                // - `check_register_group_alignment` covers `vs2` alignment/bounds
                // - `vl` bounded by both EMUL groups as in `Vsuxei`
                unsafe {
                    zve64x_store_helpers::execute_indexed_store(
                        state,
                        vs3,
                        vs2,
                        vm,
                        state.ext_state.vl(),
                        u32::from(state.ext_state.vstart()),
                        state.regs.read(rs1).as_u64(),
                        data_eew,
                        index_eew,
                        data_group_regs,
                        nf,
                    )?;
                }
            }
            // Indexed-ordered segment store. Sequential iteration satisfies the ordering
            // requirement, so this arm is intentionally identical to `Vsuxseg`.
            Self::Vsoxseg {
                vs3,
                rs1,
                vs2,
                vm,
                eew: index_eew,
                nf,
            } => {
                if !state.ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                let vtype = state
                    .ext_state
                    .vtype()
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                let data_eew = vtype.vsew().as_eew();
                let data_group_regs = vtype.vlmul().register_count();
                let index_group_regs = vtype
                    .vlmul()
                    .index_register_count(index_eew, vtype.vsew())
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: state
                            .instruction_fetcher
                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                zve64x_load_helpers::validate_segment_registers(
                    state,
                    vs3,
                    vm,
                    data_group_regs,
                    nf,
                )?;
                zve64x_load_helpers::check_register_group_alignment(state, vs2, index_group_regs)?;
                // SAFETY: identical precondition argument to `Vsuxseg`
                unsafe {
                    zve64x_store_helpers::execute_indexed_store(
                        state,
                        vs3,
                        vs2,
                        vm,
                        state.ext_state.vl(),
                        u32::from(state.ext_state.vstart()),
                        state.regs.read(rs1).as_u64(),
                        data_eew,
                        index_eew,
                        data_group_regs,
                        nf,
                    )?;
                }
            }
        }

        // Stores never branch: execution always proceeds to the next instruction.
        Ok(ControlFlow::Continue(()))
    }
}