ab_riscv_interpreter/v/zve64x/store.rs
1//! Zve64x vector store instructions
2
3#[cfg(test)]
4mod tests;
5pub mod zve64x_store_helpers;
6
7use crate::v::vector_registers::VectorRegistersExt;
8use crate::v::zve64x::load::zve64x_load_helpers;
9use crate::v::zve64x::zve64x_helpers;
10use crate::{ExecutableInstruction, ExecutionError, ProgramCounter, RegisterFile, VirtualMemory};
11use ab_riscv_macros::instruction_execution;
12use ab_riscv_primitives::prelude::*;
13use core::fmt;
14use core::ops::ControlFlow;
15
#[instruction_execution]
impl<Reg, Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
    ExecutableInstruction<Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
    for Zve64xStoreInstruction<Reg>
where
    Reg: Register,
    Regs: RegisterFile<Reg>,
    ExtState: VectorRegistersExt<Reg, CustomError>,
    [(); ExtState::ELEN as usize]:,
    [(); ExtState::VLEN as usize]:,
    [(); ExtState::VLENB as usize]:,
    Memory: VirtualMemory,
    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
    CustomError: fmt::Debug,
{
    /// Executes a Zve64x vector store instruction.
    ///
    /// Dispatches on the decoded variant: whole-register (`Vsr`), mask (`Vsm`),
    /// unit-stride (`Vse`), strided (`Vsse`), indexed unordered/ordered
    /// (`Vsuxei`/`Vsoxei`), and their segment counterparts
    /// (`Vsseg`/`Vssseg`/`Vsuxseg`/`Vsoxseg`).
    ///
    /// Shared structure across all arms:
    /// - vector availability is checked first via `vector_instructions_allowed()`;
    ///   on failure an `IllegalInstruction` error is raised at the trapping
    ///   instruction's address (`old_pc(INSTRUCTION_SIZE)`);
    /// - the `Err(..)?;` idiom is used as an unconditional early return from the
    ///   surrounding `match` arm — `?` on an `Err` value always propagates;
    /// - on successful completion of an arm, `vstart` is reset (or left to the
    ///   helper — NOTE(review): the helper-based arms do not reset `vstart` here,
    ///   so presumably the `execute_*_store` helpers do it themselves; confirm);
    /// - the fall-through `Ok(ControlFlow::Continue(()))` signals normal
    ///   continuation of the interpreter loop.
    #[inline(always)]
    fn execute(
        self,
        regs: &mut Regs,
        ext_state: &mut ExtState,
        memory: &mut Memory,
        program_counter: &mut PC,
        _system_instruction_handler: &mut InstructionHandler,
    ) -> Result<ControlFlow<()>, ExecutionError<Reg::Type, CustomError>> {
        match self {
            // Whole-register store: stores `nreg` consecutive registers starting at `vs3` directly
            // to memory as a flat byte array of `EVL = nreg * VLENB` bytes. `vs3` must be aligned
            // to `nreg`. Ignores vtype, vl, masking. Honors `vstart` in byte units: the first
            // `vstart` bytes are skipped. If `vstart >= EVL`, the instruction is a no-op.
            Self::Vsr { vs3, rs1, nreg } => {
                if !ext_state.vector_instructions_allowed() {
                    // Early return: `?` on `Err` always propagates.
                    Err(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                // Register-group alignment: `vs3` must be a multiple of `nreg`.
                if u32::from(vs3.bits()) % u32::from(nreg) != 0 {
                    Err(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                let vlenb = u64::from(ExtState::VLENB);
                let evl = u64::from(nreg) * vlenb;
                let vstart = u64::from(ext_state.vstart());
                if vstart < evl {
                    let base = regs.read(rs1).as_u64();
                    let mut byte_off = vstart;
                    // Each iteration writes the remaining tail of one source register in a
                    // single `write_slice` call, so the loop runs at most `nreg` times
                    // (plus one partial register when resuming mid-register via vstart).
                    while byte_off < evl {
                        let reg_off = byte_off / vlenb;
                        let in_reg = (byte_off % vlenb) as usize;
                        let reg_idx = (u64::from(vs3.bits()) + reg_off) as usize;
                        // SAFETY: `reg_idx < 32` because the decoder guarantees `nreg` in
                        // {1,2,4,8} and `vs3` is `nreg`-aligned (checked above), so
                        // `vs3.bits() + nreg - 1 <= 31`. `in_reg < VLENB` by construction.
                        let src = unsafe {
                            ext_state
                                .read_vreg()
                                .get_unchecked(reg_idx)
                                .get_unchecked(in_reg..)
                        };
                        // NOTE(review): `base + byte_off` uses checked addition semantics
                        // (panics on overflow in debug builds); RISC-V address arithmetic
                        // wraps modulo 2^XLEN — confirm `base` near u64::MAX is impossible
                        // or handled upstream.
                        if let Err(error) = memory.write_slice(base + byte_off, src) {
                            // On a memory fault, record the resume point so a re-execution
                            // after trap handling skips the bytes already stored.
                            // Cast is lossless: `byte_off < evl <= 8 * VLENB <= VLEN <= 65536`,
                            // so `byte_off <= 65535` fits in u16.
                            ext_state.set_vstart(byte_off as u16);
                            return Err(ExecutionError::MemoryAccess(error));
                        }
                        byte_off += src.len() as u64;
                    }
                }
                ext_state.reset_vstart();
            }
            // Mask store: stores `ceil(vl / 8)` bytes from `vs3` to memory with no masking.
            // Does not require a valid vtype: when vill is set vl is 0, so zero bytes are written.
            // Honors `vstart` at byte granularity: the first `vstart / 8` bytes are skipped.
            Self::Vsm { vs3, rs1 } => {
                if !ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                let vl = ext_state.vl();
                // `u8::BITS == 8`: one mask bit per element, packed into bytes.
                let evl_bytes = vl.div_ceil(u8::BITS);
                let start_byte = u32::from(ext_state.vstart());
                if start_byte < evl_bytes {
                    let base = regs.read(rs1).as_u64();
                    // SAFETY: `vs3.bits() < 32` is guaranteed by `VReg`.
                    // `evl_bytes = vl.div_ceil(8) <= VLEN / 8 = VLENB` because `vl <= VLMAX <=
                    // VLEN`, so the slice `start_byte..evl_bytes` is in bounds of the
                    // `VLENB`-byte source register.
                    let src = unsafe {
                        ext_state
                            .read_vreg()
                            .get_unchecked(usize::from(vs3.bits()))
                            .get_unchecked(start_byte as usize..evl_bytes as usize)
                    };
                    // Single slice write. On fault, `vstart` is left at its pre-existing
                    // value (`start_byte`), so a restart repeats the whole write — unlike
                    // `Vsr`, no finer-grained resume point is recorded.
                    memory
                        .write_slice(base + u64::from(start_byte), src)
                        .map_err(ExecutionError::MemoryAccess)?;
                }
                ext_state.reset_vstart();
            }
            // Unit-stride store.
            //
            // Source EMUL = EEW/SEW * LMUL, computed via `data_register_count`. This gives
            // `group_regs` such that `VLMAX = group_regs * VLENB / eew.bytes()` matches the
            // architectural `vl`.
            Self::Vse { vs3, rs1, vm, eew } => {
                if !ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                // A `None` vtype (vill set) makes this instruction illegal.
                let vtype = ext_state
                    .vtype()
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                // `None` means the EEW/SEW/LMUL combination yields an unsupported EMUL.
                let group_regs = vtype.vlmul().data_register_count(eew, vtype.vsew()).ok_or(
                    ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    },
                )?;
                zve64x_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
                    program_counter,
                    vs3,
                    group_regs,
                )?;
                // SAFETY:
                // - alignment: `check_register_group_alignment` verified `vs3 % group_regs == 0`
                //   and `vs3 + group_regs <= 32`
                // - `vl <= group_regs * VLENB / eew.bytes()`: `group_regs` is the EMUL computed for
                //   this `eew` and `vtype`, so this VLMAX equals the architectural VLMAX that
                //   bounds `vl`
                // - vs3/v0 overlap: stores read vs3 as a source; the spec does not restrict
                //   source/v0 overlap
                unsafe {
                    zve64x_store_helpers::execute_unit_stride_store(
                        ext_state,
                        memory,
                        vs3,
                        vm,
                        ext_state.vl(),
                        ext_state.vstart(),
                        regs.read(rs1).as_u64(),
                        eew,
                        group_regs,
                        // Non-segment form: a single field per element (nf = 1),
                        // mirroring the `nf` argument of the `Vsseg` arm below.
                        1,
                    )?;
                }
            }
            // Strided store
            Self::Vsse {
                vs3,
                rs1,
                rs2,
                vm,
                eew,
            } => {
                if !ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                let vtype = ext_state
                    .vtype()
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                let group_regs = vtype.vlmul().data_register_count(eew, vtype.vsew()).ok_or(
                    ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    },
                )?;
                zve64x_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
                    program_counter,
                    vs3,
                    group_regs,
                )?;
                // The byte stride in rs2 is interpreted as a signed value: negative
                // strides walk memory downwards.
                let stride = regs.read(rs2).as_u64().cast_signed();
                // SAFETY: same preconditions as `Vse`.
                unsafe {
                    zve64x_store_helpers::execute_strided_store(
                        ext_state,
                        memory,
                        vs3,
                        vm,
                        ext_state.vl(),
                        ext_state.vstart(),
                        regs.read(rs1).as_u64(),
                        stride,
                        eew,
                        group_regs,
                        // Non-segment form: nf = 1.
                        1,
                    )?;
                }
            }
            // Indexed-unordered store. Ordering between elements is not guaranteed.
            Self::Vsuxei {
                vs3,
                rs1,
                vs2,
                vm,
                eew: index_eew,
            } => {
                if !ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                let vtype = ext_state
                    .vtype()
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                // Indexed stores: data elements use SEW (data EMUL = LMUL), while the
                // decoded `eew` applies to the index vector `vs2` only.
                let data_eew = vtype.vsew().as_eew();
                let data_group_regs = vtype.vlmul().register_count();
                let index_group_regs = vtype
                    .vlmul()
                    .index_register_count(index_eew, vtype.vsew())
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                // Both the data group and the index group must be aligned and in bounds.
                zve64x_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
                    program_counter,
                    vs3,
                    data_group_regs,
                )?;
                zve64x_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
                    program_counter,
                    vs2,
                    index_group_regs,
                )?;
                // SAFETY:
                // - `vs3` alignment/bounds: `check_register_group_alignment` verified both
                // - `vs2` alignment/bounds: `check_register_group_alignment` verified both
                // - `vl <= data_group_regs * VLENB / data_eew.bytes()`: `data_group_regs` is the
                //   EMUL that bounds `vl`
                // - `vl <= index_group_regs * VLENB / index_eew.bytes()`: `index_register_count`
                //   returns the EMUL for the index group, which by the same argument bounds `vl`
                // - vs3/v0 overlap: stores read vs3 as a source; no restriction
                unsafe {
                    zve64x_store_helpers::execute_indexed_store(
                        ext_state,
                        memory,
                        vs3,
                        vs2,
                        vm,
                        ext_state.vl(),
                        u32::from(ext_state.vstart()),
                        regs.read(rs1).as_u64(),
                        data_eew,
                        index_eew,
                        data_group_regs,
                        // Non-segment form: nf = 1.
                        1,
                    )?;
                }
            }
            // Indexed-ordered store. Elements must be written in element order.
            // The ordering constraint is visible only to other harts/devices; the implementation
            // here is already sequential, so no additional logic is needed.
            Self::Vsoxei {
                vs3,
                rs1,
                vs2,
                vm,
                eew: index_eew,
            } => {
                if !ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                let vtype = ext_state
                    .vtype()
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                // Same data/index EMUL split as `Vsuxei`.
                let data_eew = vtype.vsew().as_eew();
                let data_group_regs = vtype.vlmul().register_count();
                let index_group_regs = vtype
                    .vlmul()
                    .index_register_count(index_eew, vtype.vsew())
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                zve64x_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
                    program_counter,
                    vs3,
                    data_group_regs,
                )?;
                zve64x_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
                    program_counter,
                    vs2,
                    index_group_regs,
                )?;
                // SAFETY: identical precondition argument to `Vsuxei`
                unsafe {
                    zve64x_store_helpers::execute_indexed_store(
                        ext_state,
                        memory,
                        vs3,
                        vs2,
                        vm,
                        ext_state.vl(),
                        u32::from(ext_state.vstart()),
                        regs.read(rs1).as_u64(),
                        data_eew,
                        index_eew,
                        data_group_regs,
                        // Non-segment form: nf = 1.
                        1,
                    )?;
                }
            }
            // Unit-stride segment store: `nf` fields per element, stored contiguously
            Self::Vsseg {
                vs3,
                rs1,
                vm,
                eew,
                nf,
            } => {
                if !ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                let vtype = ext_state
                    .vtype()
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                let group_regs = vtype.vlmul().data_register_count(eew, vtype.vsew()).ok_or(
                    ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    },
                )?;
                // Segment validation additionally bounds `vs3 + nf * group_regs <= 32`,
                // which plain `check_register_group_alignment` does not cover.
                zve64x_store_helpers::validate_segment_store_registers::<Reg, _, _, _>(
                    program_counter,
                    vs3,
                    group_regs,
                    nf,
                )?;
                // SAFETY:
                // - `validate_segment_store_registers` guarantees `vs3 % group_regs == 0` and `vs3
                //   + nf * group_regs <= 32`
                // - `vl <= group_regs * VLENB / eew.bytes()`: same EMUL argument as `Vse`
                // - vs3/v0 overlap: stores read vs3 as a source; no restriction
                unsafe {
                    zve64x_store_helpers::execute_unit_stride_store(
                        ext_state,
                        memory,
                        vs3,
                        vm,
                        ext_state.vl(),
                        ext_state.vstart(),
                        regs.read(rs1).as_u64(),
                        eew,
                        group_regs,
                        nf,
                    )?;
                }
            }
            // Strided segment store
            Self::Vssseg {
                vs3,
                rs1,
                rs2,
                vm,
                eew,
                nf,
            } => {
                if !ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                let vtype = ext_state
                    .vtype()
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                let group_regs = vtype.vlmul().data_register_count(eew, vtype.vsew()).ok_or(
                    ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    },
                )?;
                zve64x_store_helpers::validate_segment_store_registers::<Reg, _, _, _>(
                    program_counter,
                    vs3,
                    group_regs,
                    nf,
                )?;
                // Signed byte stride, as in `Vsse`.
                let stride = regs.read(rs2).as_u64().cast_signed();
                // SAFETY: same as `Vsseg`.
                unsafe {
                    zve64x_store_helpers::execute_strided_store(
                        ext_state,
                        memory,
                        vs3,
                        vm,
                        ext_state.vl(),
                        ext_state.vstart(),
                        regs.read(rs1).as_u64(),
                        stride,
                        eew,
                        group_regs,
                        nf,
                    )?;
                }
            }
            // Indexed-unordered segment store
            Self::Vsuxseg {
                vs3,
                rs1,
                vs2,
                vm,
                eew: index_eew,
                nf,
            } => {
                if !ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                let vtype = ext_state
                    .vtype()
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                // Data elements use SEW; the decoded `eew` belongs to the index vector.
                let data_eew = vtype.vsew().as_eew();
                let data_group_regs = vtype.vlmul().register_count();
                let index_group_regs = vtype
                    .vlmul()
                    .index_register_count(index_eew, vtype.vsew())
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                // Data side uses segment validation (covers `nf`); index side only needs
                // plain group alignment, since indices are not replicated per field.
                zve64x_store_helpers::validate_segment_store_registers::<Reg, _, _, _>(
                    program_counter,
                    vs3,
                    data_group_regs,
                    nf,
                )?;
                zve64x_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
                    program_counter,
                    vs2,
                    index_group_regs,
                )?;
                // SAFETY:
                // - `validate_segment_store_registers` covers `vs3` alignment/bounds
                // - `check_register_group_alignment` covers `vs2` alignment/bounds
                // - `vl` bounded by both EMUL groups as in `Vsuxei`
                // - vs3/v0 overlap: stores read vs3 as a source; no restriction
                unsafe {
                    zve64x_store_helpers::execute_indexed_store(
                        ext_state,
                        memory,
                        vs3,
                        vs2,
                        vm,
                        ext_state.vl(),
                        u32::from(ext_state.vstart()),
                        regs.read(rs1).as_u64(),
                        data_eew,
                        index_eew,
                        data_group_regs,
                        nf,
                    )?;
                }
            }
            // Indexed-ordered segment store. Sequential iteration satisfies the ordering
            // requirement.
            Self::Vsoxseg {
                vs3,
                rs1,
                vs2,
                vm,
                eew: index_eew,
                nf,
            } => {
                if !ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                let vtype = ext_state
                    .vtype()
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                let data_eew = vtype.vsew().as_eew();
                let data_group_regs = vtype.vlmul().register_count();
                let index_group_regs = vtype
                    .vlmul()
                    .index_register_count(index_eew, vtype.vsew())
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                zve64x_store_helpers::validate_segment_store_registers::<Reg, _, _, _>(
                    program_counter,
                    vs3,
                    data_group_regs,
                    nf,
                )?;
                zve64x_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
                    program_counter,
                    vs2,
                    index_group_regs,
                )?;
                // SAFETY: identical precondition argument to `Vsuxseg`
                unsafe {
                    zve64x_store_helpers::execute_indexed_store(
                        ext_state,
                        memory,
                        vs3,
                        vs2,
                        vm,
                        ext_state.vl(),
                        u32::from(ext_state.vstart()),
                        regs.read(rs1).as_u64(),
                        data_eew,
                        index_eew,
                        data_group_regs,
                        nf,
                    )?;
                }
            }
        }

        // All arms completed (or propagated an error above): resume normal execution.
        Ok(ControlFlow::Continue(()))
    }
}