Skip to main content

ab_riscv_interpreter/v/zvexx/
store.rs

//! ZveXx vector store instructions
2
3#[cfg(test)]
4mod tests;
5pub mod zvexx_store_helpers;
6
7use crate::v::vector_registers::VectorRegistersExt;
8use crate::v::zvexx::load::zvexx_load_helpers;
9use crate::v::zvexx::zvexx_helpers;
10use crate::{
11    ExecutableInstruction, ExecutableInstructionCsr, ExecutableInstructionOperands, ExecutionError,
12    ProgramCounter, RegisterFile, Rs1Rs2OperandValues, Rs1Rs2Operands, VirtualMemory,
13};
14use ab_riscv_macros::instruction_execution;
15use ab_riscv_primitives::prelude::*;
16use core::fmt;
17use core::ops::ControlFlow;
18
19#[instruction_execution]
20impl<Reg> ExecutableInstructionOperands for ZveXxStoreInstruction<Reg> where Reg: Register {}
21
22#[instruction_execution]
23impl<Reg, ExtState, CustomError> ExecutableInstructionCsr<ExtState, CustomError>
24    for ZveXxStoreInstruction<Reg>
25where
26    Reg: Register,
27{
28}
29
30#[instruction_execution]
31impl<Reg, Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
32    ExecutableInstruction<Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
33    for ZveXxStoreInstruction<Reg>
34where
35    Reg: Register,
36    Regs: RegisterFile<Reg>,
37    ExtState: VectorRegistersExt<Reg, CustomError>,
38    [(); ExtState::ELEN as usize]:,
39    [(); ExtState::VLEN as usize]:,
40    [(); ExtState::VLENB as usize]:,
41    Memory: VirtualMemory,
42    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
43    CustomError: fmt::Debug,
44{
45    #[inline(always)]
46    fn execute(
47        self,
48        Rs1Rs2OperandValues {
49            rs1_value,
50            rs2_value,
51        }: Rs1Rs2OperandValues<<Self::Reg as Register>::Type>,
52        _regs: &mut Regs,
53        ext_state: &mut ExtState,
54        memory: &mut Memory,
55        program_counter: &mut PC,
56        _system_instruction_handler: &mut InstructionHandler,
57    ) -> Result<
58        ControlFlow<(), (Self::Reg, <Self::Reg as Register>::Type)>,
59        ExecutionError<Reg::Type, CustomError>,
60    > {
61        match self {
62            // Whole-register store: stores `nreg` consecutive registers starting at `vs3` directly
63            // to memory as a flat byte array of `EVL = nreg * VLENB` bytes. `vs3` must be aligned
64            // to `nreg`. Ignores vtype, vl, masking. Honors `vstart` in byte units: the first
65            // `vstart` bytes are skipped. If `vstart >= EVL`, the instruction is a no-op.
66            Self::Vsr { vs3, rs1: _, nreg } => {
67                let nreg = nreg.num_registers();
68                if !ext_state.vector_instructions_allowed() {
69                    ::core::hint::cold_path();
70                    return Err(ExecutionError::IllegalInstruction {
71                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
72                    });
73                }
74                if vs3.to_bits() % nreg != 0 {
75                    ::core::hint::cold_path();
76                    return Err(ExecutionError::IllegalInstruction {
77                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
78                    });
79                }
80                let vlenb = u64::from(ExtState::VLENB);
81                let evl = u64::from(nreg) * vlenb;
82                let vstart = ext_state.vstart();
83                if u64::from(vstart) < evl {
84                    let base = rs1_value.as_u64();
85                    let mut byte_off = u64::from(vstart);
86                    while byte_off < evl {
87                        let reg_off = byte_off / vlenb;
88                        let in_reg = (byte_off % vlenb) as usize;
89                        // SAFETY: the decoder guarantees `nreg` in {1,2,4,8} and `vs3` is
90                        // `nreg`-aligned (checked above), so `vs3.to_bits() + nreg - 1 <= 31`
91                        let reg = unsafe {
92                            VReg::from_bits(vs3.to_bits() + reg_off as u8).unwrap_unchecked()
93                        };
94                        // SAFETY: `in_reg < VLENB` by construction
95                        let src =
96                            unsafe { ext_state.read_vregs().get(reg).get_unchecked(in_reg..) };
97                        if let Err(error) = memory.write_slice(base + byte_off, src) {
98                            ext_state.set_vstart(byte_off as u16);
99                            return Err(ExecutionError::MemoryAccess(error));
100                        }
101                        byte_off += src.len() as u64;
102                    }
103                }
104                ext_state.reset_vstart();
105            }
106            // Mask store: stores `ceil(vl / 8)` bytes from `vs3` to memory with no masking.
107            // Does not require a valid vtype: when vill is set vl is 0, so zero bytes are written.
108            // Honors `vstart` at byte granularity: the first `vstart / 8` bytes are skipped.
109            Self::Vsm { vs3, rs1: _ } => {
110                if !ext_state.vector_instructions_allowed() {
111                    ::core::hint::cold_path();
112                    return Err(ExecutionError::IllegalInstruction {
113                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
114                    });
115                }
116                let vl = ext_state.vl();
117                let evl_bytes = vl.div_ceil(u8::BITS);
118                let start_byte = ext_state.vstart();
119                if u32::from(start_byte) < evl_bytes {
120                    let base = rs1_value.as_u64();
121                    // SAFETY: `evl_bytes = vl.div_ceil(8) <= VLEN / 8 = VLENB` because
122                    // `vl <= VLMAX <= VLEN`, so the slice `start_byte..evl_bytes` is in bounds of
123                    // the `VLENB`-byte source register
124                    let src = unsafe {
125                        ext_state
126                            .read_vregs()
127                            .get(vs3)
128                            .get_unchecked(usize::from(start_byte)..evl_bytes as usize)
129                    };
130                    memory
131                        .write_slice(base + u64::from(start_byte), src)
132                        .map_err(ExecutionError::MemoryAccess)?;
133                }
134                ext_state.reset_vstart();
135            }
136            // Unit-stride store.
137            //
138            // Source EMUL = EEW/SEW * LMUL, computed via `data_register_count`. This gives
139            // `group_regs` such that `VLMAX = group_regs * VLENB / eew.bytes()` matches the
140            // architectural `vl`.
141            Self::Vse {
142                vs3,
143                rs1: _,
144                vm,
145                eew,
146            } => {
147                if !ext_state.vector_instructions_allowed() {
148                    ::core::hint::cold_path();
149                    return Err(ExecutionError::IllegalInstruction {
150                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
151                    });
152                }
153                let Some(vtype) = ext_state.vtype() else {
154                    ::core::hint::cold_path();
155                    return Err(ExecutionError::IllegalInstruction {
156                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
157                    });
158                };
159                let group_regs = vtype.vlmul().data_register_count(eew, vtype.vsew()).ok_or(
160                    ExecutionError::IllegalInstruction {
161                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
162                    },
163                )?;
164                zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
165                    program_counter,
166                    vs3,
167                    group_regs,
168                )?;
169                // SAFETY:
170                // - alignment: `check_register_group_alignment` verified `vs3 % group_regs == 0`
171                //   and `vs3 + group_regs <= 32`
172                // - `vl <= group_regs * VLENB / eew.bytes()`: `group_regs` is the EMUL computed for
173                //   this `eew` and `vtype`, so this VLMAX equals the architectural VLMAX that
174                //   bounds `vl`
175                // - vs3/v0 overlap: stores read vs3 as a source; the spec does not restrict
176                //   source/v0 overlap
177                unsafe {
178                    zvexx_store_helpers::execute_unit_stride_store(
179                        ext_state,
180                        memory,
181                        vs3,
182                        vm,
183                        rs1_value.as_u64(),
184                        eew,
185                        group_regs,
186                        Nf::N1,
187                    )?;
188                }
189            }
190            // Strided store
191            Self::Vsse {
192                vs3,
193                rs1: _,
194                rs2: _,
195                vm,
196                eew,
197            } => {
198                if !ext_state.vector_instructions_allowed() {
199                    ::core::hint::cold_path();
200                    return Err(ExecutionError::IllegalInstruction {
201                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
202                    });
203                }
204                let Some(vtype) = ext_state.vtype() else {
205                    ::core::hint::cold_path();
206                    return Err(ExecutionError::IllegalInstruction {
207                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
208                    });
209                };
210                let group_regs = vtype.vlmul().data_register_count(eew, vtype.vsew()).ok_or(
211                    ExecutionError::IllegalInstruction {
212                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
213                    },
214                )?;
215                zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
216                    program_counter,
217                    vs3,
218                    group_regs,
219                )?;
220                let stride = rs2_value.as_i64();
221                // SAFETY: same preconditions as `Vse`.
222                unsafe {
223                    zvexx_store_helpers::execute_strided_store(
224                        ext_state,
225                        memory,
226                        vs3,
227                        vm,
228                        rs1_value.as_u64(),
229                        stride,
230                        eew,
231                        group_regs,
232                        Nf::N1,
233                    )?;
234                }
235            }
236            // Indexed-unordered store. Ordering between elements is not guaranteed.
237            Self::Vsuxei {
238                vs3,
239                rs1: _,
240                vs2,
241                vm,
242                eew: index_eew,
243            } => {
244                if !ext_state.vector_instructions_allowed() {
245                    ::core::hint::cold_path();
246                    return Err(ExecutionError::IllegalInstruction {
247                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
248                    });
249                }
250                let Some(vtype) = ext_state.vtype() else {
251                    ::core::hint::cold_path();
252                    return Err(ExecutionError::IllegalInstruction {
253                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
254                    });
255                };
256                let data_eew = vtype.vsew().as_eew();
257                let data_group_regs = vtype.vlmul().register_count();
258                let index_group_regs = vtype
259                    .vlmul()
260                    .index_register_count(index_eew, vtype.vsew())
261                    .ok_or(ExecutionError::IllegalInstruction {
262                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
263                    })?;
264                zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
265                    program_counter,
266                    vs3,
267                    data_group_regs,
268                )?;
269                zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
270                    program_counter,
271                    vs2,
272                    index_group_regs,
273                )?;
274                // SAFETY:
275                // - `vs3` alignment/bounds: `check_register_group_alignment` verified both
276                // - `vs2` alignment/bounds: `check_register_group_alignment` verified both
277                // - `vl <= data_group_regs * VLENB / data_eew.bytes()`: `data_group_regs` is the
278                //   EMUL that bounds `vl`
279                // - `vl <= index_group_regs * VLENB / index_eew.bytes()`: `index_register_count`
280                //   returns the EMUL for the index group, which by the same argument bounds `vl`
281                // - vs3/v0 overlap: stores read vs3 as a source; no restriction
282                unsafe {
283                    zvexx_store_helpers::execute_indexed_store(
284                        ext_state,
285                        memory,
286                        vs3,
287                        vs2,
288                        vm,
289                        rs1_value.as_u64(),
290                        data_eew,
291                        index_eew,
292                        data_group_regs,
293                        Nf::N1,
294                    )?;
295                }
296            }
297            // Indexed-ordered store. Elements must be written in element order.
298            // The ordering constraint is visible only to other harts/devices; the implementation
299            // here is already sequential, so no additional logic is needed.
300            Self::Vsoxei {
301                vs3,
302                rs1: _,
303                vs2,
304                vm,
305                eew: index_eew,
306            } => {
307                if !ext_state.vector_instructions_allowed() {
308                    ::core::hint::cold_path();
309                    return Err(ExecutionError::IllegalInstruction {
310                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
311                    });
312                }
313                let Some(vtype) = ext_state.vtype() else {
314                    ::core::hint::cold_path();
315                    return Err(ExecutionError::IllegalInstruction {
316                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
317                    });
318                };
319                let data_eew = vtype.vsew().as_eew();
320                let data_group_regs = vtype.vlmul().register_count();
321                let index_group_regs = vtype
322                    .vlmul()
323                    .index_register_count(index_eew, vtype.vsew())
324                    .ok_or(ExecutionError::IllegalInstruction {
325                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
326                    })?;
327                zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
328                    program_counter,
329                    vs3,
330                    data_group_regs,
331                )?;
332                zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
333                    program_counter,
334                    vs2,
335                    index_group_regs,
336                )?;
337                // SAFETY: identical precondition argument to `Vsuxei`
338                unsafe {
339                    zvexx_store_helpers::execute_indexed_store(
340                        ext_state,
341                        memory,
342                        vs3,
343                        vs2,
344                        vm,
345                        rs1_value.as_u64(),
346                        data_eew,
347                        index_eew,
348                        data_group_regs,
349                        Nf::N1,
350                    )?;
351                }
352            }
353            // Unit-stride segment store: `nf` fields per element, stored contiguously
354            Self::Vsseg {
355                vs3,
356                rs1: _,
357                eew,
358                vm_nf,
359            } => {
360                let vm = vm_nf.vm();
361                let nf = vm_nf.nf();
362                if !ext_state.vector_instructions_allowed() {
363                    ::core::hint::cold_path();
364                    return Err(ExecutionError::IllegalInstruction {
365                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
366                    });
367                }
368                let Some(vtype) = ext_state.vtype() else {
369                    ::core::hint::cold_path();
370                    return Err(ExecutionError::IllegalInstruction {
371                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
372                    });
373                };
374                let group_regs = vtype.vlmul().data_register_count(eew, vtype.vsew()).ok_or(
375                    ExecutionError::IllegalInstruction {
376                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
377                    },
378                )?;
379                zvexx_store_helpers::validate_segment_store_registers::<Reg, _, _, _>(
380                    program_counter,
381                    vs3,
382                    group_regs,
383                    nf,
384                )?;
385                // SAFETY:
386                // - `validate_segment_store_registers` guarantees `vs3 % group_regs == 0` and `vs3
387                //   + nf * group_regs <= 32`
388                // - `vl <= group_regs * VLENB / eew.bytes()`: same EMUL argument as `Vse`
389                // - vs3/v0 overlap: stores read vs3 as a source; no restriction
390                unsafe {
391                    zvexx_store_helpers::execute_unit_stride_store(
392                        ext_state,
393                        memory,
394                        vs3,
395                        vm,
396                        rs1_value.as_u64(),
397                        eew,
398                        group_regs,
399                        nf,
400                    )?;
401                }
402            }
403            // Strided segment store
404            Self::Vssseg {
405                vs3,
406                rs1: _,
407                rs2: _,
408                eew,
409                vm_nf,
410            } => {
411                let vm = vm_nf.vm();
412                let nf = vm_nf.nf();
413                if !ext_state.vector_instructions_allowed() {
414                    ::core::hint::cold_path();
415                    return Err(ExecutionError::IllegalInstruction {
416                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
417                    });
418                }
419                let Some(vtype) = ext_state.vtype() else {
420                    ::core::hint::cold_path();
421                    return Err(ExecutionError::IllegalInstruction {
422                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
423                    });
424                };
425                let group_regs = vtype.vlmul().data_register_count(eew, vtype.vsew()).ok_or(
426                    ExecutionError::IllegalInstruction {
427                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
428                    },
429                )?;
430                zvexx_store_helpers::validate_segment_store_registers::<Reg, _, _, _>(
431                    program_counter,
432                    vs3,
433                    group_regs,
434                    nf,
435                )?;
436                let stride = rs2_value.as_i64();
437                // SAFETY: same as `Vsseg`.
438                unsafe {
439                    zvexx_store_helpers::execute_strided_store(
440                        ext_state,
441                        memory,
442                        vs3,
443                        vm,
444                        rs1_value.as_u64(),
445                        stride,
446                        eew,
447                        group_regs,
448                        nf,
449                    )?;
450                }
451            }
452            // Indexed-unordered segment store
453            Self::Vsuxseg {
454                vs3,
455                rs1: _,
456                vs2,
457                eew: index_eew,
458                vm_nf,
459            } => {
460                let vm = vm_nf.vm();
461                let nf = vm_nf.nf();
462                if !ext_state.vector_instructions_allowed() {
463                    ::core::hint::cold_path();
464                    return Err(ExecutionError::IllegalInstruction {
465                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
466                    });
467                }
468                let Some(vtype) = ext_state.vtype() else {
469                    ::core::hint::cold_path();
470                    return Err(ExecutionError::IllegalInstruction {
471                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
472                    });
473                };
474                let data_eew = vtype.vsew().as_eew();
475                let data_group_regs = vtype.vlmul().register_count();
476                let index_group_regs = vtype
477                    .vlmul()
478                    .index_register_count(index_eew, vtype.vsew())
479                    .ok_or(ExecutionError::IllegalInstruction {
480                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
481                    })?;
482                zvexx_store_helpers::validate_segment_store_registers::<Reg, _, _, _>(
483                    program_counter,
484                    vs3,
485                    data_group_regs,
486                    nf,
487                )?;
488                zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
489                    program_counter,
490                    vs2,
491                    index_group_regs,
492                )?;
493                // SAFETY:
494                // - `validate_segment_store_registers` covers `vs3` alignment/bounds
495                // - `check_register_group_alignment` covers `vs2` alignment/bounds
496                // - `vl` bounded by both EMUL groups as in `Vsuxei`
497                // - vs3/v0 overlap: stores read vs3 as a source; no restriction
498                unsafe {
499                    zvexx_store_helpers::execute_indexed_store(
500                        ext_state,
501                        memory,
502                        vs3,
503                        vs2,
504                        vm,
505                        rs1_value.as_u64(),
506                        data_eew,
507                        index_eew,
508                        data_group_regs,
509                        nf,
510                    )?;
511                }
512            }
513            // Indexed-ordered segment store. Sequential iteration satisfies the ordering
514            // requirement.
515            Self::Vsoxseg {
516                vs3,
517                rs1: _,
518                vs2,
519                eew: index_eew,
520                vm_nf,
521            } => {
522                let vm = vm_nf.vm();
523                let nf = vm_nf.nf();
524                if !ext_state.vector_instructions_allowed() {
525                    ::core::hint::cold_path();
526                    return Err(ExecutionError::IllegalInstruction {
527                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
528                    });
529                }
530                let Some(vtype) = ext_state.vtype() else {
531                    ::core::hint::cold_path();
532                    return Err(ExecutionError::IllegalInstruction {
533                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
534                    });
535                };
536                let data_eew = vtype.vsew().as_eew();
537                let data_group_regs = vtype.vlmul().register_count();
538                let index_group_regs = vtype
539                    .vlmul()
540                    .index_register_count(index_eew, vtype.vsew())
541                    .ok_or(ExecutionError::IllegalInstruction {
542                        address: program_counter.old_pc(zvexx_helpers::INSTRUCTION_SIZE),
543                    })?;
544                zvexx_store_helpers::validate_segment_store_registers::<Reg, _, _, _>(
545                    program_counter,
546                    vs3,
547                    data_group_regs,
548                    nf,
549                )?;
550                zvexx_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
551                    program_counter,
552                    vs2,
553                    index_group_regs,
554                )?;
555                // SAFETY: identical precondition argument to `Vsuxseg`
556                unsafe {
557                    zvexx_store_helpers::execute_indexed_store(
558                        ext_state,
559                        memory,
560                        vs3,
561                        vs2,
562                        vm,
563                        rs1_value.as_u64(),
564                        data_eew,
565                        index_eew,
566                        data_group_regs,
567                        nf,
568                    )?;
569                }
570            }
571        }
572
573        Ok(ControlFlow::Continue(Default::default()))
574    }
575}