Skip to main content

ab_riscv_interpreter/v/zve64x/
store.rs

1//! Zve64x vector store instructions
2
3#[cfg(test)]
4mod tests;
5pub mod zve64x_store_helpers;
6
7use crate::v::vector_registers::VectorRegistersExt;
8use crate::v::zve64x::load::zve64x_load_helpers;
9use crate::v::zve64x::zve64x_helpers;
10use crate::{
11    ExecutableInstruction, ExecutableInstructionCsr, ExecutableInstructionOperands, ExecutionError,
12    ProgramCounter, RegisterFile, Rs1Rs2OperandValues, Rs1Rs2Operands, VirtualMemory,
13};
14use ab_riscv_macros::instruction_execution;
15use ab_riscv_primitives::prelude::*;
16use core::fmt;
17use core::ops::ControlFlow;
18
// Operand trait implementation for the vector store instructions. The impl body is empty, so
// nothing is overridden in this file.
// NOTE(review): presumably `#[instruction_execution]` and/or the trait's defaults supply the
// operand handling - confirm against the macro.
19#[instruction_execution]
20impl<Reg> ExecutableInstructionOperands for Zve64xStoreInstruction<Reg> where Reg: Register {}
21
// CSR trait implementation for the vector store instructions. The impl body is empty, so no
// CSR-specific behavior is defined in this file.
// NOTE(review): presumably `#[instruction_execution]` and/or the trait's defaults supply it -
// confirm against the macro.
22#[instruction_execution]
23impl<Reg, ExtState, CustomError> ExecutableInstructionCsr<ExtState, CustomError>
24    for Zve64xStoreInstruction<Reg>
25where
26    Reg: Register,
27{
28}
29
// Execution of the vector store instructions: whole-register (`Vsr`), mask (`Vsm`), unit-stride
// (`Vse`), strided (`Vsse`), indexed (`Vsuxei`/`Vsoxei`) and the segment forms of the latter four
// (`Vsseg`, `Vssseg`, `Vsuxseg`, `Vsoxseg`).
//
// `Vsr` and `Vsm` are implemented inline below. Every other variant only validates its operands
// here and hands the element loop to a helper in `zve64x_store_helpers`.
30#[instruction_execution]
31impl<Reg, Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
32    ExecutableInstruction<Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
33    for Zve64xStoreInstruction<Reg>
34where
35    Reg: Register,
36    Regs: RegisterFile<Reg>,
37    ExtState: VectorRegistersExt<Reg, CustomError>,
38    [(); ExtState::ELEN as usize]:,
39    [(); ExtState::VLEN as usize]:,
40    [(); ExtState::VLENB as usize]:,
41    Memory: VirtualMemory,
42    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
43    CustomError: fmt::Debug,
44{
    // Executes one vector store instruction.
    //
    // Operands:
    // - `rs1_value` is the base memory address for every variant
    // - `rs2_value` is read only by `Vsse`/`Vssseg`, where it is reinterpreted as a signed stride
    // - `_regs` and `_system_instruction_handler` are not used
    //
    // Every arm first rejects the instruction if `vector_instructions_allowed()` is false. All
    // arms except `Vsr` and `Vsm` additionally require `vtype()` to return `Some`.
    //
    // Errors:
    // - `ExecutionError::IllegalInstruction`, with the address returned by
    //   `program_counter.old_pc(INSTRUCTION_SIZE)`, when vector instructions are not allowed, when
    //   `vtype()` is `None`, when a register-count lookup returns `None`, or when `vs3` is not a
    //   multiple of `nreg` (`Vsr`)
    // - whatever the register validation helpers and the `execute_*_store` helpers return, which
    //   is propagated unchanged with `?`
    // - `ExecutionError::MemoryAccess` when `write_slice` fails in `Vsr` or `Vsm`
    //
    // On success returns `Ok(ControlFlow::Continue(Default::default()))`.
45    #[inline(always)]
46    fn execute(
47        self,
48        Rs1Rs2OperandValues {
49            rs1_value,
50            rs2_value,
51        }: Rs1Rs2OperandValues<<Self::Reg as Register>::Type>,
52        _regs: &mut Regs,
53        ext_state: &mut ExtState,
54        memory: &mut Memory,
55        program_counter: &mut PC,
56        _system_instruction_handler: &mut InstructionHandler,
57    ) -> Result<
58        ControlFlow<(), (Self::Reg, <Self::Reg as Register>::Type)>,
59        ExecutionError<Reg::Type, CustomError>,
60    > {
61        match self {
62            // Whole-register store: stores `nreg` consecutive registers starting at `vs3` directly
63            // to memory as a flat byte array of `EVL = nreg * VLENB` bytes. `vs3` must be aligned
64            // to `nreg`. Ignores vtype, vl, masking. Honors `vstart` in byte units: the first
65            // `vstart` bytes are skipped. If `vstart >= EVL`, the instruction is a no-op.
66            Self::Vsr { vs3, rs1: _, nreg } => {
67                if !ext_state.vector_instructions_allowed() {
68                    Err(ExecutionError::IllegalInstruction {
69                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
70                    })?;
71                }
72                if u32::from(vs3.bits()) % u32::from(nreg) != 0 {
73                    Err(ExecutionError::IllegalInstruction {
74                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
75                    })?;
76                }
77                let vlenb = u64::from(ExtState::VLENB);
78                let evl = u64::from(nreg) * vlenb;
79                let vstart = u64::from(ext_state.vstart());
80                if vstart < evl {
81                    let base = rs1_value.as_u64();
82                    let mut byte_off = vstart;
                    // One write per register: each iteration stores the rest of the current
                    // register, so only the first iteration can start mid-register
83                    while byte_off < evl {
84                        let reg_off = byte_off / vlenb;
85                        let in_reg = (byte_off % vlenb) as usize;
86                        let reg_idx = (u64::from(vs3.bits()) + reg_off) as usize;
87                        // SAFETY: `reg_idx < 32` because the decoder guarantees `nreg` in
88                        // {1,2,4,8} and `vs3` is `nreg`-aligned (checked above), so
89                        // `vs3.bits() + nreg - 1 <= 31`. `in_reg < VLENB` by construction.
90                        let src = unsafe {
91                            ext_state
92                                .read_vreg()
93                                .get_unchecked(reg_idx)
94                                .get_unchecked(in_reg..)
95                        };
                        // On failure `vstart` is set to the offset of the chunk whose write
                        // failed, and the function returns without reaching `reset_vstart`
96                        if let Err(error) = memory.write_slice(base + byte_off, src) {
97                            ext_state.set_vstart(byte_off as u16);
98                            return Err(ExecutionError::MemoryAccess(error));
99                        }
100                        byte_off += src.len() as u64;
101                    }
102                }
103                ext_state.reset_vstart();
104            }
105            // Mask store: stores `ceil(vl / 8)` bytes from `vs3` to memory with no masking.
106            // Does not require a valid vtype: when vill is set vl is 0, so zero bytes are written.
107            // Honors `vstart` in byte units, as `Vsr` does: the first `vstart` bytes are skipped.
            // If `vstart >= ceil(vl / 8)`, nothing is written.
108            Self::Vsm { vs3, rs1: _ } => {
109                if !ext_state.vector_instructions_allowed() {
110                    Err(ExecutionError::IllegalInstruction {
111                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
112                    })?;
113                }
114                let vl = ext_state.vl();
115                let evl_bytes = vl.div_ceil(u8::BITS);
116                let start_byte = u32::from(ext_state.vstart());
117                if start_byte < evl_bytes {
118                    let base = rs1_value.as_u64();
119                    // SAFETY: `vs3.bits() < 32` is guaranteed by `VReg`.
120                    // `evl_bytes = vl.div_ceil(8) <= VLEN / 8 = VLENB` because `vl <= VLMAX <=
121                    // VLEN`, so the slice `start_byte..evl_bytes` is in bounds of the
122                    // `VLENB`-byte source register.
123                    let src = unsafe {
124                        ext_state
125                            .read_vreg()
126                            .get_unchecked(usize::from(vs3.bits()))
127                            .get_unchecked(start_byte as usize..evl_bytes as usize)
128                    };
                    // Single write of all remaining bytes. On failure `?` returns before
                    // `reset_vstart`, so `vstart` keeps the value it had on entry.
129                    memory
130                        .write_slice(base + u64::from(start_byte), src)
131                        .map_err(ExecutionError::MemoryAccess)?;
132                }
133                ext_state.reset_vstart();
134            }
135            // Unit-stride store.
136            //
137            // Source EMUL = EEW/SEW * LMUL, computed via `data_register_count`. This gives
138            // `group_regs` such that `VLMAX = group_regs * VLENB / eew.bytes()` matches the
139            // architectural `vl`.
140            Self::Vse {
141                vs3,
142                rs1: _,
143                vm,
144                eew,
145            } => {
146                if !ext_state.vector_instructions_allowed() {
147                    Err(ExecutionError::IllegalInstruction {
148                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
149                    })?;
150                }
151                let vtype = ext_state
152                    .vtype()
153                    .ok_or(ExecutionError::IllegalInstruction {
154                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
155                    })?;
156                let group_regs = vtype.vlmul().data_register_count(eew, vtype.vsew()).ok_or(
157                    ExecutionError::IllegalInstruction {
158                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
159                    },
160                )?;
161                zve64x_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
162                    program_counter,
163                    vs3,
164                    group_regs,
165                )?;
166                // SAFETY:
167                // - alignment: `check_register_group_alignment` verified `vs3 % group_regs == 0`
168                //   and `vs3 + group_regs <= 32`
169                // - `vl <= group_regs * VLENB / eew.bytes()`: `group_regs` is the EMUL computed for
170                //   this `eew` and `vtype`, so this VLMAX equals the architectural VLMAX that
171                //   bounds `vl`
172                // - vs3/v0 overlap: stores read vs3 as a source; the spec does not restrict
173                //   source/v0 overlap
174                unsafe {
175                    zve64x_store_helpers::execute_unit_stride_store(
176                        ext_state,
177                        memory,
178                        vs3,
179                        vm,
180                        ext_state.vl(),
181                        ext_state.vstart(),
182                        rs1_value.as_u64(),
183                        eew,
184                        group_regs,
                        // NOTE(review): presumably the field count - `Vsseg` passes `nf` in this
                        // position, so a plain store is the single-field case
185                        1,
186                    )?;
187                }
188            }
189            // Strided store
190            Self::Vsse {
191                vs3,
192                rs1: _,
193                rs2: _,
194                vm,
195                eew,
196            } => {
197                if !ext_state.vector_instructions_allowed() {
198                    Err(ExecutionError::IllegalInstruction {
199                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
200                    })?;
201                }
202                let vtype = ext_state
203                    .vtype()
204                    .ok_or(ExecutionError::IllegalInstruction {
205                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
206                    })?;
207                let group_regs = vtype.vlmul().data_register_count(eew, vtype.vsew()).ok_or(
208                    ExecutionError::IllegalInstruction {
209                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
210                    },
211                )?;
212                zve64x_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
213                    program_counter,
214                    vs3,
215                    group_regs,
216                )?;
                // The stride is the `rs2` value reinterpreted as signed
217                let stride = rs2_value.as_u64().cast_signed();
218                // SAFETY: same preconditions as `Vse`.
219                unsafe {
220                    zve64x_store_helpers::execute_strided_store(
221                        ext_state,
222                        memory,
223                        vs3,
224                        vm,
225                        ext_state.vl(),
226                        ext_state.vstart(),
227                        rs1_value.as_u64(),
228                        stride,
229                        eew,
230                        group_regs,
                        // NOTE(review): presumably the field count - `Vssseg` passes `nf` here
231                        1,
232                    )?;
233                }
234            }
235            // Indexed-unordered store. Ordering between elements is not guaranteed.
236            Self::Vsuxei {
237                vs3,
238                rs1: _,
239                vs2,
240                vm,
241                eew: index_eew,
242            } => {
243                if !ext_state.vector_instructions_allowed() {
244                    Err(ExecutionError::IllegalInstruction {
245                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
246                    })?;
247                }
248                let vtype = ext_state
249                    .vtype()
250                    .ok_or(ExecutionError::IllegalInstruction {
251                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
252                    })?;
                // For indexed stores the instruction's `eew` field describes the indices in
                // `vs2`; the data width comes from SEW and the data group size from LMUL
253                let data_eew = vtype.vsew().as_eew();
254                let data_group_regs = vtype.vlmul().register_count();
                // Only used to validate `vs2` below; it is not passed to `execute_indexed_store`
255                let index_group_regs = vtype
256                    .vlmul()
257                    .index_register_count(index_eew, vtype.vsew())
258                    .ok_or(ExecutionError::IllegalInstruction {
259                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
260                    })?;
261                zve64x_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
262                    program_counter,
263                    vs3,
264                    data_group_regs,
265                )?;
266                zve64x_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
267                    program_counter,
268                    vs2,
269                    index_group_regs,
270                )?;
271                // SAFETY:
272                // - `vs3` alignment/bounds: `check_register_group_alignment` verified both
273                // - `vs2` alignment/bounds: `check_register_group_alignment` verified both
274                // - `vl <= data_group_regs * VLENB / data_eew.bytes()`: `data_group_regs` is the
275                //   EMUL that bounds `vl`
276                // - `vl <= index_group_regs * VLENB / index_eew.bytes()`: `index_register_count`
277                //   returns the EMUL for the index group, which by the same argument bounds `vl`
278                // - vs3/v0 overlap: stores read vs3 as a source; no restriction
279                unsafe {
280                    zve64x_store_helpers::execute_indexed_store(
281                        ext_state,
282                        memory,
283                        vs3,
284                        vs2,
285                        vm,
286                        ext_state.vl(),
287                        u32::from(ext_state.vstart()),
288                        rs1_value.as_u64(),
289                        data_eew,
290                        index_eew,
291                        data_group_regs,
                        // NOTE(review): presumably the field count - `Vsuxseg` passes `nf` here
292                        1,
293                    )?;
294                }
295            }
296            // Indexed-ordered store. Elements must be written in element order.
297            // The ordering constraint is visible only to other harts/devices; the implementation
298            // here is already sequential, so no additional logic is needed.
299            Self::Vsoxei {
300                vs3,
301                rs1: _,
302                vs2,
303                vm,
304                eew: index_eew,
305            } => {
306                if !ext_state.vector_instructions_allowed() {
307                    Err(ExecutionError::IllegalInstruction {
308                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
309                    })?;
310                }
311                let vtype = ext_state
312                    .vtype()
313                    .ok_or(ExecutionError::IllegalInstruction {
314                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
315                    })?;
316                let data_eew = vtype.vsew().as_eew();
317                let data_group_regs = vtype.vlmul().register_count();
                // Only used to validate `vs2` below; it is not passed to `execute_indexed_store`
318                let index_group_regs = vtype
319                    .vlmul()
320                    .index_register_count(index_eew, vtype.vsew())
321                    .ok_or(ExecutionError::IllegalInstruction {
322                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
323                    })?;
324                zve64x_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
325                    program_counter,
326                    vs3,
327                    data_group_regs,
328                )?;
329                zve64x_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
330                    program_counter,
331                    vs2,
332                    index_group_regs,
333                )?;
334                // SAFETY: identical precondition argument to `Vsuxei`
335                unsafe {
336                    zve64x_store_helpers::execute_indexed_store(
337                        ext_state,
338                        memory,
339                        vs3,
340                        vs2,
341                        vm,
342                        ext_state.vl(),
343                        u32::from(ext_state.vstart()),
344                        rs1_value.as_u64(),
345                        data_eew,
346                        index_eew,
347                        data_group_regs,
                        // NOTE(review): presumably the field count - `Vsoxseg` passes `nf` here
348                        1,
349                    )?;
350                }
351            }
352            // Unit-stride segment store: `nf` fields per element, stored contiguously
353            Self::Vsseg {
354                vs3,
355                rs1: _,
356                vm,
357                eew,
358                nf,
359            } => {
360                if !ext_state.vector_instructions_allowed() {
361                    Err(ExecutionError::IllegalInstruction {
362                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
363                    })?;
364                }
365                let vtype = ext_state
366                    .vtype()
367                    .ok_or(ExecutionError::IllegalInstruction {
368                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
369                    })?;
370                let group_regs = vtype.vlmul().data_register_count(eew, vtype.vsew()).ok_or(
371                    ExecutionError::IllegalInstruction {
372                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
373                    },
374                )?;
                // Segment stores use the `nf`-aware validator instead of
                // `check_register_group_alignment`
375                zve64x_store_helpers::validate_segment_store_registers::<Reg, _, _, _>(
376                    program_counter,
377                    vs3,
378                    group_regs,
379                    nf,
380                )?;
381                // SAFETY:
382                // - `validate_segment_store_registers` guarantees `vs3 % group_regs == 0` and `vs3
383                //   + nf * group_regs <= 32`
384                // - `vl <= group_regs * VLENB / eew.bytes()`: same EMUL argument as `Vse`
385                // - vs3/v0 overlap: stores read vs3 as a source; no restriction
386                unsafe {
387                    zve64x_store_helpers::execute_unit_stride_store(
388                        ext_state,
389                        memory,
390                        vs3,
391                        vm,
392                        ext_state.vl(),
393                        ext_state.vstart(),
394                        rs1_value.as_u64(),
395                        eew,
396                        group_regs,
397                        nf,
398                    )?;
399                }
400            }
401            // Strided segment store
402            Self::Vssseg {
403                vs3,
404                rs1: _,
405                rs2: _,
406                vm,
407                eew,
408                nf,
409            } => {
410                if !ext_state.vector_instructions_allowed() {
411                    Err(ExecutionError::IllegalInstruction {
412                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
413                    })?;
414                }
415                let vtype = ext_state
416                    .vtype()
417                    .ok_or(ExecutionError::IllegalInstruction {
418                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
419                    })?;
420                let group_regs = vtype.vlmul().data_register_count(eew, vtype.vsew()).ok_or(
421                    ExecutionError::IllegalInstruction {
422                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
423                    },
424                )?;
425                zve64x_store_helpers::validate_segment_store_registers::<Reg, _, _, _>(
426                    program_counter,
427                    vs3,
428                    group_regs,
429                    nf,
430                )?;
                // The stride is the `rs2` value reinterpreted as signed
431                let stride = rs2_value.as_u64().cast_signed();
432                // SAFETY: same as `Vsseg`.
433                unsafe {
434                    zve64x_store_helpers::execute_strided_store(
435                        ext_state,
436                        memory,
437                        vs3,
438                        vm,
439                        ext_state.vl(),
440                        ext_state.vstart(),
441                        rs1_value.as_u64(),
442                        stride,
443                        eew,
444                        group_regs,
445                        nf,
446                    )?;
447                }
448            }
449            // Indexed-unordered segment store
450            Self::Vsuxseg {
451                vs3,
452                rs1: _,
453                vs2,
454                vm,
455                eew: index_eew,
456                nf,
457            } => {
458                if !ext_state.vector_instructions_allowed() {
459                    Err(ExecutionError::IllegalInstruction {
460                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
461                    })?;
462                }
463                let vtype = ext_state
464                    .vtype()
465                    .ok_or(ExecutionError::IllegalInstruction {
466                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
467                    })?;
468                let data_eew = vtype.vsew().as_eew();
469                let data_group_regs = vtype.vlmul().register_count();
                // Only used to validate `vs2` below; it is not passed to `execute_indexed_store`
470                let index_group_regs = vtype
471                    .vlmul()
472                    .index_register_count(index_eew, vtype.vsew())
473                    .ok_or(ExecutionError::IllegalInstruction {
474                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
475                    })?;
                // `vs3` holds `nf` field groups and goes through the segment validator; `vs2` is
                // a single index group and uses the plain alignment check
476                zve64x_store_helpers::validate_segment_store_registers::<Reg, _, _, _>(
477                    program_counter,
478                    vs3,
479                    data_group_regs,
480                    nf,
481                )?;
482                zve64x_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
483                    program_counter,
484                    vs2,
485                    index_group_regs,
486                )?;
487                // SAFETY:
488                // - `validate_segment_store_registers` covers `vs3` alignment/bounds
489                // - `check_register_group_alignment` covers `vs2` alignment/bounds
490                // - `vl` bounded by both EMUL groups as in `Vsuxei`
491                // - vs3/v0 overlap: stores read vs3 as a source; no restriction
492                unsafe {
493                    zve64x_store_helpers::execute_indexed_store(
494                        ext_state,
495                        memory,
496                        vs3,
497                        vs2,
498                        vm,
499                        ext_state.vl(),
500                        u32::from(ext_state.vstart()),
501                        rs1_value.as_u64(),
502                        data_eew,
503                        index_eew,
504                        data_group_regs,
505                        nf,
506                    )?;
507                }
508            }
509            // Indexed-ordered segment store. Sequential iteration satisfies the ordering
510            // requirement.
511            Self::Vsoxseg {
512                vs3,
513                rs1: _,
514                vs2,
515                vm,
516                eew: index_eew,
517                nf,
518            } => {
519                if !ext_state.vector_instructions_allowed() {
520                    Err(ExecutionError::IllegalInstruction {
521                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
522                    })?;
523                }
524                let vtype = ext_state
525                    .vtype()
526                    .ok_or(ExecutionError::IllegalInstruction {
527                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
528                    })?;
529                let data_eew = vtype.vsew().as_eew();
530                let data_group_regs = vtype.vlmul().register_count();
                // Only used to validate `vs2` below; it is not passed to `execute_indexed_store`
531                let index_group_regs = vtype
532                    .vlmul()
533                    .index_register_count(index_eew, vtype.vsew())
534                    .ok_or(ExecutionError::IllegalInstruction {
535                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
536                    })?;
537                zve64x_store_helpers::validate_segment_store_registers::<Reg, _, _, _>(
538                    program_counter,
539                    vs3,
540                    data_group_regs,
541                    nf,
542                )?;
543                zve64x_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
544                    program_counter,
545                    vs2,
546                    index_group_regs,
547                )?;
548                // SAFETY: identical precondition argument to `Vsuxseg`
549                unsafe {
550                    zve64x_store_helpers::execute_indexed_store(
551                        ext_state,
552                        memory,
553                        vs3,
554                        vs2,
555                        vm,
556                        ext_state.vl(),
557                        u32::from(ext_state.vstart()),
558                        rs1_value.as_u64(),
559                        data_eew,
560                        index_eew,
561                        data_group_regs,
562                        nf,
563                    )?;
564                }
565            }
566        }
567
        // Every arm either returned an error above or completed its store
568        Ok(ControlFlow::Continue(Default::default()))
569    }
570}