Skip to main content

ab_riscv_interpreter/v/zve64x/
store.rs

1//! Zve64x vector store instructions
2
3#[cfg(test)]
4mod tests;
5pub mod zve64x_store_helpers;
6
7use crate::v::vector_registers::VectorRegistersExt;
8use crate::v::zve64x::load::zve64x_load_helpers;
9use crate::v::zve64x::zve64x_helpers;
10use crate::{ExecutableInstruction, ExecutionError, ProgramCounter, RegisterFile, VirtualMemory};
11use ab_riscv_macros::instruction_execution;
12use ab_riscv_primitives::prelude::*;
13use core::fmt;
14use core::ops::ControlFlow;
15
#[instruction_execution]
impl<Reg, Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
    ExecutableInstruction<Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
    for Zve64xStoreInstruction<Reg>
where
    Reg: Register,
    Regs: RegisterFile<Reg>,
    ExtState: VectorRegistersExt<Reg, CustomError>,
    [(); ExtState::ELEN as usize]:,
    [(); ExtState::VLEN as usize]:,
    [(); ExtState::VLENB as usize]:,
    Memory: VirtualMemory,
    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
    CustomError: fmt::Debug,
{
    /// Executes a single Zve64x vector store instruction.
    ///
    /// Every variant first checks `vector_instructions_allowed()` and raises
    /// [`ExecutionError::IllegalInstruction`] (reported at the faulting
    /// instruction's address via `program_counter.old_pc`) when vector
    /// execution is disabled. Variants that depend on `vtype` also raise
    /// `IllegalInstruction` when `vtype` is invalid or the EMUL computation
    /// fails. Memory faults surface as [`ExecutionError::MemoryAccess`].
    /// On successful completion `vstart` is reset to zero.
    #[inline(always)]
    fn execute(
        self,
        regs: &mut Regs,
        ext_state: &mut ExtState,
        memory: &mut Memory,
        program_counter: &mut PC,
        _system_instruction_handler: &mut InstructionHandler,
    ) -> Result<ControlFlow<()>, ExecutionError<Reg::Type, CustomError>> {
        match self {
            // Whole-register store: stores `nreg` consecutive registers starting at `vs3` directly
            // to memory as a flat byte array of `EVL = nreg * VLENB` bytes. `vs3` must be aligned
            // to `nreg`. Ignores vtype, vl, masking. Honors `vstart` in byte units: the first
            // `vstart` bytes are skipped. If `vstart >= EVL`, the instruction is a no-op.
            Self::Vsr { vs3, rs1, nreg } => {
                if !ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                // Register-group alignment: `vs3` must be a multiple of `nreg`.
                if u32::from(vs3.bits()) % u32::from(nreg) != 0 {
                    Err(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                let vlenb = u64::from(ExtState::VLENB);
                let evl = u64::from(nreg) * vlenb;
                let vstart = u64::from(ext_state.vstart());
                if vstart < evl {
                    let base = regs.read(rs1).as_u64();
                    let mut byte_off = vstart;
                    // Copies register-by-register: each iteration writes the tail of one
                    // source register (the whole register after the first, possibly
                    // partial, one — see the increment of `byte_off` below).
                    while byte_off < evl {
                        let reg_off = byte_off / vlenb;
                        let in_reg = (byte_off % vlenb) as usize;
                        let reg_idx = (u64::from(vs3.bits()) + reg_off) as usize;
                        // SAFETY: `reg_idx < 32` because the decoder guarantees `nreg` in
                        // {1,2,4,8} and `vs3` is `nreg`-aligned (checked above), so
                        // `vs3.bits() + nreg - 1 <= 31`. `in_reg < VLENB` by construction.
                        let src = unsafe {
                            ext_state
                                .read_vreg()
                                .get_unchecked(reg_idx)
                                .get_unchecked(in_reg..)
                        };
                        // NOTE(review): plain `+` here (and in `Vsm` below) panics on
                        // address overflow in debug builds — confirm whether wrapping
                        // address arithmetic is required for addresses near u64::MAX.
                        if let Err(error) = memory.write_slice(base + byte_off, src) {
                            // Record progress so a restarted instruction resumes at the
                            // faulting byte. `byte_off < evl <= 8 * VLENB`; assumed to fit
                            // in `u16` (consistent with `vstart`'s width) — TODO confirm
                            // for the maximal supported VLEN.
                            ext_state.set_vstart(byte_off as u16);
                            return Err(ExecutionError::MemoryAccess(error));
                        }
                        // `src.len() == VLENB - in_reg`, so `byte_off` becomes
                        // register-aligned after the first iteration.
                        byte_off += src.len() as u64;
                    }
                }
                ext_state.reset_vstart();
            }
            // Mask store: stores `ceil(vl / 8)` bytes from `vs3` to memory with no masking.
            // Does not require a valid vtype: when vill is set vl is 0, so zero bytes are written.
            // Honors `vstart` at byte granularity: the first `vstart / 8` bytes are skipped.
            Self::Vsm { vs3, rs1 } => {
                if !ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                let vl = ext_state.vl();
                let evl_bytes = vl.div_ceil(u8::BITS);
                let start_byte = u32::from(ext_state.vstart());
                if start_byte < evl_bytes {
                    let base = regs.read(rs1).as_u64();
                    // SAFETY: `vs3.bits() < 32` is guaranteed by `VReg`.
                    // `evl_bytes = vl.div_ceil(8) <= VLEN / 8 = VLENB` because `vl <= VLMAX <=
                    // VLEN`, so the slice `start_byte..evl_bytes` is in bounds of the
                    // `VLENB`-byte source register.
                    let src = unsafe {
                        ext_state
                            .read_vreg()
                            .get_unchecked(usize::from(vs3.bits()))
                            .get_unchecked(start_byte as usize..evl_bytes as usize)
                    };
                    // NOTE(review): unlike `Vsr`, `vstart` is not updated before the
                    // memory error propagates here — this is a single `write_slice`, so
                    // there is no finer-grained progress to record, but confirm whether a
                    // partially completed write should still set `vstart`.
                    memory
                        .write_slice(base + u64::from(start_byte), src)
                        .map_err(ExecutionError::MemoryAccess)?;
                }
                ext_state.reset_vstart();
            }
            // Unit-stride store.
            //
            // Source EMUL = EEW/SEW * LMUL, computed via `data_register_count`. This gives
            // `group_regs` such that `VLMAX = group_regs * VLENB / eew.bytes()` matches the
            // architectural `vl`.
            Self::Vse { vs3, rs1, vm, eew } => {
                if !ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                // `vtype()` returning `None` means vtype is not valid for element
                // accesses; that is an illegal-instruction condition here.
                let vtype = ext_state
                    .vtype()
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                // `None` from `data_register_count` means the EEW/SEW/LMUL combination
                // yields an unsupported EMUL.
                let group_regs = vtype.vlmul().data_register_count(eew, vtype.vsew()).ok_or(
                    ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    },
                )?;
                zve64x_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
                    program_counter,
                    vs3,
                    group_regs,
                )?;
                // SAFETY:
                // - alignment: `check_register_group_alignment` verified `vs3 % group_regs == 0`
                //   and `vs3 + group_regs <= 32`
                // - `vl <= group_regs * VLENB / eew.bytes()`: `group_regs` is the EMUL computed for
                //   this `eew` and `vtype`, so this VLMAX equals the architectural VLMAX that
                //   bounds `vl`
                // - vs3/v0 overlap: stores read vs3 as a source; the spec does not restrict
                //   source/v0 overlap
                unsafe {
                    zve64x_store_helpers::execute_unit_stride_store(
                        ext_state,
                        memory,
                        vs3,
                        vm,
                        ext_state.vl(),
                        ext_state.vstart(),
                        regs.read(rs1).as_u64(),
                        eew,
                        group_regs,
                        // nf = 1: the non-segment store is the single-field case of the
                        // segment helper.
                        1,
                    )?;
                }
            }
            // Strided store
            Self::Vsse {
                vs3,
                rs1,
                rs2,
                vm,
                eew,
            } => {
                if !ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                let vtype = ext_state
                    .vtype()
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                let group_regs = vtype.vlmul().data_register_count(eew, vtype.vsew()).ok_or(
                    ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    },
                )?;
                zve64x_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
                    program_counter,
                    vs3,
                    group_regs,
                )?;
                // The stride in rs2 is a signed byte offset; negative strides walk
                // memory downwards.
                let stride = regs.read(rs2).as_u64().cast_signed();
                // SAFETY: same preconditions as `Vse`.
                unsafe {
                    zve64x_store_helpers::execute_strided_store(
                        ext_state,
                        memory,
                        vs3,
                        vm,
                        ext_state.vl(),
                        ext_state.vstart(),
                        regs.read(rs1).as_u64(),
                        stride,
                        eew,
                        group_regs,
                        1,
                    )?;
                }
            }
            // Indexed-unordered store. Ordering between elements is not guaranteed.
            Self::Vsuxei {
                vs3,
                rs1,
                vs2,
                vm,
                eew: index_eew,
            } => {
                if !ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                let vtype = ext_state
                    .vtype()
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                // For indexed stores the data elements use SEW (not the encoded EEW,
                // which applies to the index register group).
                let data_eew = vtype.vsew().as_eew();
                let data_group_regs = vtype.vlmul().register_count();
                let index_group_regs = vtype
                    .vlmul()
                    .index_register_count(index_eew, vtype.vsew())
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                zve64x_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
                    program_counter,
                    vs3,
                    data_group_regs,
                )?;
                zve64x_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
                    program_counter,
                    vs2,
                    index_group_regs,
                )?;
                // SAFETY:
                // - `vs3` alignment/bounds: `check_register_group_alignment` verified both
                // - `vs2` alignment/bounds: `check_register_group_alignment` verified both
                // - `vl <= data_group_regs * VLENB / data_eew.bytes()`: `data_group_regs` is the
                //   EMUL that bounds `vl`
                // - `vl <= index_group_regs * VLENB / index_eew.bytes()`: `index_register_count`
                //   returns the EMUL for the index group, which by the same argument bounds `vl`
                // - vs3/v0 overlap: stores read vs3 as a source; no restriction
                unsafe {
                    zve64x_store_helpers::execute_indexed_store(
                        ext_state,
                        memory,
                        vs3,
                        vs2,
                        vm,
                        ext_state.vl(),
                        u32::from(ext_state.vstart()),
                        regs.read(rs1).as_u64(),
                        data_eew,
                        index_eew,
                        data_group_regs,
                        1,
                    )?;
                }
            }
            // Indexed-ordered store. Elements must be written in element order.
            // The ordering constraint is visible only to other harts/devices; the implementation
            // here is already sequential, so no additional logic is needed.
            Self::Vsoxei {
                vs3,
                rs1,
                vs2,
                vm,
                eew: index_eew,
            } => {
                if !ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                let vtype = ext_state
                    .vtype()
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                let data_eew = vtype.vsew().as_eew();
                let data_group_regs = vtype.vlmul().register_count();
                let index_group_regs = vtype
                    .vlmul()
                    .index_register_count(index_eew, vtype.vsew())
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                zve64x_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
                    program_counter,
                    vs3,
                    data_group_regs,
                )?;
                zve64x_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
                    program_counter,
                    vs2,
                    index_group_regs,
                )?;
                // SAFETY: identical precondition argument to `Vsuxei`
                unsafe {
                    zve64x_store_helpers::execute_indexed_store(
                        ext_state,
                        memory,
                        vs3,
                        vs2,
                        vm,
                        ext_state.vl(),
                        u32::from(ext_state.vstart()),
                        regs.read(rs1).as_u64(),
                        data_eew,
                        index_eew,
                        data_group_regs,
                        1,
                    )?;
                }
            }
            // Unit-stride segment store: `nf` fields per element, stored contiguously
            Self::Vsseg {
                vs3,
                rs1,
                vm,
                eew,
                nf,
            } => {
                if !ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                let vtype = ext_state
                    .vtype()
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                let group_regs = vtype.vlmul().data_register_count(eew, vtype.vsew()).ok_or(
                    ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    },
                )?;
                // Segment variant of the alignment check: also validates that the
                // whole `nf * group_regs` span stays within the register file.
                zve64x_store_helpers::validate_segment_store_registers::<Reg, _, _, _>(
                    program_counter,
                    vs3,
                    group_regs,
                    nf,
                )?;
                // SAFETY:
                // - `validate_segment_store_registers` guarantees `vs3 % group_regs == 0` and `vs3
                //   + nf * group_regs <= 32`
                // - `vl <= group_regs * VLENB / eew.bytes()`: same EMUL argument as `Vse`
                // - vs3/v0 overlap: stores read vs3 as a source; no restriction
                unsafe {
                    zve64x_store_helpers::execute_unit_stride_store(
                        ext_state,
                        memory,
                        vs3,
                        vm,
                        ext_state.vl(),
                        ext_state.vstart(),
                        regs.read(rs1).as_u64(),
                        eew,
                        group_regs,
                        nf,
                    )?;
                }
            }
            // Strided segment store
            Self::Vssseg {
                vs3,
                rs1,
                rs2,
                vm,
                eew,
                nf,
            } => {
                if !ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                let vtype = ext_state
                    .vtype()
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                let group_regs = vtype.vlmul().data_register_count(eew, vtype.vsew()).ok_or(
                    ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    },
                )?;
                zve64x_store_helpers::validate_segment_store_registers::<Reg, _, _, _>(
                    program_counter,
                    vs3,
                    group_regs,
                    nf,
                )?;
                // Signed byte stride between consecutive segments.
                let stride = regs.read(rs2).as_u64().cast_signed();
                // SAFETY: same as `Vsseg`.
                unsafe {
                    zve64x_store_helpers::execute_strided_store(
                        ext_state,
                        memory,
                        vs3,
                        vm,
                        ext_state.vl(),
                        ext_state.vstart(),
                        regs.read(rs1).as_u64(),
                        stride,
                        eew,
                        group_regs,
                        nf,
                    )?;
                }
            }
            // Indexed-unordered segment store
            Self::Vsuxseg {
                vs3,
                rs1,
                vs2,
                vm,
                eew: index_eew,
                nf,
            } => {
                if !ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                let vtype = ext_state
                    .vtype()
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                // Data uses SEW; the encoded EEW applies to the index register group
                // (same split as `Vsuxei`).
                let data_eew = vtype.vsew().as_eew();
                let data_group_regs = vtype.vlmul().register_count();
                let index_group_regs = vtype
                    .vlmul()
                    .index_register_count(index_eew, vtype.vsew())
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                zve64x_store_helpers::validate_segment_store_registers::<Reg, _, _, _>(
                    program_counter,
                    vs3,
                    data_group_regs,
                    nf,
                )?;
                zve64x_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
                    program_counter,
                    vs2,
                    index_group_regs,
                )?;
                // SAFETY:
                // - `validate_segment_store_registers` covers `vs3` alignment/bounds
                // - `check_register_group_alignment` covers `vs2` alignment/bounds
                // - `vl` bounded by both EMUL groups as in `Vsuxei`
                // - vs3/v0 overlap: stores read vs3 as a source; no restriction
                unsafe {
                    zve64x_store_helpers::execute_indexed_store(
                        ext_state,
                        memory,
                        vs3,
                        vs2,
                        vm,
                        ext_state.vl(),
                        u32::from(ext_state.vstart()),
                        regs.read(rs1).as_u64(),
                        data_eew,
                        index_eew,
                        data_group_regs,
                        nf,
                    )?;
                }
            }
            // Indexed-ordered segment store. Sequential iteration satisfies the ordering
            // requirement.
            Self::Vsoxseg {
                vs3,
                rs1,
                vs2,
                vm,
                eew: index_eew,
                nf,
            } => {
                if !ext_state.vector_instructions_allowed() {
                    Err(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                }
                let vtype = ext_state
                    .vtype()
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                let data_eew = vtype.vsew().as_eew();
                let data_group_regs = vtype.vlmul().register_count();
                let index_group_regs = vtype
                    .vlmul()
                    .index_register_count(index_eew, vtype.vsew())
                    .ok_or(ExecutionError::IllegalInstruction {
                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
                    })?;
                zve64x_store_helpers::validate_segment_store_registers::<Reg, _, _, _>(
                    program_counter,
                    vs3,
                    data_group_regs,
                    nf,
                )?;
                zve64x_load_helpers::check_register_group_alignment::<Reg, _, _, _>(
                    program_counter,
                    vs2,
                    index_group_regs,
                )?;
                // SAFETY: identical precondition argument to `Vsuxseg`
                unsafe {
                    zve64x_store_helpers::execute_indexed_store(
                        ext_state,
                        memory,
                        vs3,
                        vs2,
                        vm,
                        ext_state.vl(),
                        u32::from(ext_state.vstart()),
                        regs.read(rs1).as_u64(),
                        data_eew,
                        index_eew,
                        data_group_regs,
                        nf,
                    )?;
                }
            }
        }

        // All paths that reach here completed (or legally no-op'd) the store.
        Ok(ControlFlow::Continue(()))
    }
}