// File: ab_riscv_interpreter/v/zve64x/store.rs

//! Zve64x vector store instructions.
//!
//! Execution for the Zve64x store family: whole-register (`Vsr`), mask (`Vsm`),
//! unit-stride (`Vse`), strided (`Vsse`), indexed unordered/ordered
//! (`Vsuxei`/`Vsoxei`), and their segment variants
//! (`Vsseg`/`Vssseg`/`Vsuxseg`/`Vsoxseg`).

#[cfg(test)]
mod tests;
pub mod zve64x_store_helpers;

use crate::v::vector_registers::VectorRegistersExt;
use crate::v::zve64x::load::zve64x_load_helpers;
use crate::v::zve64x::zve64x_helpers;
use crate::{
    ExecutableInstruction, ExecutionError, InterpreterState, ProgramCounter, VirtualMemory,
};
use ab_riscv_macros::instruction_execution;
use ab_riscv_primitives::instructions::v::zve64x::store::Zve64xStoreInstruction;
use ab_riscv_primitives::registers::general_purpose::{RegType, Register};
use ab_riscv_primitives::registers::vector::VReg;
use core::fmt;
use core::ops::ControlFlow;
20#[instruction_execution]
21impl<Reg, ExtState, Memory, PC, InstructionHandler, CustomError>
22    ExecutableInstruction<
23        InterpreterState<Reg, ExtState, Memory, PC, InstructionHandler, CustomError>,
24        CustomError,
25    > for Zve64xStoreInstruction<Reg>
26where
27    Reg: Register,
28    [(); Reg::N]:,
29    ExtState: VectorRegistersExt<Reg, CustomError>,
30    [(); ExtState::ELEN as usize]:,
31    [(); ExtState::VLEN as usize]:,
32    [(); ExtState::VLENB as usize]:,
33    Memory: VirtualMemory,
34    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
35    CustomError: fmt::Debug,
36{
37    #[inline(always)]
38    fn execute(
39        self,
40        state: &mut InterpreterState<Reg, ExtState, Memory, PC, InstructionHandler, CustomError>,
41    ) -> Result<ControlFlow<()>, ExecutionError<Reg::Type, CustomError>> {
42        match self {
43            // Whole-register store: stores `nreg` consecutive registers starting at `vs3` directly
44            // to memory. `vs3` must be aligned to `nreg`. Ignores vtype, vl, vstart, masking.
45            Self::Vsr { vs3, rs1, nreg } => {
46                if !state.ext_state.vector_instructions_allowed() {
47                    Err(ExecutionError::IllegalInstruction {
48                        address: state
49                            .instruction_fetcher
50                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
51                    })?;
52                }
53                if u32::from(vs3.bits()) % u32::from(nreg) != 0 {
54                    Err(ExecutionError::IllegalInstruction {
55                        address: state
56                            .instruction_fetcher
57                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
58                    })?;
59                }
60                let base = state.regs.read(rs1).as_u64();
61                let vlenb = u64::from(ExtState::VLENB);
62                for reg_off in 0..u64::from(nreg) {
63                    let reg_idx = u64::from(vs3.bits()) + reg_off;
64                    // SAFETY: `reg_idx < 32` because the decoder guarantees `nreg` in {1,2,4,8}
65                    // and `vs3` is `nreg`-aligned (checked above), so
66                    // `vs3.bits() + nreg - 1 <= 31`.
67                    let src =
68                        unsafe { state.ext_state.read_vreg().get_unchecked(reg_idx as usize) };
69                    state
70                        .memory
71                        .write_slice(base + reg_off * vlenb, src)
72                        .map_err(ExecutionError::MemoryAccess)?;
73                }
74                state.ext_state.reset_vstart();
75            }
76            // Mask store: stores `ceil(vl / 8)` bytes from `vs3` to memory with no masking.
77            // Does not require a valid vtype: when vill is set vl is 0, so zero bytes are written.
78            Self::Vsm { vs3, rs1 } => {
79                if !state.ext_state.vector_instructions_allowed() {
80                    Err(ExecutionError::IllegalInstruction {
81                        address: state
82                            .instruction_fetcher
83                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
84                    })?;
85                }
86                let vl = state.ext_state.vl();
87                let byte_count = vl.div_ceil(u8::BITS) as usize;
88                if byte_count > 0 {
89                    let base = state.regs.read(rs1).as_u64();
90                    // SAFETY: `vs3.bits() < 32` is guaranteed by `VReg`.
91                    // `byte_count = vl.div_ceil(8) <= VLEN / 8 = VLENB` because `vl <= VLMAX <=
92                    // VLEN`, so `..byte_count` is in bounds within the
93                    // `VLENB`-byte source register.
94                    let src = unsafe {
95                        state
96                            .ext_state
97                            .read_vreg()
98                            .get_unchecked(usize::from(vs3.bits()))
99                            .get_unchecked(..byte_count)
100                    };
101                    state
102                        .memory
103                        .write_slice(base, src)
104                        .map_err(ExecutionError::MemoryAccess)?;
105                }
106                state.ext_state.reset_vstart();
107            }
108            // Unit-stride store.
109            //
110            // Source EMUL = EEW/SEW * LMUL, computed via `index_register_count`. This gives
111            // `group_regs` such that `VLMAX = group_regs * VLENB / eew.bytes()` matches the
112            // architectural `vl`.
113            Self::Vse { vs3, rs1, vm, eew } => {
114                if !state.ext_state.vector_instructions_allowed() {
115                    Err(ExecutionError::IllegalInstruction {
116                        address: state
117                            .instruction_fetcher
118                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
119                    })?;
120                }
121                let vtype = state
122                    .ext_state
123                    .vtype()
124                    .ok_or(ExecutionError::IllegalInstruction {
125                        address: state
126                            .instruction_fetcher
127                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
128                    })?;
129                let group_regs = vtype
130                    .vlmul()
131                    .index_register_count(eew, vtype.vsew())
132                    .ok_or(ExecutionError::IllegalInstruction {
133                        address: state
134                            .instruction_fetcher
135                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
136                    })?;
137                zve64x_load_helpers::check_register_group_alignment(state, vs3, group_regs)?;
138                if !vm && zve64x_load_helpers::groups_overlap(vs3, group_regs, VReg::V0, 1) {
139                    Err(ExecutionError::IllegalInstruction {
140                        address: state
141                            .instruction_fetcher
142                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
143                    })?;
144                }
145                // SAFETY:
146                // - alignment: `check_register_group_alignment` verified `vs3 % group_regs == 0`
147                //   and `vs3 + group_regs <= 32`
148                // - `vl <= group_regs * VLENB / eew.bytes()`: `group_regs` is the EMUL computed for
149                //   this `eew` and `vtype`, so this VLMAX equals the architectural VLMAX that
150                //   bounds `vl`
151                // - mask overlap: checked above via `groups_overlap`
152                unsafe {
153                    zve64x_store_helpers::execute_unit_stride_store(
154                        state,
155                        vs3,
156                        vm,
157                        state.ext_state.vl(),
158                        state.ext_state.vstart(),
159                        state.regs.read(rs1).as_u64(),
160                        eew,
161                        group_regs,
162                        1,
163                    )?;
164                }
165            }
166            // Strided store
167            Self::Vsse {
168                vs3,
169                rs1,
170                rs2,
171                vm,
172                eew,
173            } => {
174                if !state.ext_state.vector_instructions_allowed() {
175                    Err(ExecutionError::IllegalInstruction {
176                        address: state
177                            .instruction_fetcher
178                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
179                    })?;
180                }
181                let vtype = state
182                    .ext_state
183                    .vtype()
184                    .ok_or(ExecutionError::IllegalInstruction {
185                        address: state
186                            .instruction_fetcher
187                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
188                    })?;
189                let group_regs = vtype
190                    .vlmul()
191                    .index_register_count(eew, vtype.vsew())
192                    .ok_or(ExecutionError::IllegalInstruction {
193                        address: state
194                            .instruction_fetcher
195                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
196                    })?;
197                zve64x_load_helpers::check_register_group_alignment(state, vs3, group_regs)?;
198                if !vm && zve64x_load_helpers::groups_overlap(vs3, group_regs, VReg::V0, 1) {
199                    Err(ExecutionError::IllegalInstruction {
200                        address: state
201                            .instruction_fetcher
202                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
203                    })?;
204                }
205                let stride = state.regs.read(rs2).as_u64().cast_signed();
206                // SAFETY: same preconditions as `Vse`.
207                unsafe {
208                    zve64x_store_helpers::execute_strided_store(
209                        state,
210                        vs3,
211                        vm,
212                        state.ext_state.vl(),
213                        state.ext_state.vstart(),
214                        state.regs.read(rs1).as_u64(),
215                        stride,
216                        eew,
217                        group_regs,
218                        1,
219                    )?;
220                }
221            }
222            // Indexed-unordered store. Ordering between elements is not guaranteed.
223            Self::Vsuxei {
224                vs3,
225                rs1,
226                vs2,
227                vm,
228                eew: index_eew,
229            } => {
230                if !state.ext_state.vector_instructions_allowed() {
231                    Err(ExecutionError::IllegalInstruction {
232                        address: state
233                            .instruction_fetcher
234                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
235                    })?;
236                }
237                let vtype = state
238                    .ext_state
239                    .vtype()
240                    .ok_or(ExecutionError::IllegalInstruction {
241                        address: state
242                            .instruction_fetcher
243                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
244                    })?;
245                let data_eew = vtype.vsew().as_eew();
246                let data_group_regs = vtype.vlmul().register_count();
247                let index_group_regs = vtype
248                    .vlmul()
249                    .index_register_count(index_eew, vtype.vsew())
250                    .ok_or(ExecutionError::IllegalInstruction {
251                        address: state
252                            .instruction_fetcher
253                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
254                    })?;
255                zve64x_load_helpers::check_register_group_alignment(state, vs3, data_group_regs)?;
256                zve64x_load_helpers::check_register_group_alignment(state, vs2, index_group_regs)?;
257                if !vm && zve64x_load_helpers::groups_overlap(vs3, data_group_regs, VReg::V0, 1) {
258                    Err(ExecutionError::IllegalInstruction {
259                        address: state
260                            .instruction_fetcher
261                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
262                    })?;
263                }
264                // SAFETY:
265                // - `vs3` alignment/bounds: `check_register_group_alignment` verified both
266                // - `vs2` alignment/bounds: `check_register_group_alignment` verified both
267                // - `vl <= data_group_regs * VLENB / data_eew.bytes()`: `data_group_regs` is the
268                //   EMUL that bounds `vl`
269                // - `vl <= index_group_regs * VLENB / index_eew.bytes()`: `index_register_count`
270                //   returns the EMUL for the index group, which by the same argument bounds `vl`
271                // - mask overlap: checked above
272                unsafe {
273                    zve64x_store_helpers::execute_indexed_store(
274                        state,
275                        vs3,
276                        vs2,
277                        vm,
278                        state.ext_state.vl(),
279                        u32::from(state.ext_state.vstart()),
280                        state.regs.read(rs1).as_u64(),
281                        data_eew,
282                        index_eew,
283                        data_group_regs,
284                        1,
285                    )?;
286                }
287            }
288            // Indexed-ordered store. Elements must be written in element order.
289            // The ordering constraint is visible only to other harts/devices; the implementation
290            // here is already sequential, so no additional logic is needed.
291            Self::Vsoxei {
292                vs3,
293                rs1,
294                vs2,
295                vm,
296                eew: index_eew,
297            } => {
298                if !state.ext_state.vector_instructions_allowed() {
299                    Err(ExecutionError::IllegalInstruction {
300                        address: state
301                            .instruction_fetcher
302                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
303                    })?;
304                }
305                let vtype = state
306                    .ext_state
307                    .vtype()
308                    .ok_or(ExecutionError::IllegalInstruction {
309                        address: state
310                            .instruction_fetcher
311                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
312                    })?;
313                let data_eew = vtype.vsew().as_eew();
314                let data_group_regs = vtype.vlmul().register_count();
315                let index_group_regs = vtype
316                    .vlmul()
317                    .index_register_count(index_eew, vtype.vsew())
318                    .ok_or(ExecutionError::IllegalInstruction {
319                        address: state
320                            .instruction_fetcher
321                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
322                    })?;
323                zve64x_load_helpers::check_register_group_alignment(state, vs3, data_group_regs)?;
324                zve64x_load_helpers::check_register_group_alignment(state, vs2, index_group_regs)?;
325                if !vm && zve64x_load_helpers::groups_overlap(vs3, data_group_regs, VReg::V0, 1) {
326                    Err(ExecutionError::IllegalInstruction {
327                        address: state
328                            .instruction_fetcher
329                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
330                    })?;
331                }
332                // SAFETY: identical precondition argument to `Vsuxei`
333                unsafe {
334                    zve64x_store_helpers::execute_indexed_store(
335                        state,
336                        vs3,
337                        vs2,
338                        vm,
339                        state.ext_state.vl(),
340                        u32::from(state.ext_state.vstart()),
341                        state.regs.read(rs1).as_u64(),
342                        data_eew,
343                        index_eew,
344                        data_group_regs,
345                        1,
346                    )?;
347                }
348            }
349            // Unit-stride segment store: `nf` fields per element, stored contiguously
350            Self::Vsseg {
351                vs3,
352                rs1,
353                vm,
354                eew,
355                nf,
356            } => {
357                if !state.ext_state.vector_instructions_allowed() {
358                    Err(ExecutionError::IllegalInstruction {
359                        address: state
360                            .instruction_fetcher
361                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
362                    })?;
363                }
364                let vtype = state
365                    .ext_state
366                    .vtype()
367                    .ok_or(ExecutionError::IllegalInstruction {
368                        address: state
369                            .instruction_fetcher
370                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
371                    })?;
372                let group_regs = vtype
373                    .vlmul()
374                    .index_register_count(eew, vtype.vsew())
375                    .ok_or(ExecutionError::IllegalInstruction {
376                        address: state
377                            .instruction_fetcher
378                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
379                    })?;
380                zve64x_load_helpers::validate_segment_registers(state, vs3, vm, group_regs, nf)?;
381                // SAFETY:
382                // - `validate_segment_registers` guarantees `vs3 % group_regs == 0` and `vs3 + nf *
383                //   group_regs <= 32`
384                // - when `vm=false`, `validate_segment_registers` ensures `vs3 != 0`, so `vs3` does
385                //   not overlap `v0`
386                // - `vl <= group_regs * VLENB / eew.bytes()`: same EMUL argument as `Vse`
387                unsafe {
388                    zve64x_store_helpers::execute_unit_stride_store(
389                        state,
390                        vs3,
391                        vm,
392                        state.ext_state.vl(),
393                        state.ext_state.vstart(),
394                        state.regs.read(rs1).as_u64(),
395                        eew,
396                        group_regs,
397                        nf,
398                    )?;
399                }
400            }
401            // Strided segment store
402            Self::Vssseg {
403                vs3,
404                rs1,
405                rs2,
406                vm,
407                eew,
408                nf,
409            } => {
410                if !state.ext_state.vector_instructions_allowed() {
411                    Err(ExecutionError::IllegalInstruction {
412                        address: state
413                            .instruction_fetcher
414                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
415                    })?;
416                }
417                let vtype = state
418                    .ext_state
419                    .vtype()
420                    .ok_or(ExecutionError::IllegalInstruction {
421                        address: state
422                            .instruction_fetcher
423                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
424                    })?;
425                let group_regs = vtype
426                    .vlmul()
427                    .index_register_count(eew, vtype.vsew())
428                    .ok_or(ExecutionError::IllegalInstruction {
429                        address: state
430                            .instruction_fetcher
431                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
432                    })?;
433                zve64x_load_helpers::validate_segment_registers(state, vs3, vm, group_regs, nf)?;
434                let stride = state.regs.read(rs2).as_u64().cast_signed();
435                // SAFETY: same as `Vsseg`; `validate_segment_registers` covers alignment/bounds.
436                unsafe {
437                    zve64x_store_helpers::execute_strided_store(
438                        state,
439                        vs3,
440                        vm,
441                        state.ext_state.vl(),
442                        state.ext_state.vstart(),
443                        state.regs.read(rs1).as_u64(),
444                        stride,
445                        eew,
446                        group_regs,
447                        nf,
448                    )?;
449                }
450            }
451            // Indexed-unordered segment store
452            Self::Vsuxseg {
453                vs3,
454                rs1,
455                vs2,
456                vm,
457                eew: index_eew,
458                nf,
459            } => {
460                if !state.ext_state.vector_instructions_allowed() {
461                    Err(ExecutionError::IllegalInstruction {
462                        address: state
463                            .instruction_fetcher
464                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
465                    })?;
466                }
467                let vtype = state
468                    .ext_state
469                    .vtype()
470                    .ok_or(ExecutionError::IllegalInstruction {
471                        address: state
472                            .instruction_fetcher
473                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
474                    })?;
475                let data_eew = vtype.vsew().as_eew();
476                let data_group_regs = vtype.vlmul().register_count();
477                let index_group_regs = vtype
478                    .vlmul()
479                    .index_register_count(index_eew, vtype.vsew())
480                    .ok_or(ExecutionError::IllegalInstruction {
481                        address: state
482                            .instruction_fetcher
483                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
484                    })?;
485                zve64x_load_helpers::validate_segment_registers(
486                    state,
487                    vs3,
488                    vm,
489                    data_group_regs,
490                    nf,
491                )?;
492                zve64x_load_helpers::check_register_group_alignment(state, vs2, index_group_regs)?;
493                // SAFETY:
494                // - `validate_segment_registers` covers `vs3` alignment/bounds/mask-overlap
495                // - `check_register_group_alignment` covers `vs2` alignment/bounds
496                // - `vl` bounded by both EMUL groups as in `Vsuxei`
497                unsafe {
498                    zve64x_store_helpers::execute_indexed_store(
499                        state,
500                        vs3,
501                        vs2,
502                        vm,
503                        state.ext_state.vl(),
504                        u32::from(state.ext_state.vstart()),
505                        state.regs.read(rs1).as_u64(),
506                        data_eew,
507                        index_eew,
508                        data_group_regs,
509                        nf,
510                    )?;
511                }
512            }
513            // Indexed-ordered segment store. Sequential iteration satisfies the ordering
514            // requirement.
515            Self::Vsoxseg {
516                vs3,
517                rs1,
518                vs2,
519                vm,
520                eew: index_eew,
521                nf,
522            } => {
523                if !state.ext_state.vector_instructions_allowed() {
524                    Err(ExecutionError::IllegalInstruction {
525                        address: state
526                            .instruction_fetcher
527                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
528                    })?;
529                }
530                let vtype = state
531                    .ext_state
532                    .vtype()
533                    .ok_or(ExecutionError::IllegalInstruction {
534                        address: state
535                            .instruction_fetcher
536                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
537                    })?;
538                let data_eew = vtype.vsew().as_eew();
539                let data_group_regs = vtype.vlmul().register_count();
540                let index_group_regs = vtype
541                    .vlmul()
542                    .index_register_count(index_eew, vtype.vsew())
543                    .ok_or(ExecutionError::IllegalInstruction {
544                        address: state
545                            .instruction_fetcher
546                            .old_pc(zve64x_helpers::INSTRUCTION_SIZE),
547                    })?;
548                zve64x_load_helpers::validate_segment_registers(
549                    state,
550                    vs3,
551                    vm,
552                    data_group_regs,
553                    nf,
554                )?;
555                zve64x_load_helpers::check_register_group_alignment(state, vs2, index_group_regs)?;
556                // SAFETY: identical precondition argument to `Vsuxseg`
557                unsafe {
558                    zve64x_store_helpers::execute_indexed_store(
559                        state,
560                        vs3,
561                        vs2,
562                        vm,
563                        state.ext_state.vl(),
564                        u32::from(state.ext_state.vstart()),
565                        state.regs.read(rs1).as_u64(),
566                        data_eew,
567                        index_eew,
568                        data_group_regs,
569                        nf,
570                    )?;
571                }
572            }
573        }
574
575        Ok(ControlFlow::Continue(()))
576    }
577}