Skip to main content

ab_riscv_interpreter/zvbb/zvkb/
zvkb_helpers.rs

1//! Opaque helpers for Zvkb extension
2
3use crate::v::vector_registers::VectorRegistersExt;
4pub use crate::v::zvexx::arith::zvexx_arith_helpers::{OpSrc, check_vreg_group_alignment};
5use crate::v::zvexx::arith::zvexx_arith_helpers::{read_element_u64, sew_mask, write_element_u64};
6use crate::v::zvexx::load::zvexx_load_helpers::mask_bit;
7use ab_riscv_primitives::prelude::*;
8use core::fmt;
9
10/// Execute element-wise and-not over `vstart..vl`, writing SEW-wide results into `vd`.
11///
12/// For each active element i: `vd[i] = ~src[i] & vs2[i]`.
13///
14/// When `vm=true` all elements are active. When `vm=false` the mask register `v0` gates each
15/// element; masked-off elements are left undisturbed (undisturbed policy).
16///
17/// # Safety
18/// - `vd.to_bits() % group_regs == 0` and `vd.to_bits() + group_regs <= 32`
19/// - `vs2.to_bits() % group_regs == 0` and `vs2.to_bits() + group_regs <= 32`
20/// - `src` register (if `Vreg`) satisfies the same alignment as `vs2`
21/// - `vl <= group_regs * VLENB / sew_bytes`
22#[inline(always)]
23#[doc(hidden)]
24pub unsafe fn execute_vandn<Reg, ExtState, CustomError>(
25    ext_state: &mut ExtState,
26    vd: VReg,
27    vs2: VReg,
28    src: OpSrc,
29    sew: Vsew,
30    vm: bool,
31) where
32    Reg: Register,
33    ExtState: VectorRegistersExt<Reg, CustomError>,
34    [(); ExtState::ELEN as usize]:,
35    [(); ExtState::VLEN as usize]:,
36    [(); ExtState::VLENB as usize]:,
37    CustomError: fmt::Debug,
38{
39    let vl = ext_state.vl();
40    let vstart = ext_state.vstart();
41    for i in u32::from(vstart)..vl {
42        if !vm && !mask_bit(ext_state.read_vregs().get(VReg::V0), i) {
43            continue;
44        }
45        // SAFETY: `vs2 % group_regs == 0` and `vs2 + group_regs <= 32` (caller precondition);
46        // `i < vl <= group_regs * elems_per_reg`, so
47        // `vs2 + i / elems_per_reg < vs2 + group_regs <= 32`
48        let a = unsafe { read_element_u64(ext_state.read_vregs(), vs2, i, sew) };
49        let b = match src {
50            OpSrc::Vreg(vs1_base) => {
51                // SAFETY: caller verified that the vs1 register group satisfies the same alignment
52                // constraint as vs2; the index argument is identical, so the same bound holds
53                unsafe { read_element_u64(ext_state.read_vregs(), vs1_base, i, sew) }
54            }
55            OpSrc::Scalar(val) => val,
56        };
57        // `a` is zero-extended to SEW bits by `read_element_u64`; `!b` may have high bits set, but
58        // AND with `a` (whose upper bits are zero) zeros them out naturally
59        let result = !b & a;
60        // SAFETY: `vd % group_regs == 0` and `vd + group_regs <= 32` (caller precondition);
61        // `i < vl <= group_regs * elems_per_reg`, so
62        // `vd + i / elems_per_reg < vd + group_regs <= 32`
63        unsafe {
64            write_element_u64(ext_state.write_vregs(), vd, i, sew, result);
65        }
66    }
67    ext_state.mark_vs_dirty();
68    ext_state.reset_vstart();
69}
70
71/// Execute element-wise bit-reversal within bytes over `vstart..vl`, writing results into `vd`.
72///
73/// For each active element i: the bits within each byte of `vs2[i]` are reversed. The byte order
74/// within the element is preserved; only the bit order within each byte changes.
75///
76/// When `vm=false`, masked-off elements are left undisturbed.
77///
78/// # Safety
79/// Same register-group constraints as [`execute_vandn`], minus the `src` constraint.
80#[inline(always)]
81#[doc(hidden)]
82pub unsafe fn execute_vbrev8<Reg, ExtState, CustomError>(
83    ext_state: &mut ExtState,
84    vd: VReg,
85    vs2: VReg,
86    sew: Vsew,
87    vm: bool,
88) where
89    Reg: Register,
90    ExtState: VectorRegistersExt<Reg, CustomError>,
91    [(); ExtState::ELEN as usize]:,
92    [(); ExtState::VLEN as usize]:,
93    [(); ExtState::VLENB as usize]:,
94    CustomError: fmt::Debug,
95{
96    let vl = ext_state.vl();
97    let vstart = ext_state.vstart();
98    let sew_bytes = u32::from(sew.bytes_width());
99    for i in u32::from(vstart)..vl {
100        if !vm && !mask_bit(ext_state.read_vregs().get(VReg::V0), i) {
101            continue;
102        }
103        // SAFETY: `vs2 % group_regs == 0` and `vs2 + group_regs <= 32`; `i < vl`
104        let elem = unsafe { read_element_u64(ext_state.read_vregs(), vs2, i, sew) };
105        // Decompose into bytes (LE = index 0 is least-significant), reverse bits within each active
106        // byte, then reassemble; bytes beyond sew_bytes are already zero because `read_element_u64`
107        // zero-extends to u64
108        let mut bytes = elem.to_le_bytes();
109        for byte in &mut bytes[..sew_bytes as usize] {
110            *byte = byte.reverse_bits();
111        }
112        let result = u64::from_le_bytes(bytes);
113        // SAFETY: `vd % group_regs == 0` and `vd + group_regs <= 32`; `i < vl`
114        unsafe {
115            write_element_u64(ext_state.write_vregs(), vd, i, sew, result);
116        }
117    }
118    ext_state.mark_vs_dirty();
119    ext_state.reset_vstart();
120}
121
122/// Execute element-wise byte reversal over `vstart..vl`, writing results into `vd`.
123///
124/// For each active element i: the bytes within `vs2[i]` are reversed.
125///
126/// When `vm=false`, masked-off elements are left undisturbed.
127///
128/// # Safety
129/// Same register-group constraints as [`execute_vandn`], minus the `src` constraint.
130#[inline(always)]
131#[doc(hidden)]
132pub unsafe fn execute_vrev8<Reg, ExtState, CustomError>(
133    ext_state: &mut ExtState,
134    vd: VReg,
135    vs2: VReg,
136    sew: Vsew,
137    vm: bool,
138) where
139    Reg: Register,
140    ExtState: VectorRegistersExt<Reg, CustomError>,
141    [(); ExtState::ELEN as usize]:,
142    [(); ExtState::VLEN as usize]:,
143    [(); ExtState::VLENB as usize]:,
144    CustomError: fmt::Debug,
145{
146    let vl = ext_state.vl();
147    let vstart = ext_state.vstart();
148    let sew_bytes = u32::from(sew.bytes_width());
149    for i in u32::from(vstart)..vl {
150        if !vm && !mask_bit(ext_state.read_vregs().get(VReg::V0), i) {
151            continue;
152        }
153        // SAFETY: `vs2 % group_regs == 0` and `vs2 + group_regs <= 32`; `i < vl`
154        let elem = unsafe { read_element_u64(ext_state.read_vregs(), vs2, i, sew) };
155        // Reverse the byte slice covering exactly the SEW-wide element; bytes beyond sew_bytes are
156        // zero (from zero-extension) and are left untouched
157        let mut bytes = elem.to_le_bytes();
158        bytes[..sew_bytes as usize].reverse();
159        let result = u64::from_le_bytes(bytes);
160        // SAFETY: `vd % group_regs == 0` and `vd + group_regs <= 32`; `i < vl`
161        unsafe {
162            write_element_u64(ext_state.write_vregs(), vd, i, sew, result);
163        }
164    }
165    ext_state.mark_vs_dirty();
166    ext_state.reset_vstart();
167}
168
169/// Execute element-wise rotate-left over `vstart..vl`, writing SEW-wide results into `vd`.
170///
171/// For each active element i: `vd[i] = rotate_left(vs2[i], src[i] % SEW)`.
172///
173/// When `vm=false`, masked-off elements are left undisturbed.
174///
175/// # Safety
176/// Same register-group constraints as [`execute_vandn`].
177#[inline(always)]
178#[doc(hidden)]
179pub unsafe fn execute_vrol<Reg, ExtState, CustomError>(
180    ext_state: &mut ExtState,
181    vd: VReg,
182    vs2: VReg,
183    src: OpSrc,
184    sew: Vsew,
185    vm: bool,
186) where
187    Reg: Register,
188    ExtState: VectorRegistersExt<Reg, CustomError>,
189    [(); ExtState::ELEN as usize]:,
190    [(); ExtState::VLEN as usize]:,
191    [(); ExtState::VLENB as usize]:,
192    CustomError: fmt::Debug,
193{
194    let vl = ext_state.vl();
195    let vstart = ext_state.vstart();
196    let sew_bits = u64::from(sew.bits_width());
197    let mask = sew_mask(sew);
198    for i in u32::from(vstart)..vl {
199        if !vm && !mask_bit(ext_state.read_vregs().get(VReg::V0), i) {
200            continue;
201        }
202        // SAFETY: `vs2 % group_regs == 0` and `vs2 + group_regs <= 32`; `i < vl`
203        let a = unsafe { read_element_u64(ext_state.read_vregs(), vs2, i, sew) };
204        let amount = match src {
205            OpSrc::Vreg(vs1_base) => {
206                // SAFETY: same alignment constraint as vs2; same index bound
207                unsafe { read_element_u64(ext_state.read_vregs(), vs1_base, i, sew) }
208            }
209            OpSrc::Scalar(val) => val,
210        };
211        // `shift < sew_bits`, so `a << shift` never shifts by >= 64 and is safe.
212        // When shift == 0, `sew_bits - shift` == sew_bits; `unbounded_shr` defines
213        // shifts >= bit-width as 0, which is correct: a zero rotation contributes no low bits.
214        let shift = (amount % sew_bits) as u32;
215        let hi = (a << shift) & mask;
216        let lo = a.unbounded_shr(sew_bits as u32 - shift);
217        let result = hi | lo;
218        // SAFETY: `vd % group_regs == 0` and `vd + group_regs <= 32`; `i < vl`
219        unsafe {
220            write_element_u64(ext_state.write_vregs(), vd, i, sew, result);
221        }
222    }
223    ext_state.mark_vs_dirty();
224    ext_state.reset_vstart();
225}
226
227/// Execute element-wise rotate-right over `vstart..vl`, writing SEW-wide results into `vd`.
228///
229/// For each active element i: `vd[i] = rotate_right(vs2[i], src[i] % SEW)`.
230///
231/// Pass `vm=true` for `vror.vi` (bit[25] is consumed as imm[5]; no mask bit exists).
232///
233/// When `vm=false`, masked-off elements are left undisturbed.
234///
235/// # Safety
236/// Same register-group constraints as [`execute_vandn`].
237#[inline(always)]
238#[doc(hidden)]
239pub unsafe fn execute_vror<Reg, ExtState, CustomError>(
240    ext_state: &mut ExtState,
241    vd: VReg,
242    vs2: VReg,
243    src: OpSrc,
244    sew: Vsew,
245    vm: bool,
246) where
247    Reg: Register,
248    ExtState: VectorRegistersExt<Reg, CustomError>,
249    [(); ExtState::ELEN as usize]:,
250    [(); ExtState::VLEN as usize]:,
251    [(); ExtState::VLENB as usize]:,
252    CustomError: fmt::Debug,
253{
254    let vl = ext_state.vl();
255    let vstart = ext_state.vstart();
256    let sew_bits = u64::from(sew.bits_width());
257    let mask = sew_mask(sew);
258    for i in u32::from(vstart)..vl {
259        if !vm && !mask_bit(ext_state.read_vregs().get(VReg::V0), i) {
260            continue;
261        }
262        // SAFETY: `vs2 % group_regs == 0` and `vs2 + group_regs <= 32`; `i < vl`
263        let a = unsafe { read_element_u64(ext_state.read_vregs(), vs2, i, sew) };
264        let amount = match src {
265            OpSrc::Vreg(vs1_base) => {
266                // SAFETY: same alignment constraint as vs2; same index bound
267                unsafe { read_element_u64(ext_state.read_vregs(), vs1_base, i, sew) }
268            }
269            OpSrc::Scalar(val) => val,
270        };
271        // `shift < sew_bits`, so `a >> shift` never shifts by >= 64 and is safe.
272        // When shift == 0, `sew_bits - shift` == sew_bits; `unbounded_shl` defines
273        // shifts >= bit-width as 0, which is correct: a zero rotation contributes no high bits.
274        let shift = (amount % sew_bits) as u32;
275        let lo = a >> shift;
276        let hi = a.unbounded_shl(sew_bits as u32 - shift) & mask;
277        let result = lo | hi;
278        // SAFETY: `vd % group_regs == 0` and `vd + group_regs <= 32`; `i < vl`
279        unsafe {
280            write_element_u64(ext_state.write_vregs(), vd, i, sew, result);
281        }
282    }
283    ext_state.mark_vs_dirty();
284    ext_state.reset_vstart();
285}