ab_riscv_interpreter/v/zvexx/arith/
zvexx_arith_helpers.rs

1//! Opaque helpers for ZveXx extension
2
3use crate::v::vector_registers::{VectorRegisterFile, VectorRegistersExt};
4use crate::v::zvexx::load::zvexx_load_helpers::{mask_bit, snapshot_mask};
5use crate::v::zvexx::zvexx_helpers::INSTRUCTION_SIZE;
6use crate::{ExecutionError, ProgramCounter};
7use ab_riscv_primitives::prelude::*;
8use core::fmt;
9use core::hint::cold_path;
10use core::num::NonZeroU8;
11
12/// Check that `vreg` (`vd`/`vs`) is aligned to `group_regs` and fits within `[0, 32)`
13#[inline(always)]
14#[doc(hidden)]
15#[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
16pub fn check_vreg_group_alignment<Reg, Memory, PC, CustomError>(
17    program_counter: &PC,
18    vreg: VReg,
19    group_regs: NonZeroU8,
20) -> Result<(), ExecutionError<Reg::Type, CustomError>>
21where
22    Reg: Register,
23    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
24{
25    let group_regs = group_regs.get();
26    let vreg_idx = vreg.to_bits();
27    if !vreg_idx.is_multiple_of(group_regs) || vreg_idx + group_regs > 32 {
28        cold_path();
29        return Err(ExecutionError::IllegalInstruction {
30            address: program_counter.old_pc(INSTRUCTION_SIZE),
31        });
32    }
33    Ok(())
34}
35
36/// Check mask-destination / source overlap constraint for compare instructions.
37///
38/// Per RVV §11.8: a mask destination register may overlap a source register group only when
39/// the source group occupies a single register (LMUL ≤ 1, i.e. `group_regs == 1`). Otherwise
40/// the encoding is reserved and raises an illegal instruction.
41#[inline(always)]
42#[doc(hidden)]
43#[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
44pub fn check_mask_dest_no_overlap<Reg, Memory, PC, CustomError>(
45    program_counter: &PC,
46    vd: VReg,
47    src_base: VReg,
48    group_regs: NonZeroU8,
49) -> Result<(), ExecutionError<Reg::Type, CustomError>>
50where
51    Reg: Register,
52    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
53{
54    let group_regs = group_regs.get();
55    if group_regs > 1 {
56        let vd_idx = vd.to_bits();
57        let src = src_base.to_bits();
58        if vd_idx >= src && vd_idx < src + group_regs {
59            cold_path();
60            return Err(ExecutionError::IllegalInstruction {
61                address: program_counter.old_pc(INSTRUCTION_SIZE),
62            });
63        }
64    }
65    Ok(())
66}
67
68/// Read a SEW-wide element from register group `[base_reg, base_reg + group_regs)` as `u64`.
69///
70/// Element `elem_i` occupies bytes at:
71///   - register `base_reg + elem_i / elems_per_reg`
72///   - byte offset `(elem_i % elems_per_reg) * sew_bytes`
73///
74/// The value is zero-extended to `u64`.
75///
76/// # Safety
77/// `base_reg + elem_i / (VLEN.bytes() / sew_bytes) < 32` must hold.
78#[inline(always)]
79#[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
80pub(crate) unsafe fn read_element_u64<const VLEN: Vlen>(
81    vregs: &VectorRegisterFile<VLEN>,
82    base_reg: VReg,
83    elem_i: u16,
84    sew: Vsew,
85) -> u64 {
86    let sew_bytes = u32::from(sew.bytes_width());
87    let elems_per_reg = VLEN.bytes() / sew_bytes;
88    let reg_off = u32::from(elem_i) / elems_per_reg;
89    let byte_off = (u32::from(elem_i) % elems_per_reg) * sew_bytes;
90    // SAFETY: `base_reg + reg_off < 32` by caller's precondition
91    let reg = vregs
92        .get(unsafe { VReg::from_bits(base_reg.to_bits() + reg_off as u8).unwrap_unchecked() });
93    // SAFETY: `byte_off + sew_bytes <= VLEN.bytes()` because `byte_off` is at most
94    // `(elems_per_reg - 1) * sew_bytes = VLEN.bytes() - sew_bytes`
95    let src = unsafe { reg.get_unchecked(byte_off as usize..(byte_off + sew_bytes) as usize) };
96    let mut buf = [0u8; 8];
97    // SAFETY: `sew_bytes <= 8` for all `Vsew` variants
98    unsafe { buf.get_unchecked_mut(..sew_bytes as usize) }.copy_from_slice(src);
99    u64::from_le_bytes(buf)
100}
101
102/// Write a SEW-wide element (low `sew_bytes` of `value`) into register group
103/// `[base_reg, base_reg + group_regs)` at element index `elem_i`.
104///
105/// # Safety
106/// `base_reg + elem_i / (VLEN.bytes() / sew_bytes) < 32` must hold.
107#[inline(always)]
108#[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
109pub(crate) unsafe fn write_element_u64<const VLEN: Vlen>(
110    vregs: &mut VectorRegisterFile<VLEN>,
111    base_reg: VReg,
112    elem_i: u16,
113    sew: Vsew,
114    value: u64,
115) {
116    let sew_bytes = u32::from(sew.bytes_width());
117    let elems_per_reg = VLEN.bytes() / sew_bytes;
118    let reg_off = u32::from(elem_i) / elems_per_reg;
119    let byte_off = (u32::from(elem_i) % elems_per_reg) * sew_bytes;
120    let buf = value.to_le_bytes();
121    // SAFETY: `base_reg + reg_off < 32` by caller's precondition
122    let reg = vregs
123        .get_mut(unsafe { VReg::from_bits(base_reg.to_bits() + reg_off as u8).unwrap_unchecked() });
124    // SAFETY: `byte_off + sew_bytes <= VLEN.bytes()` - same argument as `read_element_u64`.
125    // `sew_bytes <= 8` for all `Vsew` variants.
126    let dst = unsafe { reg.get_unchecked_mut(byte_off as usize..(byte_off + sew_bytes) as usize) };
127    // SAFETY: `sew_bytes <= 8` for all `Vsew` variants
128    dst.copy_from_slice(unsafe { buf.get_unchecked(..sew_bytes as usize) });
129}
130
131/// Write one mask bit (the comparison result for element `elem_i`) into register `vd`.
132///
133/// Bits are stored LSB-first: element `i` lives at byte `i / 8`, bit `i % 8`.
134/// Only the target bit is modified; all other bits are undisturbed (tail-undisturbed semantics
135/// required for mask destinations per spec §5.3).
136///
137/// # Safety
138/// `elem_i / 8 < VLEN.bytes()` must hold, i.e. `elem_i < VLEN`. This is guaranteed when
139/// `elem_i < vl <= VLMAX <= VLEN`.
140#[inline(always)]
141#[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
142pub(in super::super) unsafe fn write_mask_bit<const VLEN: Vlen>(
143    vregs: &mut VectorRegisterFile<VLEN>,
144    vd: VReg,
145    elem_i: u16,
146    result: bool,
147) {
148    let byte_idx = usize::from(elem_i / u8::BITS as u16);
149    let bit_idx = elem_i % u8::BITS as u16;
150    // SAFETY: `byte_idx < VLEN.bytes()` by the caller's precondition
151    let byte = unsafe { vregs.get_mut(vd).get_unchecked_mut(byte_idx) };
152    if result {
153        *byte |= 1 << bit_idx;
154    } else {
155        *byte &= !(1 << bit_idx);
156    }
157}
158
159/// Operand source
160#[derive(Debug)]
161#[doc(hidden)]
162pub enum OpSrc {
163    /// Vector-vector: source register index
164    Vreg(VReg),
165    /// Vector-scalar: scalar value (sign- or zero-extended to u64)
166    Scalar(u64),
167}
168
169/// Execute a single-width element-wise arithmetic operation over `vstart..vl`.
170///
171/// `op` receives `(vs2_elem: u64, src_elem: u64, sew: Vsew)` and returns the `u64` result (only the
172/// low `sew.bits_width()` are written back).
173///
174/// # Safety
175/// - `vd.to_bits() % group_regs == 0` and `vd.to_bits() + group_regs <= 32` (verified by caller)
176/// - `src` register (when `OpSrc::Vreg`) satisfies the same alignment (verified by caller)
177/// - `vl <= group_regs * VLEN.bytes() / sew_bytes` (all `vl` elements fit within the register
178///   group)
179/// - When `vm=false`: `vd.to_bits() != 0` (vd does not overlap v0)
180#[inline(always)]
181#[doc(hidden)]
182// TODO: #[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
183pub unsafe fn execute_arith_op<Reg, ExtState, CustomError, F>(
184    ext_state: &mut ExtState,
185    vd: VReg,
186    vs2: VReg,
187    src: OpSrc,
188    vm: bool,
189    sew: Vsew,
190    op: F,
191) where
192    Reg: Register,
193    ExtState: VectorRegistersExt<Reg, CustomError>,
194    [(); SUPPORTED_ELEN_VLEN::<{ ExtState::ELEN }, { ExtState::VLEN }>]:,
195    CustomError: fmt::Debug,
196    F: Fn(u64, u64, Vsew) -> u64,
197{
198    let vl = ext_state.vl();
199    let vstart = ext_state.vstart();
200    // SAFETY: `vl <= VLMAX <= VLEN`, so `vl.div_ceil(8) <= VLEN.bytes()`
201    let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
202
203    for i in vstart.range_to(vl) {
204        if !mask_bit(&mask_buf, i) {
205            continue;
206        }
207
208        // SAFETY: `vs2 % group_regs == 0` and `i < vl <= group_regs * elems_per_reg`, so
209        // `vs2 + i / elems_per_reg < vs2 + group_regs <= 32`
210        let a = unsafe { read_element_u64(ext_state.read_vregs(), vs2, i, sew) };
211
212        let b = match src {
213            OpSrc::Vreg(vs1_base) => {
214                // SAFETY: same argument as vs2
215                unsafe { read_element_u64(ext_state.read_vregs(), vs1_base, i, sew) }
216            }
217            OpSrc::Scalar(val) => val,
218        };
219
220        let result = op(a, b, sew);
221
222        // SAFETY: `vd % group_regs == 0` and `i < vl <= group_regs * elems_per_reg`, so
223        // `vd + i / elems_per_reg < vd + group_regs <= 32`
224        unsafe {
225            write_element_u64(ext_state.write_vregs(), vd, i, sew, result);
226        }
227    }
228
229    ext_state.mark_vs_dirty();
230    ext_state.reset_vstart();
231}
232
233/// Execute a single-width element-wise integer compare over `vstart..vl`, writing one result
234/// bit per element into the mask register `vd`.
235///
236/// `op` receives `(vs2_elem: u64, src_elem: u64, sew: Vsew) -> bool`.
237///
238/// Mask destination tail bits (indices `>= vl`) are always left undisturbed per spec §5.3,
239/// regardless of `vta`. Only bits in `vstart..vl` are written.
240///
241/// # Safety
242/// - `vs2.to_bits() % group_regs == 0` and `vs2.to_bits() + group_regs <= 32` (verified by caller)
243/// - `src` register (when `OpSrc::Vreg`) satisfies the same alignment (verified by caller)
244/// - `vl <= group_regs * VLEN.bytes() / sew_bytes`
245/// - `vl <= VLEN` (so every element index fits within the mask register)
246#[inline(always)]
247#[doc(hidden)]
248// TODO: #[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
249pub unsafe fn execute_compare_op<Reg, ExtState, CustomError, F>(
250    ext_state: &mut ExtState,
251    vd: VReg,
252    vs2: VReg,
253    src: OpSrc,
254    vm: bool,
255    sew: Vsew,
256    op: F,
257) where
258    Reg: Register,
259    ExtState: VectorRegistersExt<Reg, CustomError>,
260    [(); SUPPORTED_ELEN_VLEN::<{ ExtState::ELEN }, { ExtState::VLEN }>]:,
261    CustomError: fmt::Debug,
262    F: Fn(u64, u64, Vsew) -> bool,
263{
264    let vl = ext_state.vl();
265    let vstart = ext_state.vstart();
266    // SAFETY: `vl <= VLEN`, so `vl.div_ceil(8) <= VLEN.bytes()`.
267    let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
268
269    for i in vstart.range_to(vl) {
270        // When masked, inactive elements in the destination mask register are left undisturbed
271        // (spec §12.8: "mask register results follow mask-undisturbed policy")
272        if !mask_bit(&mask_buf, i) {
273            continue;
274        }
275
276        // SAFETY: same argument as in `execute_arith_op`
277        let a = unsafe { read_element_u64(ext_state.read_vregs(), vs2, i, sew) };
278
279        let b = match src {
280            OpSrc::Vreg(vs1_base) => {
281                // SAFETY: same argument as vs2
282                unsafe { read_element_u64(ext_state.read_vregs(), vs1_base, i, sew) }
283            }
284            OpSrc::Scalar(val) => val,
285        };
286
287        let result = op(a, b, sew);
288
289        // SAFETY: `i < vl <= VLMAX <= VLEN`, so `i / 8 < VLEN / 8 = VLEN.bytes()`
290        unsafe {
291            write_mask_bit(ext_state.write_vregs(), vd, i, result);
292        }
293    }
294
295    ext_state.mark_vs_dirty();
296    ext_state.reset_vstart();
297}
298
299/// Sign-extend the low `sew.bits_width()` of `val` to a full `i64`
300#[inline(always)]
301#[doc(hidden)]
302#[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
303pub fn sign_extend(val: u64, sew: Vsew) -> i64 {
304    let shift = u64::BITS - u32::from(sew.bits_width());
305    (val.cast_signed() << shift) >> shift
306}
307
308/// Mask off the upper bits of a `u64` to leave only the low `sew.bits_width()`.
309///
310/// Used for unsigned arithmetic and comparisons where only the SEW-wide portion is significant. For
311/// SEW = 64 this is a no-op (all bits are significant).
312#[inline(always)]
313#[doc(hidden)]
314#[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
315pub fn sew_mask(sew: Vsew) -> u64 {
316    if u32::from(sew.bits_width()) == u64::BITS {
317        u64::MAX
318    } else {
319        (1u64 << sew.bits_width()) - 1
320    }
321}
ab_riscv_interpreter/v/zvexx/arith/zvexx_arith_helpers.rs

ab_riscv_interpreter/v/zvexx/arith/
zvexx_arith_helpers.rs