ab_riscv_interpreter/v/zvexx/muldiv/
zvexx_muldiv_helpers.rs

1//! Opaque helpers for ZveXx extension
2
3use crate::v::vector_registers::{VectorRegisterFile, VectorRegistersExt};
4pub use crate::v::zvexx::arith::zvexx_arith_helpers::{
5    OpSrc, check_vreg_group_alignment, sew_mask, sign_extend,
6};
7use crate::v::zvexx::arith::zvexx_arith_helpers::{read_element_u64, write_element_u64};
8use crate::v::zvexx::fixed_point::zvexx_fixed_point_helpers::read_wide_element_u64;
9use crate::v::zvexx::load::zvexx_load_helpers::{mask_bit, snapshot_mask};
10use crate::v::zvexx::zvexx_helpers::INSTRUCTION_SIZE;
11use crate::{ExecutionError, ProgramCounter};
12use ab_riscv_primitives::prelude::*;
13use core::fmt;
14use core::hint::cold_path;
15use core::num::NonZeroU8;
16
17/// Compute the destination register count for a widening operation (`EMUL = 2 × LMUL`).
18///
19/// Returns `None` when the resulting EMUL falls outside the legal range `[1/8, 8]`, i.e. when
20/// `LMUL` is already `M8` (EMUL would be 16) or the caller asks for a multiplication factor that
21/// pushes the fraction past the legal lower bound.
22///
23/// The register count returned is `max(1, EMUL)`: fractional EMUL values (1/2, 1/4) still occupy
24/// exactly one physical register.
25#[inline(always)]
26#[doc(hidden)]
27#[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
28pub fn widening_dest_register_count(vlmul: Vlmul) -> Option<NonZeroU8> {
29    let (lmul_num, lmul_den) = vlmul.as_fraction();
30    // EMUL = 2 × LMUL = (2 * lmul_num) / lmul_den
31    let Some(emul_num) = 2u8.checked_mul(lmul_num.get()) else {
32        cold_path();
33        return None;
34    };
35    let emul_den = lmul_den.get();
36    // Reduce the fraction by GCD (both are powers of two so min works as GCD)
37    let g = emul_num.min(emul_den);
38    let (n, d) = (emul_num / g, emul_den / g);
39    // Legal EMUL fractions: 1/8, 1/4, 1/2, 1, 2, 4, 8
40    let legal = matches!(
41        (n, d),
42        (1, 8) | (1, 4) | (1, 2) | (1, 1) | (2, 1) | (4, 1) | (8, 1)
43    );
44    if !legal {
45        cold_path();
46        return None;
47    }
48    // Register count: max(1, n/d) = n when d==1, else 1
49    Some(NonZeroU8::new(if d > 1 { 1 } else { n }).expect("Not zero; qed"))
50}
51
52/// Check that a narrower source register group does not *illegally* overlap the wider destination
53/// group of a widening instruction.
54///
55/// For widening instructions `vd` occupies `dest_group_regs` registers (which is
56/// [`widening_dest_register_count()`] of the source LMUL); `vs` occupies `src_group_regs`.
57///
58/// Per the RISC-V "V" spec §5.2, because the destination EEW (`2*SEW`) is greater than the source
59/// EEW (`SEW`), the source group *may* overlap the destination group, but only when both of the
60/// following hold:
61/// - the source EMUL is at least 1, and
62/// - the overlap is in the highest-numbered part of the destination register group, i.e. the source
63///   occupies exactly the top `src_group_regs` registers of the destination group.
64///
65/// When the source EMUL is at least 1, `dest_group_regs == 2 * src_group_regs`, so
66/// `dest_group_regs > src_group_regs` is an equivalent test for "source EMUL >= 1": a fractional
67/// source EMUL (`< 1`) yields `dest_group_regs == src_group_regs == 1`, in which case no overlap is
68/// ever legal. Any overlap that is not the legal "source in the highest-numbered part" form is
69/// rejected as an illegal instruction.
70#[inline(always)]
71#[doc(hidden)]
72#[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
73pub fn check_no_widening_overlap<Reg, Memory, PC, CustomError>(
74    program_counter: &PC,
75    vd: VReg,
76    vs: VReg,
77    dest_group_regs: NonZeroU8,
78    src_group_regs: NonZeroU8,
79) -> Result<(), ExecutionError<Reg::Type, CustomError>>
80where
81    Reg: Register,
82    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
83{
84    let dest_group_regs = dest_group_regs.get();
85    let src_group_regs = src_group_regs.get();
86    let vd_start = vd.to_bits();
87    let vd_end = vd_start + dest_group_regs;
88    let vs_start = vs.to_bits();
89    let vs_end = vs_start + src_group_regs;
90    // Disjoint register groups are always fine
91    if vs_start >= vd_end || vd_start >= vs_end {
92        return Ok(());
93    }
94    // The groups overlap. This is legal only when the source EMUL is at least 1
95    // (`dest_group_regs > src_group_regs`) and the source occupies exactly the highest-numbered
96    // part of the destination group (`vs_start == vd_end - src_group_regs`).
97    if dest_group_regs > src_group_regs && vs_start == vd_end - src_group_regs {
98        return Ok(());
99    }
100
101    cold_path();
102    Err(ExecutionError::IllegalInstruction {
103        address: program_counter.old_pc(INSTRUCTION_SIZE),
104    })
105}
106
107/// Write a 2*SEW-wide element into the widened destination register group at element index
108/// `elem_i`.
109///
110/// # Safety
111/// `base_reg + elem_i / (VLEN.bytes() / (2*sew_bytes)) < 32` must hold.
112#[inline(always)]
113#[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
114unsafe fn write_wide_element_u64<const VLEN: Vlen>(
115    vregs: &mut VectorRegisterFile<VLEN>,
116    base_reg: VReg,
117    elem_i: u16,
118    sew: Vsew,
119    value: u64,
120) {
121    let wide_bytes = u32::from(sew.bytes_width()) * 2;
122    let elems_per_reg = VLEN.bytes() / wide_bytes;
123    let reg_off = u32::from(elem_i) / elems_per_reg;
124    let byte_off = (u32::from(elem_i) % elems_per_reg) * wide_bytes;
125    let buf = value.to_le_bytes();
126    // SAFETY: `base_reg + reg_off < 32` by caller's precondition
127    let reg = unsafe {
128        vregs.get_mut(VReg::from_bits(base_reg.to_bits() + reg_off as u8).unwrap_unchecked())
129    };
130    // SAFETY: `byte_off + wide_bytes <= VLEN.bytes()`; `wide_bytes <= 8` for SEW < 64
131    let dst = unsafe { reg.get_unchecked_mut(byte_off as usize..(byte_off + wide_bytes) as usize) };
132    // SAFETY: `wide_bytes <= 8` because SEW < 64 is enforced before widening ops are called
133    dst.copy_from_slice(unsafe { buf.get_unchecked(..wide_bytes as usize) });
134}
135
136/// Execute a single-width element-wise arithmetic operation over `vstart..vl`.
137///
138/// `op` receives `(vs2_elem: u64, src_elem: u64, sew: Vsew)` and returns the `u64` result.
139/// Only the low `sew.bytes()` of the result are written back.
140///
141/// # Safety
142/// - `vd` and source register alignment verified by caller
143/// - `vl <= group_regs * VLEN.bytes() / sew_bytes`
144/// - When `vm=false`: `vd.to_bits() != 0`
145#[inline(always)]
146#[doc(hidden)]
147// TODO: #[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
148pub unsafe fn execute_arith_op<Reg, ExtState, CustomError, F>(
149    ext_state: &mut ExtState,
150    vd: VReg,
151    vs2: VReg,
152    src: OpSrc,
153    vm: bool,
154    sew: Vsew,
155    op: F,
156) where
157    Reg: Register,
158    ExtState: VectorRegistersExt<Reg, CustomError>,
159    [(); SUPPORTED_ELEN_VLEN::<{ ExtState::ELEN }, { ExtState::VLEN }>]:,
160    CustomError: fmt::Debug,
161    F: Fn(u64, u64, Vsew) -> u64,
162{
163    let vl = ext_state.vl();
164    let vstart = ext_state.vstart();
165    // SAFETY: `vl <= VLMAX <= VLEN`
166    let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
167    for i in vstart.range_to(vl) {
168        if !mask_bit(&mask_buf, i) {
169            continue;
170        }
171        // SAFETY: register bounds verified by caller
172        let a = unsafe { read_element_u64(ext_state.read_vregs(), vs2, i, sew) };
173        let b = match src {
174            // SAFETY: register bounds verified by caller
175            OpSrc::Vreg(vs1_base) => unsafe {
176                read_element_u64(ext_state.read_vregs(), vs1_base, i, sew)
177            },
178            OpSrc::Scalar(val) => val,
179        };
180        let result = op(a, b, sew);
181        // SAFETY: register bounds verified by caller
182        unsafe {
183            write_element_u64(ext_state.write_vregs(), vd, i, sew, result);
184        }
185    }
186    ext_state.mark_vs_dirty();
187    ext_state.reset_vstart();
188}
189
190/// Execute a single-width widening operation over `vstart..vl`.
191///
192/// Reads SEW-wide elements from `vs2` and `src`, computes `op`, and writes a 2*SEW-wide result
193/// into `vd`.
194///
195/// # Safety
196/// - `vd` uses `dest_group_regs` registers (result of `widening_dest_register_count()`); alignment
197///   and non-overlap verified by caller
198/// - `vl <= src_group_regs * VLEN.bytes() / sew_bytes`
199/// - SEW < 64 verified by caller (so 2*SEW <= 64 and fits in u64)
200/// - When `vm=false`: `vd.to_bits() != 0`
201#[inline(always)]
202#[doc(hidden)]
203// TODO: #[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
204pub unsafe fn execute_widening_op<Reg, ExtState, CustomError, F>(
205    ext_state: &mut ExtState,
206    vd: VReg,
207    vs2: VReg,
208    src: OpSrc,
209    vm: bool,
210    sew: Vsew,
211    op: F,
212) where
213    Reg: Register,
214    ExtState: VectorRegistersExt<Reg, CustomError>,
215    [(); SUPPORTED_ELEN_VLEN::<{ ExtState::ELEN }, { ExtState::VLEN }>]:,
216    CustomError: fmt::Debug,
217    F: Fn(u64, u64, Vsew) -> u64,
218{
219    let vl = ext_state.vl();
220    let vstart = ext_state.vstart();
221    // SAFETY: `vl <= VLMAX <= VLEN`
222    let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
223    for i in vstart.range_to(vl) {
224        if !mask_bit(&mask_buf, i) {
225            continue;
226        }
227        // SAFETY: register bounds verified by caller
228        let a = unsafe { read_element_u64(ext_state.read_vregs(), vs2, i, sew) };
229        let b = match src {
230            // SAFETY: register bounds verified by caller
231            OpSrc::Vreg(vs1_base) => unsafe {
232                read_element_u64(ext_state.read_vregs(), vs1_base, i, sew)
233            },
234            OpSrc::Scalar(val) => val,
235        };
236        let result = op(a, b, sew);
237        // SAFETY: vd has dest_group_regs registers; element `i` fits within them because
238        // `vl <= src_group_regs * VLEN.bytes() / sew_bytes` and dest stores at 2*SEW width so
239        // `i < dest_group_regs * VLEN.bytes() / (2*sew_bytes)`
240        unsafe {
241            write_wide_element_u64(ext_state.write_vregs(), vd, i, sew, result);
242        }
243    }
244    ext_state.mark_vs_dirty();
245    ext_state.reset_vstart();
246}
247
248/// Execute a single-width multiply-add where the first multiplier is a vector register group.
249///
250/// `op` receives `(acc: u64, a: u64, b: u64, sew: Vsew)` where `acc` is the current `vd[i]`,
251/// `a` is the element from `a_reg`, and `b` is the element from `src`. Returns the new `vd[i]`.
252///
253/// # Safety
254/// - `vd`, `a_reg`, and `src` register alignment verified by caller
255/// - `vl <= group_regs * VLEN.bytes() / sew_bytes`
256/// - When `vm=false`: `vd.to_bits() != 0`
257#[inline(always)]
258#[doc(hidden)]
259// TODO: #[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
260pub unsafe fn execute_muladd_op<Reg, ExtState, CustomError, F>(
261    ext_state: &mut ExtState,
262    vd: VReg,
263    a_reg: VReg,
264    src: OpSrc,
265    vm: bool,
266    sew: Vsew,
267    op: F,
268) where
269    Reg: Register,
270    ExtState: VectorRegistersExt<Reg, CustomError>,
271    [(); SUPPORTED_ELEN_VLEN::<{ ExtState::ELEN }, { ExtState::VLEN }>]:,
272    CustomError: fmt::Debug,
273    F: Fn(u64, u64, u64, Vsew) -> u64,
274{
275    let vl = ext_state.vl();
276    let vstart = ext_state.vstart();
277    // SAFETY: `vl <= VLMAX <= VLEN`
278    let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
279    for i in vstart.range_to(vl) {
280        if !mask_bit(&mask_buf, i) {
281            continue;
282        }
283        // SAFETY: register bounds verified by caller
284        let acc = unsafe { read_element_u64(ext_state.read_vregs(), vd, i, sew) };
285        // SAFETY: register bounds verified by caller
286        let a = unsafe { read_element_u64(ext_state.read_vregs(), a_reg, i, sew) };
287        let b = match src {
288            // SAFETY: register bounds verified by caller
289            OpSrc::Vreg(b_reg) => unsafe {
290                read_element_u64(ext_state.read_vregs(), b_reg, i, sew)
291            },
292            OpSrc::Scalar(val) => val,
293        };
294        let result = op(acc, a, b, sew);
295        // SAFETY: register bounds verified by caller
296        unsafe {
297            write_element_u64(ext_state.write_vregs(), vd, i, sew, result);
298        }
299    }
300    ext_state.mark_vs_dirty();
301    ext_state.reset_vstart();
302}
303
304/// Execute a single-width multiply-add where the first multiplier is a scalar.
305///
306/// Analogous to [`execute_muladd_op`] but `a` is a fixed scalar instead of a register element.
307///
308/// # Safety
309/// Same as [`execute_muladd_op`], minus constraints on `a_reg`.
310#[inline(always)]
311#[doc(hidden)]
312// TODO: #[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
313pub unsafe fn execute_muladd_scalar_op<Reg, ExtState, CustomError, F>(
314    ext_state: &mut ExtState,
315    vd: VReg,
316    scalar: u64,
317    src: OpSrc,
318    vm: bool,
319    sew: Vsew,
320    op: F,
321) where
322    Reg: Register,
323    ExtState: VectorRegistersExt<Reg, CustomError>,
324    [(); SUPPORTED_ELEN_VLEN::<{ ExtState::ELEN }, { ExtState::VLEN }>]:,
325    CustomError: fmt::Debug,
326    F: Fn(u64, u64, u64, Vsew) -> u64,
327{
328    let vl = ext_state.vl();
329    let vstart = ext_state.vstart();
330    // SAFETY: `vl <= VLMAX <= VLEN`
331    let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
332    for i in vstart.range_to(vl) {
333        if !mask_bit(&mask_buf, i) {
334            continue;
335        }
336        // SAFETY: register bounds verified by caller
337        let acc = unsafe { read_element_u64(ext_state.read_vregs(), vd, i, sew) };
338        let b = match src {
339            // SAFETY: register bounds verified by caller
340            OpSrc::Vreg(b_reg) => unsafe {
341                read_element_u64(ext_state.read_vregs(), b_reg, i, sew)
342            },
343            OpSrc::Scalar(val) => val,
344        };
345        let result = op(acc, scalar, b, sew);
346        // SAFETY: register bounds verified by caller
347        unsafe {
348            write_element_u64(ext_state.write_vregs(), vd, i, sew, result);
349        }
350    }
351    ext_state.mark_vs_dirty();
352    ext_state.reset_vstart();
353}
354
355/// Execute a widening multiply-add where the first multiplier is a vector register group.
356///
357/// Reads SEW-wide `acc` from the widened `vd` group, SEW-wide `a` from `a_reg`, and SEW-wide
358/// `b` from `src`. Writes a 2*SEW-wide result back into `vd`.
359///
360/// `op` receives `(acc: u64, a: u64, b: u64, sew: Vsew)`.
361///
362/// # Safety
363/// - `vd` uses `dest_group_regs` registers (result of `widening_dest_register_count()`); alignment
364///   and non-overlap verified by caller
365/// - SEW < 64 verified by caller
366/// - When `vm=false`: `vd.to_bits() != 0`
367#[inline(always)]
368#[doc(hidden)]
369// TODO: #[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
370pub unsafe fn execute_widening_muladd_op<Reg, ExtState, CustomError, F>(
371    ext_state: &mut ExtState,
372    vd: VReg,
373    a_reg: VReg,
374    src: OpSrc,
375    vm: bool,
376    sew: Vsew,
377    op: F,
378) where
379    Reg: Register,
380    ExtState: VectorRegistersExt<Reg, CustomError>,
381    [(); SUPPORTED_ELEN_VLEN::<{ ExtState::ELEN }, { ExtState::VLEN }>]:,
382    CustomError: fmt::Debug,
383    F: Fn(u64, u64, u64, Vsew) -> u64,
384{
385    let vl = ext_state.vl();
386    let vstart = ext_state.vstart();
387    // SAFETY: `vl <= VLMAX <= VLEN`
388    let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
389    for i in vstart.range_to(vl) {
390        if !mask_bit(&mask_buf, i) {
391            continue;
392        }
393        // Read the existing 2*SEW accumulator from vd
394        // SAFETY: vd has dest_group_regs registers; element `i` fits within them (see
395        // `execute_widening_op` for the bound argument)
396        let acc = unsafe { read_wide_element_u64(ext_state.read_vregs(), vd, i, sew) };
397        // SAFETY: register bounds verified by caller
398        let a = unsafe { read_element_u64(ext_state.read_vregs(), a_reg, i, sew) };
399        let b = match src {
400            // SAFETY: register bounds verified by caller
401            OpSrc::Vreg(b_reg) => unsafe {
402                read_element_u64(ext_state.read_vregs(), b_reg, i, sew)
403            },
404            OpSrc::Scalar(val) => val,
405        };
406        let result = op(acc, a, b, sew);
407        // SAFETY: same as acc read above
408        unsafe {
409            write_wide_element_u64(ext_state.write_vregs(), vd, i, sew, result);
410        }
411    }
412    ext_state.mark_vs_dirty();
413    ext_state.reset_vstart();
414}
415
416/// Execute a widening multiply-add where the first multiplier is a scalar.
417///
418/// Analogous to [`execute_widening_muladd_op`] but `a` is a fixed scalar.
419///
420/// # Safety
421/// Same as [`execute_widening_muladd_op`], minus constraints on `a_reg`.
422#[inline(always)]
423#[doc(hidden)]
424// TODO: #[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
425pub unsafe fn execute_widening_muladd_scalar_op<Reg, ExtState, CustomError, F>(
426    ext_state: &mut ExtState,
427    vd: VReg,
428    scalar: u64,
429    src: OpSrc,
430    vm: bool,
431    sew: Vsew,
432    op: F,
433) where
434    Reg: Register,
435    ExtState: VectorRegistersExt<Reg, CustomError>,
436    [(); SUPPORTED_ELEN_VLEN::<{ ExtState::ELEN }, { ExtState::VLEN }>]:,
437    CustomError: fmt::Debug,
438    F: Fn(u64, u64, u64, Vsew) -> u64,
439{
440    let vl = ext_state.vl();
441    let vstart = ext_state.vstart();
442    // SAFETY: `vl <= VLMAX <= VLEN`
443    let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
444    for i in vstart.range_to(vl) {
445        if !mask_bit(&mask_buf, i) {
446            continue;
447        }
448        // SAFETY: vd has dest_group_regs registers; element `i` fits within them (see
449        // `execute_widening_op` for the bound argument)
450        let acc = unsafe { read_wide_element_u64(ext_state.read_vregs(), vd, i, sew) };
451        let b = match src {
452            // SAFETY: register bounds verified by caller
453            OpSrc::Vreg(b_reg) => unsafe {
454                read_element_u64(ext_state.read_vregs(), b_reg, i, sew)
455            },
456            OpSrc::Scalar(val) => val,
457        };
458        let result = op(acc, scalar, b, sew);
459        // SAFETY: same as acc read above
460        unsafe {
461            write_wide_element_u64(ext_state.write_vregs(), vd, i, sew, result);
462        }
463    }
464    ext_state.mark_vs_dirty();
465    ext_state.reset_vstart();
466}
467
468/// Signed × signed high half.
469///
470/// Both operands are sign-extended to i64, multiplied as i128, and the upper SEW bits of the
471/// 2*SEW product are returned (zero-extended to u64 for writeback into a SEW-wide element slot).
472#[inline(always)]
473#[doc(hidden)]
474#[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
475pub fn mulh_ss(a: u64, b: u64, sew: Vsew) -> u64 {
476    let sa = i128::from(sign_extend(a, sew));
477    let sb = i128::from(sign_extend(b, sew));
478    let product = sa.wrapping_mul(sb);
479    // Extract bits [2*SEW-1 : SEW] of the product
480    let high = (product >> u32::from(sew.bits_width())).cast_unsigned() as u64;
481    high & sew_mask(sew)
482}
483
484/// Unsigned × unsigned high half
485#[inline(always)]
486#[doc(hidden)]
487#[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
488pub fn mulhu_uu(a: u64, b: u64, sew: Vsew) -> u64 {
489    let ua = u128::from(a & sew_mask(sew));
490    let ub = u128::from(b & sew_mask(sew));
491    let product = ua.wrapping_mul(ub);
492    let high = (product >> u32::from(sew.bits_width())) as u64;
493    high & sew_mask(sew)
494}
495
496/// Signed × unsigned high half.
497///
498/// `a` (vs2) is the signed operand; `b` (vs1/rs1) is the unsigned operand.
499#[inline(always)]
500#[doc(hidden)]
501#[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
502pub fn mulhsu_su(a: u64, b: u64, sew: Vsew) -> u64 {
503    let sa = i128::from(sign_extend(a, sew));
504    let ub = u128::from(b & sew_mask(sew));
505    // Compute signed × unsigned as i128 to preserve sign
506    let product = sa.wrapping_mul(ub.cast_signed());
507    let high = (product >> u32::from(sew.bits_width())).cast_unsigned() as u64;
508    high & sew_mask(sew)
509}
510
511/// Signed divide with division-by-zero and signed-overflow semantics from the RISC-V V spec §12.11.
512///
513/// - Division by zero: result = all-ones (i.e., −1 as signed SEW-wide integer)
514/// - Signed overflow (MIN / −1): result = MIN (i.e., `1 << (SEW-1)`)
515#[inline(always)]
516#[doc(hidden)]
517// TODO: #[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
518pub fn sdiv(a: u64, b: u64, sew: Vsew) -> u64 {
519    let sa = sign_extend(a, sew);
520    let sb = sign_extend(b, sew);
521    // Division by zero: return all-ones in the SEW-wide slot (= −1 signed)
522    if sb == 0 {
523        return sew_mask(sew);
524    }
525    // Signed overflow: MIN / -1 returns MIN
526    let sew_min = i64::MIN >> (u64::BITS - u32::from(sew.bits_width()));
527    if sa == sew_min && sb == -1 {
528        return sew_min.cast_unsigned() & sew_mask(sew);
529    }
530    (sa / sb).cast_unsigned() & sew_mask(sew)
531}
532
533/// Signed remainder with division-by-zero and signed-overflow semantics from the RISC-V V spec
534/// §12.11.
535///
536/// - Division by zero: remainder = dividend
537/// - Signed overflow (MIN % −1): remainder = 0
538#[inline(always)]
539#[doc(hidden)]
540#[expect(
541    clippy::modulo_arithmetic,
542    reason = "This is what the code is supposed to do"
543)]
544// TODO: #[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
545pub fn srem(a: u64, b: u64, sew: Vsew) -> u64 {
546    let sa = sign_extend(a, sew);
547    let sb = sign_extend(b, sew);
548    // Division by zero: remainder = dividend
549    if sb == 0 {
550        return a & sew_mask(sew);
551    }
552    // Signed overflow: MIN % -1 = 0
553    let sew_min = i64::MIN >> (u64::BITS - u32::from(sew.bits_width()));
554    if sa == sew_min && sb == -1 {
555        return 0;
556    }
557    (sa % sb).cast_unsigned() & sew_mask(sew)
558}
ab_riscv_interpreter/v/zvexx/muldiv/zvexx_muldiv_helpers.rs

ab_riscv_interpreter/v/zvexx/muldiv/
zvexx_muldiv_helpers.rs