ab_riscv_interpreter/v/zvexx/widen_narrow/
zvexx_widen_narrow_helpers.rs

//! Opaque helpers for the widening, narrowing, and integer-extension instructions of the ZveXx
//! extension
2
3use crate::v::vector_registers::{VLENB_USIZE, VectorRegisterFile, VectorRegistersExt};
4pub use crate::v::zvexx::arith::zvexx_arith_helpers::{OpSrc, check_vreg_group_alignment};
5use crate::v::zvexx::zvexx_helpers::INSTRUCTION_SIZE;
6use crate::{ExecutionError, ProgramCounter};
7use ab_riscv_primitives::instructions::v::Vsew;
8use ab_riscv_primitives::prelude::*;
9use core::fmt;
10use core::hint::cold_path;
11use core::num::NonZeroU8;
12
13/// Check that a widening destination `vd` is aligned to `wide_group_regs` and fits within
14/// `[0,32)`, without any source overlap check
15#[inline(always)]
16#[doc(hidden)]
17#[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
18pub fn check_vd_widen_no_src_check<Reg, Memory, PC, CustomError>(
19    program_counter: &PC,
20    vd: VReg,
21    wide_group_regs: NonZeroU8,
22) -> Result<(), ExecutionError<Reg::Type, CustomError>>
23where
24    Reg: Register,
25    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
26{
27    let wide_group_regs = wide_group_regs.get();
28    let vd_idx = vd.to_bits();
29    if !vd_idx.is_multiple_of(wide_group_regs) || vd_idx + wide_group_regs > 32 {
30        cold_path();
31        return Err(ExecutionError::IllegalInstruction {
32            address: program_counter.old_pc(INSTRUCTION_SIZE),
33        });
34    }
35    Ok(())
36}
37
38/// Check that an extension source `vs2` is aligned to `src_group_regs`, fits in `[0,32)`, and only
39/// overlaps `vd` (which occupies `group_regs` registers) in a manner permitted by the spec.
40///
41/// Per the vector spec §5.2, the destination EEW (SEW) of an extension is greater than the source
42/// EEW (SEW/factor), so the destination may overlap the source only when the source EMUL is at
43/// least 1 and the overlap is in the highest-numbered part of the destination register group (e.g.
44/// `vzext.vf4 v0, v6` with LMUL=8, where the narrow source `{v6,v7}` aliases the high registers of
45/// the wide `{v0..v7}` destination). Any other overlap is illegal.
46#[inline(always)]
47#[doc(hidden)]
48#[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
49pub fn check_vs_ext_alignment<Reg, Memory, PC, CustomError>(
50    program_counter: &PC,
51    vs2: VReg,
52    src_group_regs: NonZeroU8,
53    vd: VReg,
54    group_regs: NonZeroU8,
55) -> Result<(), ExecutionError<Reg::Type, CustomError>>
56where
57    Reg: Register,
58    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
59{
60    let src_group_regs = src_group_regs.get();
61    let group_regs = group_regs.get();
62    let vs2_idx = vs2.to_bits();
63    if !vs2_idx.is_multiple_of(src_group_regs) || vs2_idx + src_group_regs > 32 {
64        cold_path();
65        return Err(ExecutionError::IllegalInstruction {
66            address: program_counter.old_pc(INSTRUCTION_SIZE),
67        });
68    }
69    // The wide destination (group_regs) may overlap the narrow source (src_group_regs) only in the
70    // highest-numbered part of the destination group, and only when the source EMUL >= 1.
71    if widen_src_overlap_illegal(vd.to_bits(), group_regs, vs2_idx, src_group_regs) {
72        cold_path();
73        return Err(ExecutionError::IllegalInstruction {
74            address: program_counter.old_pc(INSTRUCTION_SIZE),
75        });
76    }
77    Ok(())
78}
79
80/// Check that a widening destination `vd` is aligned to `wide_group_regs`, fits within `[0, 32)`,
81/// and only overlaps the `group_regs`-register narrow source(s) starting at `vs_a`/`vs_b` in a
82/// manner permitted by the spec.
83///
84/// `wide_group_regs` is the pre-computed register count for the wide EMUL (2*LMUL), obtained via
85/// `Vlmul::index_register_count(wide_eew, sew)`. `group_regs` is the narrow LMUL register count.
86///
87/// Per the vector spec §5.2, a destination whose EEW (2*SEW) is greater than a source's EEW (SEW)
88/// may overlap that source only when the source EMUL is at least 1 and the overlap is in the
89/// highest-numbered part of the destination register group (e.g. `vwsubu.wv v2, v14, v3` with
90/// LMUL=1, where the narrow `v3` aliases the high register of the wide `{v2, v3}` destination).
91/// Any other overlap is illegal.
92#[inline(always)]
93#[doc(hidden)]
94#[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
95pub fn check_vd_widen_alignment<Reg, Memory, PC, CustomError>(
96    program_counter: &PC,
97    vd: VReg,
98    vs_a: VReg,
99    vs_b_opt: Option<VReg>,
100    group_regs: NonZeroU8,
101    wide_group_regs: NonZeroU8,
102) -> Result<(), ExecutionError<Reg::Type, CustomError>>
103where
104    Reg: Register,
105    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
106{
107    let wide_group_regs = wide_group_regs.get();
108    let group_regs = group_regs.get();
109    let vd_idx = vd.to_bits();
110    if !vd_idx.is_multiple_of(wide_group_regs) || vd_idx + wide_group_regs > 32 {
111        cold_path();
112        return Err(ExecutionError::IllegalInstruction {
113            address: program_counter.old_pc(INSTRUCTION_SIZE),
114        });
115    }
116    if widen_src_overlap_illegal(vd_idx, wide_group_regs, vs_a.to_bits(), group_regs) {
117        cold_path();
118        return Err(ExecutionError::IllegalInstruction {
119            address: program_counter.old_pc(INSTRUCTION_SIZE),
120        });
121    }
122    if let Some(vs_b) = vs_b_opt
123        && widen_src_overlap_illegal(vd_idx, wide_group_regs, vs_b.to_bits(), group_regs)
124    {
125        cold_path();
126        return Err(ExecutionError::IllegalInstruction {
127            address: program_counter.old_pc(INSTRUCTION_SIZE),
128        });
129    }
130    Ok(())
131}
132
133/// Returns `true` when a narrow source group of `group_regs` registers starting at `vs_idx`
134/// overlaps the wide destination group (`wide_group_regs` registers starting at `vd_idx`) in a way
135/// that is *not* permitted by the spec.
136///
137/// Overlap is only legal when the source EMUL is at least 1 - which, on widening, is exactly when
138/// the destination register count strictly exceeds the narrow source count (for fractional LMUL
139/// both counts collapse to 1) - and the source occupies the highest-numbered registers of the
140/// destination group.
141#[inline(always)]
142#[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
143fn widen_src_overlap_illegal(vd_idx: u8, wide_group_regs: u8, vs_idx: u8, group_regs: u8) -> bool {
144    if !ranges_overlap(vd_idx, wide_group_regs, vs_idx, group_regs) {
145        return false;
146    }
147    let high_part_overlap =
148        wide_group_regs > group_regs && vs_idx == vd_idx + wide_group_regs - group_regs;
149    !high_part_overlap
150}
151
152/// Check that a widening source `vs2` that is already 2×SEW wide is aligned to `wide_group_regs`
153/// and fits within `[0, 32)`.
154#[inline(always)]
155#[doc(hidden)]
156#[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
157pub fn check_vs_wide_alignment<Reg, Memory, PC, CustomError>(
158    program_counter: &PC,
159    vs: VReg,
160    wide_group_regs: NonZeroU8,
161) -> Result<(), ExecutionError<Reg::Type, CustomError>>
162where
163    Reg: Register,
164    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
165{
166    let wide_group_regs = wide_group_regs.get();
167    let vs_idx = vs.to_bits();
168    if !vs_idx.is_multiple_of(wide_group_regs) || vs_idx + wide_group_regs > 32 {
169        cold_path();
170        return Err(ExecutionError::IllegalInstruction {
171            address: program_counter.old_pc(INSTRUCTION_SIZE),
172        });
173    }
174    Ok(())
175}
176
177/// Check that a narrowing destination `vd` is aligned to `group_regs` and fits
178/// within `[0, 32)`.
179///
180/// No overlap check against `vs2` is performed here because narrowing instructions
181/// permit `vd` to alias the low half of the wide `vs2` register group per spec §11.7.
182#[inline(always)]
183#[doc(hidden)]
184#[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
185pub fn check_vd_narrow_alignment<Reg, Memory, PC, CustomError>(
186    program_counter: &PC,
187    vd: VReg,
188    group_regs: NonZeroU8,
189) -> Result<(), ExecutionError<Reg::Type, CustomError>>
190where
191    Reg: Register,
192    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
193{
194    let group_regs = group_regs.get();
195    let vd_idx = vd.to_bits();
196    if !vd_idx.is_multiple_of(group_regs) || vd_idx + group_regs > 32 {
197        cold_path();
198        return Err(ExecutionError::IllegalInstruction {
199            address: program_counter.old_pc(INSTRUCTION_SIZE),
200        });
201    }
202    Ok(())
203}
204
/// Returns `true` when `[a_start, a_start+a_len)` overlaps `[b_start, b_start+b_len)`.
///
/// The end points are computed in `u16`, so the sums cannot overflow for any `u8` inputs.
#[inline(always)]
#[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
fn ranges_overlap(a_start: u8, a_len: u8, b_start: u8, b_len: u8) -> bool {
    let (a_start, b_start) = (u16::from(a_start), u16::from(b_start));
    let a_end = a_start + u16::from(a_len);
    let b_end = b_start + u16::from(b_len);
    a_start < b_end && b_start < a_end
}
211
/// Report whether bit `i` of a packed mask is set.
///
/// Bits are numbered LSB-first within each byte, so bit `i` lives in byte `i / 8` at position
/// `i % 8`. An index that falls past the end of `mask` is reported as not set.
#[inline(always)]
#[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
fn mask_bit(mask: &[u8], i: u16) -> bool {
    let bits_per_byte = u8::BITS as u16;
    let byte_index = usize::from(i / bits_per_byte);
    let bit_index = i % bits_per_byte;
    match mask.get(byte_index) {
        Some(&byte) => (byte >> bit_index) & 1 != 0,
        None => false,
    }
}
219
220/// Snapshot the mask register into a stack buffer.
221///
222/// When `vm=true` (unmasked), all bytes are `0xff`.
223///
224/// # Safety
225/// `vl <= VLEN` must hold
226#[inline(always)]
227#[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
228unsafe fn snapshot_mask<const VLEN: Vlen>(
229    vregs: &VectorRegisterFile<VLEN>,
230    vm: bool,
231    vl: Vl,
232) -> [u8; VLENB_USIZE::<VLEN>] {
233    let mut buf = [0u8; _];
234    if vm {
235        buf = [0xffu8; _];
236    } else {
237        let mask_bytes = usize::from(vl.bytes());
238        // SAFETY: `mask_bytes <= VLEN.bytes()` by precondition
239        unsafe {
240            buf.get_unchecked_mut(..mask_bytes)
241                .copy_from_slice(vregs.get(VReg::V0).get_unchecked(..mask_bytes));
242        }
243    }
244    buf
245}
246
247/// Read the low `sew.bytes_width()` of the element `elem_i` from the register group `base_reg`,
248/// zero-extended to `u64`.
249///
250/// # Safety
251/// `base_reg + elem_i / (VLEN.bytes() / sew.bytes_width()) < 32`
252#[inline(always)]
253#[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
254unsafe fn read_element_u64<const VLEN: Vlen>(
255    vregs: &VectorRegisterFile<VLEN>,
256    base_reg: VReg,
257    elem_i: u16,
258    sew: Vsew,
259) -> u64 {
260    let sew_bytes = u32::from(sew.bytes_width());
261    let elems_per_reg = VLEN.bytes() / sew_bytes;
262    let reg_off = u32::from(elem_i) / elems_per_reg;
263    let byte_off = (u32::from(elem_i) % elems_per_reg) * sew_bytes;
264    // SAFETY: `base_reg + reg_off < 32` by caller's precondition
265    let reg = unsafe {
266        vregs.get(VReg::from_bits(base_reg.to_bits() + reg_off as u8).unwrap_unchecked())
267    };
268    // SAFETY: `byte_off + sew_bytes <= VLEN.bytes()`
269    let src = unsafe { reg.get_unchecked(byte_off as usize..(byte_off + sew_bytes) as usize) };
270    let mut buf = [0u8; 8];
271    // SAFETY: `sew_bytes <= 8`
272    unsafe { buf.get_unchecked_mut(..sew_bytes as usize) }.copy_from_slice(src);
273    u64::from_le_bytes(buf)
274}
275
276/// Write the low `sew.bytes_width()` of `value` into element `elem_i` in register group `base_reg`.
277///
278/// # Safety
279/// `base_reg + elem_i / (VLEN.bytes() / sew.bytes_width()) < 32`
280#[inline(always)]
281#[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
282unsafe fn write_element_u64<const VLEN: Vlen>(
283    vregs: &mut VectorRegisterFile<VLEN>,
284    base_reg: VReg,
285    elem_i: u16,
286    sew: Vsew,
287    value: u64,
288) {
289    let sew_bytes = u32::from(sew.bytes_width());
290    let elems_per_reg = VLEN.bytes() / sew_bytes;
291    let reg_off = u32::from(elem_i) / elems_per_reg;
292    let byte_off = (u32::from(elem_i) % elems_per_reg) * sew_bytes;
293    let buf = value.to_le_bytes();
294    // SAFETY: `base_reg + reg_off < 32` by caller's precondition
295    let reg = unsafe {
296        vregs.get_mut(VReg::from_bits(base_reg.to_bits() + reg_off as u8).unwrap_unchecked())
297    };
298    // SAFETY: `byte_off + sew_bytes <= VLEN.bytes()`
299    let dst = unsafe { reg.get_unchecked_mut(byte_off as usize..(byte_off + sew_bytes) as usize) };
300    // SAFETY: `sew_bytes <= 8`
301    dst.copy_from_slice(unsafe { buf.get_unchecked(..sew_bytes as usize) });
302}
303
/// Sign-extend the low `bits` of `val` to `i64`.
///
/// Bits of `val` above position `bits - 1` are discarded before the extension. The edge cases are
/// total rather than relying on shift wrap-around:
/// - `bits == 0` yields `0` (there is no sign bit to replicate);
/// - `bits >= 64` reinterprets the whole of `val` as `i64`.
#[inline(always)]
#[doc(hidden)]
#[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
pub fn sign_extend_bits(val: u64, bits: u8) -> i64 {
    // Number of high bits to discard; saturates at 0 when `bits` exceeds the word size.
    let shift = u64::BITS.saturating_sub(u32::from(bits));
    // `checked_shl` is `None` only for `shift == 64`, i.e. `bits == 0`.
    val.cast_signed()
        .checked_shl(shift)
        .map_or(0, |shifted| shifted >> shift)
}
312
/// Interpret a scalar operand as an unsigned SEW-wide value.
///
/// RVV widening scalar instructions (.vx/.wx) conceptually use a scalar
/// operand whose width matches the current SEW, not the full XLEN width.
///
/// For example on RV64:
///
/// SEW=8:
///     val = 0x0000_0000_0000_01ff
///     result = 0x0000_0000_0000_00ff
///
/// SEW=16:
///     val = 0x0000_0000_0000_01ff
///     result = 0x0000_0000_0000_01ff
///
/// SEW=32:
///     val = 0xffff_ffff_1234_5678
///     result = 0x0000_0000_1234_5678
///
/// This helper performs that SEW-width truncation without sign extension.
///
/// The mask is built without relying on shift wrap-around: `sew_bits == 0` yields `0` and
/// `sew_bits >= 64` returns `val` unchanged.
#[inline(always)]
#[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
fn scalar_unsigned_for_sew(val: u64, sew_bits: u8) -> u64 {
    // Number of high bits to clear; saturates at 0 when `sew_bits` exceeds the word size.
    let cleared = u64::BITS.saturating_sub(u32::from(sew_bits));
    // `checked_shr` is `None` only for `cleared == 64`, i.e. `sew_bits == 0`.
    let keep_mask = u64::MAX.checked_shr(cleared).unwrap_or(0);
    val & keep_mask
}
338
339/// Interpret a scalar operand as a signed SEW-wide value.
340///
341/// The scalar is first truncated to SEW bits, then sign-extended back to
342/// 64 bits.
343///
344/// For example on RV64:
345///
346/// SEW=8:
347///     val = 0x0000_0000_0000_00ff
348///     result = 0xffff_ffff_ffff_ffff (-1)
349///
350/// SEW=8:
351///     val = 0x0000_0000_0000_007f
352///     result = 0x0000_0000_0000_007f (+127)
353///
354/// SEW=16:
355///     val = 0x0000_0000_0000_ffff
356///     result = 0xffff_ffff_ffff_ffff (-1)
357///
358/// This matches the signed widening behavior required by instructions such
359/// as vwadd.vx and vwsub.vx.
360#[inline(always)]
361#[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
362fn scalar_signed_for_sew(val: u64, sew_bits: u8) -> u64 {
363    sign_extend_bits(val, sew_bits).cast_unsigned()
364}
365
366/// Execute a widening integer add/subtract.
367///
368/// Each source element is SEW-wide; the destination element is 2×SEW-wide.
369/// `ZERO_EXTEND_AB` selects unsigned or signed widening for sources (unsigned = zero-extend,
370/// signed = sign-extend).
371///
372/// `op` receives `(wide_a: u64, wide_b: u64) -> u64`.
373///
374/// # Safety
375/// - `vd` aligned to `2*group_regs`, fits in `[0,32)`, does not overlap `vs2` or `src` (verified by
376///   caller)
377/// - `vs2` aligned to `group_regs`, fits in `[0,32)` (verified by caller)
378/// - `src` register (when `WidenSrc::Vreg`) aligned to `group_regs`, fits in `[0,32)` (verified by
379///   caller)
380/// - `vl <= group_regs * VLEN.bytes() / sew.bytes_width()` (all elements fit)
381/// - SEW < 64
382/// - When `vm=false`: `vd.to_bits() != 0`
383#[inline(always)]
384#[doc(hidden)]
385// TODO: #[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
386pub unsafe fn execute_widen_op<const ZERO_EXTEND_AB: bool, Reg, ExtState, CustomError, F>(
387    ext_state: &mut ExtState,
388    vd: VReg,
389    vs2: VReg,
390    src: OpSrc,
391    vm: bool,
392    sew: Vsew,
393    op: F,
394) where
395    Reg: Register,
396    ExtState: VectorRegistersExt<Reg, CustomError>,
397    [(); SUPPORTED_ELEN_VLEN::<{ ExtState::ELEN }, { ExtState::VLEN }>]:,
398    CustomError: fmt::Debug,
399    F: Fn(u64, u64) -> u64,
400{
401    let vl = ext_state.vl();
402    let vstart = ext_state.vstart();
403    let wide_sew = sew
404        .double_width()
405        .expect("SEW < 64 is enforced by caller, hence this is always valid; qed");
406
407    // SAFETY: `vl <= VLMAX <= VLEN`
408    let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
409
410    for i in vstart.range_to(vl) {
411        if !mask_bit(&mask_buf, i) {
412            continue;
413        }
414        // SAFETY: `vs2` aligned to `group_regs`;
415        // `i < vl <= group_regs * (VLEN.bytes() / sew.bytes_width())`
416        let raw_a = unsafe { read_element_u64(ext_state.read_vregs(), vs2, i, sew) };
417        let wide_a = if ZERO_EXTEND_AB {
418            raw_a
419        } else {
420            sign_extend_bits(raw_a, sew.bits_width()).cast_unsigned()
421        };
422        let wide_b = match src {
423            OpSrc::Vreg(vs1_base) => {
424                // SAFETY: same argument as vs2
425                let raw_b = unsafe { read_element_u64(ext_state.read_vregs(), vs1_base, i, sew) };
426                if ZERO_EXTEND_AB {
427                    raw_b
428                } else {
429                    sign_extend_bits(raw_b, sew.bits_width()).cast_unsigned()
430                }
431            }
432            OpSrc::Scalar(val) => {
433                if ZERO_EXTEND_AB {
434                    scalar_unsigned_for_sew(val, sew.bits_width())
435                } else {
436                    scalar_signed_for_sew(val, sew.bits_width())
437                }
438            }
439        };
440        let result = op(wide_a, wide_b);
441        // SAFETY: `vd` aligned to `2*group_regs`;
442        // `i < vl <= group_regs * (VLEN.bytes() / sew.bytes_width())` so
443        // `i < 2*group_regs * (VLEN.bytes() / wide_sew.bytes_width())` - element fits in the wide
444        // group
445        unsafe {
446            write_element_u64(ext_state.write_vregs(), vd, i, wide_sew, result);
447        }
448    }
449    ext_state.mark_vs_dirty();
450    ext_state.reset_vstart();
451}
452
453/// Execute a widening add/subtract where `vs2` is already 2×SEW wide.
454///
455/// `vs2` is read at `wide_sew.bytes_width()`; `src` (narrow) is read at `sew.bytes_width()` and
456/// widened. `ZERO_EXTEND_B` selects unsigned vs signed widening for the narrow source operand.
457///
458/// # Safety
459/// - `vd` aligned to `2*group_regs`, fits in `[0,32)`, does not overlap `vs2` or `src`
460/// - `vs2` aligned to `2*group_regs`, fits in `[0,32)` (wide source)
461/// - `src` register (when `WidenSrc::Vreg`) aligned to `group_regs`, fits in `[0,32)`
462/// - `vl <= group_regs * VLEN.bytes() / sew.bytes_width()`
463/// - SEW < 64
464/// - When `vm=false`: `vd.to_bits() != 0`
465#[inline(always)]
466#[doc(hidden)]
467// TODO: #[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
468pub unsafe fn execute_widen_w_op<const ZERO_EXTEND_B: bool, Reg, ExtState, CustomError, F>(
469    ext_state: &mut ExtState,
470    vd: VReg,
471    vs2: VReg,
472    src: OpSrc,
473    vm: bool,
474    sew: Vsew,
475    op: F,
476) where
477    Reg: Register,
478    ExtState: VectorRegistersExt<Reg, CustomError>,
479    [(); SUPPORTED_ELEN_VLEN::<{ ExtState::ELEN }, { ExtState::VLEN }>]:,
480    CustomError: fmt::Debug,
481    F: Fn(u64, u64) -> u64,
482{
483    let vl = ext_state.vl();
484    let vstart = ext_state.vstart();
485    let wide_sew = sew
486        .double_width()
487        .expect("SEW < 64 is enforced by caller, hence this is always valid; qed");
488
489    // SAFETY: `vl <= VLEN`
490    let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
491
492    for i in vstart.range_to(vl) {
493        if !mask_bit(&mask_buf, i) {
494            continue;
495        }
496        // vs2 is already 2×SEW; read at wide width
497        // SAFETY: `vs2` aligned to `2*group_regs`; element `i` fits within it
498        let wide_a = unsafe { read_element_u64(ext_state.read_vregs(), vs2, i, wide_sew) };
499        let wide_b = match src {
500            OpSrc::Vreg(vs1) => {
501                // SAFETY: `vs1` is aligned to `group_regs` and fits within `[0, 32)`,
502                // verified by caller; `i < vl <= group_regs * (VLEN.bytes() / sew.bytes_width())`,
503                // so `vs1_base + i / elems_per_reg < vs1_base + group_regs <= 32`
504                let raw_b = unsafe { read_element_u64(ext_state.read_vregs(), vs1, i, sew) };
505                if ZERO_EXTEND_B {
506                    raw_b
507                } else {
508                    sign_extend_bits(raw_b, sew.bits_width()).cast_unsigned()
509                }
510            }
511            OpSrc::Scalar(val) => {
512                if ZERO_EXTEND_B {
513                    scalar_unsigned_for_sew(val, sew.bits_width())
514                } else {
515                    scalar_signed_for_sew(val, sew.bits_width())
516                }
517            }
518        };
519        let result = op(wide_a, wide_b);
520        // SAFETY: same as `execute_widen_op` for vd
521        unsafe {
522            write_element_u64(ext_state.write_vregs(), vd, i, wide_sew, result);
523        }
524    }
525    ext_state.mark_vs_dirty();
526    ext_state.reset_vstart();
527}
528
529/// Execute a narrowing right-shift.
530///
531/// `vs2` is 2×SEW wide; the shift amount comes from `src` (SEW-wide or scalar).
532/// The shift amount is masked to `log2(2*SEW)` bits per spec §12.6.
533/// `ARITHMETIC` selects sign-extending (true) vs zero-extending (false) before shifting.
534///
535/// # Safety
536/// - `vd` aligned to `group_regs`, fits in `[0,32)`
537/// - `vs2` aligned to `wide_group_regs`, fits in `[0,32)`; aliasing with the low half of `vs2` is
538///   permitted per spec §11.7 - reads complete before writes to any overlapping element since the
539///   destination SEW is half the source SEW
540/// - `src` register (when `OpSrc::Vreg`) aligned to `group_regs`, fits in `[0,32)`
541/// - `vl <= group_regs * VLEN.bytes() / sew.bytes_width()`
542/// - SEW < 64
543/// - When `vm=false`: `vd.to_bits() != 0`
544#[inline(always)]
545#[doc(hidden)]
546// TODO: #[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
547pub unsafe fn execute_narrow_shift<const ARITHMETIC: bool, Reg, ExtState, CustomError>(
548    ext_state: &mut ExtState,
549    vd: VReg,
550    vs2: VReg,
551    src: OpSrc,
552    vm: bool,
553    sew: Vsew,
554) where
555    Reg: Register,
556    ExtState: VectorRegistersExt<Reg, CustomError>,
557    [(); SUPPORTED_ELEN_VLEN::<{ ExtState::ELEN }, { ExtState::VLEN }>]:,
558    CustomError: fmt::Debug,
559{
560    let vl = ext_state.vl();
561    let vstart = ext_state.vstart();
562    let wide_sew = sew
563        .double_width()
564        .expect("SEW < 64 is enforced by caller, hence this is always valid; qed");
565    // Shift amount mask: log2(2*SEW) bits = log2(SEW) + 1 bits
566    let shamt_mask = u64::from(wide_sew.bits_width() - 1);
567
568    // SAFETY: `vl <= VLEN`
569    let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
570
571    for i in vstart.range_to(vl) {
572        if !mask_bit(&mask_buf, i) {
573            continue;
574        }
575        // SAFETY: `vs2` is the wide source group
576        let wide_val = unsafe { read_element_u64(ext_state.read_vregs(), vs2, i, wide_sew) };
577        let shamt = match src {
578            OpSrc::Vreg(vs1_base) => {
579                // SAFETY: `vs1` is aligned to `group_regs` and fits within `[0, 32)`,
580                // verified by caller; `i < vl <= group_regs * (VLEN.bytes() / sew.bytes_width())`,
581                // so `vs1_base + i / elems_per_reg < vs1_base + group_regs <= 32`
582                let raw = unsafe { read_element_u64(ext_state.read_vregs(), vs1_base, i, sew) };
583                raw & shamt_mask
584            }
585            // Scalar shift amount: only the low log2(2*SEW) bits are used per spec
586            OpSrc::Scalar(val) => val & shamt_mask,
587        };
588        let result_wide = if ARITHMETIC {
589            // Sign-extend to i64 first, then shift arithmetically as i64 to
590            // preserve sign bits, then cast back. Shifting u64 after cast_unsigned()
591            // would be a logical shift and lose sign bits.
592            (sign_extend_bits(wide_val, wide_sew.bits_width()) >> shamt).cast_unsigned()
593        } else {
594            wide_val >> shamt
595        };
596        // Truncate to SEW bits
597        let result = result_wide & ((1u64 << sew.bits_width()) - 1);
598        // SAFETY: `vd` is the narrow destination group
599        unsafe {
600            write_element_u64(ext_state.write_vregs(), vd, i, sew, result);
601        }
602    }
603    ext_state.mark_vs_dirty();
604    ext_state.reset_vstart();
605}
606
607/// Execute an integer extension (vzext/vsext).
608///
609/// Source element width is `sew.divide_by_factor(factor).bytes_width()`; destination is
610/// `sew.bytes_width()`. `SIGN` selects sign- or zero-extension.
611///
612/// The source EMUL = LMUL / factor; the source register group is `max(1, group_regs / factor)`
613/// registers.
614///
615/// # Safety
616/// - `vd` aligned to `group_regs`, fits in `[0,32)`
617/// - `vs2` aligned to `src_group_regs`, fits in `[0,32)`, does not overlap `vd`
618/// - `vl <= group_regs * VLEN.bytes() / sew.bytes_width()`
619/// - `sew.divide_by_factor(factor).is_some()`
620/// - When `vm=false`: `vd.to_bits() != 0`
621#[inline(always)]
622#[doc(hidden)]
623// TODO: #[cfg_attr(feature = "no-panic", no_panic_const::no_panic)]
624pub unsafe fn execute_extension<const SIGN: bool, Reg, ExtState, CustomError>(
625    ext_state: &mut ExtState,
626    vd: VReg,
627    vs2: VReg,
628    vm: bool,
629    sew: Vsew,
630    factor: VsewFactor,
631) where
632    Reg: Register,
633    ExtState: VectorRegistersExt<Reg, CustomError>,
634    [(); SUPPORTED_ELEN_VLEN::<{ ExtState::ELEN }, { ExtState::VLEN }>]:,
635    CustomError: fmt::Debug,
636{
637    let vl = ext_state.vl();
638    let vstart = ext_state.vstart();
639    let src_sew = sew
640        .divide_by_factor(factor)
641        .expect("SEW >= factor*8 and valid according to function contract; qed");
642
643    // SAFETY: `vl <= VLEN`
644    let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
645
646    for i in vstart.range_to(vl) {
647        if !mask_bit(&mask_buf, i) {
648            continue;
649        }
650        // SAFETY: vs2 group covers `vl` narrow elements
651        let raw = unsafe { read_element_u64(ext_state.read_vregs(), vs2, i, src_sew) };
652        let result = if SIGN {
653            sign_extend_bits(raw, src_sew.bits_width()).cast_unsigned()
654        } else {
655            raw
656        };
657        // SAFETY: vd group covers `vl` wide elements
658        unsafe {
659            write_element_u64(ext_state.write_vregs(), vd, i, sew, result);
660        }
661    }
662    ext_state.mark_vs_dirty();
663    ext_state.reset_vstart();
664}
ab_riscv_interpreter/v/zvexx/widen_narrow/zvexx_widen_narrow_helpers.rs

ab_riscv_interpreter/v/zvexx/widen_narrow/
zvexx_widen_narrow_helpers.rs