1use crate::v::vector_registers::VectorRegistersExt;
4pub use crate::v::zve64x::arith::zve64x_arith_helpers::{OpSrc, check_vreg_group_alignment};
5use crate::v::zve64x::zve64x_helpers::INSTRUCTION_SIZE;
6use crate::{ExecutionError, InterpreterState, ProgramCounter, VirtualMemory};
7use ab_riscv_primitives::instructions::v::Vsew;
8use ab_riscv_primitives::registers::general_purpose::Register;
9use ab_riscv_primitives::registers::vector::VReg;
10use core::fmt;
11
12#[inline(always)]
15#[doc(hidden)]
16pub fn check_vd_widen_no_src_check<Reg, ExtState, Memory, PC, IH, CustomError>(
17 state: &InterpreterState<Reg, ExtState, Memory, PC, IH, CustomError>,
18 vd: VReg,
19 wide_group_regs: u8,
20) -> Result<(), ExecutionError<Reg::Type, CustomError>>
21where
22 Reg: Register,
23 [(); Reg::N]:,
24 PC: ProgramCounter<Reg::Type, Memory, CustomError>,
25{
26 let vd_idx = vd.bits();
27 if !vd_idx.is_multiple_of(wide_group_regs) || vd_idx + wide_group_regs > 32 {
28 return Err(ExecutionError::IllegalInstruction {
29 address: state.instruction_fetcher.old_pc(INSTRUCTION_SIZE),
30 });
31 }
32 Ok(())
33}
34
35#[inline(always)]
38#[doc(hidden)]
39pub fn check_vs_ext_alignment<Reg, ExtState, Memory, PC, IH, CustomError>(
40 state: &InterpreterState<Reg, ExtState, Memory, PC, IH, CustomError>,
41 vs2: VReg,
42 src_group_regs: u8,
43 vd: VReg,
44 group_regs: u8,
45) -> Result<(), ExecutionError<Reg::Type, CustomError>>
46where
47 Reg: Register,
48 [(); Reg::N]:,
49 PC: ProgramCounter<Reg::Type, Memory, CustomError>,
50{
51 let vs2_idx = vs2.bits();
52 if !vs2_idx.is_multiple_of(src_group_regs) || vs2_idx + src_group_regs > 32 {
53 return Err(ExecutionError::IllegalInstruction {
54 address: state.instruction_fetcher.old_pc(INSTRUCTION_SIZE),
55 });
56 }
57 if ranges_overlap(vd.bits(), group_regs, vs2_idx, src_group_regs) {
59 return Err(ExecutionError::IllegalInstruction {
60 address: state.instruction_fetcher.old_pc(INSTRUCTION_SIZE),
61 });
62 }
63 Ok(())
64}
65
66#[inline(always)]
72#[doc(hidden)]
73pub fn check_vd_widen_alignment<Reg, ExtState, Memory, PC, IH, CustomError>(
74 state: &InterpreterState<Reg, ExtState, Memory, PC, IH, CustomError>,
75 vd: VReg,
76 vs_a: VReg,
77 vs_b_opt: Option<VReg>,
78 group_regs: u8,
79 wide_group_regs: u8,
80) -> Result<(), ExecutionError<Reg::Type, CustomError>>
81where
82 Reg: Register,
83 [(); Reg::N]:,
84 PC: ProgramCounter<Reg::Type, Memory, CustomError>,
85{
86 let vd_idx = vd.bits();
87 if !vd_idx.is_multiple_of(wide_group_regs) || vd_idx + wide_group_regs > 32 {
88 return Err(ExecutionError::IllegalInstruction {
89 address: state.instruction_fetcher.old_pc(INSTRUCTION_SIZE),
90 });
91 }
92 let va_idx = vs_a.bits();
93 if ranges_overlap(vd_idx, wide_group_regs, va_idx, group_regs) {
94 return Err(ExecutionError::IllegalInstruction {
95 address: state.instruction_fetcher.old_pc(INSTRUCTION_SIZE),
96 });
97 }
98 if let Some(vs_b) = vs_b_opt {
99 let vb_idx = vs_b.bits();
100 if ranges_overlap(vd_idx, wide_group_regs, vb_idx, group_regs) {
101 return Err(ExecutionError::IllegalInstruction {
102 address: state.instruction_fetcher.old_pc(INSTRUCTION_SIZE),
103 });
104 }
105 }
106 Ok(())
107}
108
109#[inline(always)]
112#[doc(hidden)]
113pub fn check_vs_wide_alignment<Reg, ExtState, Memory, PC, IH, CustomError>(
114 state: &InterpreterState<Reg, ExtState, Memory, PC, IH, CustomError>,
115 vs: VReg,
116 wide_group_regs: u8,
117) -> Result<(), ExecutionError<Reg::Type, CustomError>>
118where
119 Reg: Register,
120 [(); Reg::N]:,
121 PC: ProgramCounter<Reg::Type, Memory, CustomError>,
122{
123 let vs_idx = vs.bits();
124 if !vs_idx.is_multiple_of(wide_group_regs) || vs_idx + wide_group_regs > 32 {
125 return Err(ExecutionError::IllegalInstruction {
126 address: state.instruction_fetcher.old_pc(INSTRUCTION_SIZE),
127 });
128 }
129 Ok(())
130}
131
132#[inline(always)]
138#[doc(hidden)]
139pub fn check_vd_narrow_alignment<Reg, ExtState, Memory, PC, IH, CustomError>(
140 state: &InterpreterState<Reg, ExtState, Memory, PC, IH, CustomError>,
141 vd: VReg,
142 group_regs: u8,
143) -> Result<(), ExecutionError<Reg::Type, CustomError>>
144where
145 Reg: Register,
146 [(); Reg::N]:,
147 PC: ProgramCounter<Reg::Type, Memory, CustomError>,
148{
149 let vd_idx = vd.bits();
150 if !vd_idx.is_multiple_of(group_regs) || vd_idx + group_regs > 32 {
151 return Err(ExecutionError::IllegalInstruction {
152 address: state.instruction_fetcher.old_pc(INSTRUCTION_SIZE),
153 });
154 }
155 Ok(())
156}
157
/// Returns whether the half-open register ranges `[a_start, a_start + a_len)`
/// and `[b_start, b_start + b_len)` intersect.
#[inline(always)]
fn ranges_overlap(a_start: u8, a_len: u8, b_start: u8, b_len: u8) -> bool {
    let a_starts_before_b_ends = a_start < b_start + b_len;
    let b_starts_before_a_ends = b_start < a_start + a_len;
    a_starts_before_b_ends && b_starts_before_a_ends
}
163
/// Reads bit `i` of a little-endian packed bitmask; indices past the end of
/// the slice read as 0 (inactive).
#[inline(always)]
fn mask_bit(mask: &[u8], i: u32) -> bool {
    let byte_idx = (i / u8::BITS) as usize;
    let bit_idx = i % u8::BITS;
    match mask.get(byte_idx) {
        Some(byte) => (byte >> bit_idx) & 1 == 1,
        None => false,
    }
}
170
171#[inline(always)]
178unsafe fn snapshot_mask<const VLENB: usize>(
179 vreg: &[[u8; VLENB]; 32],
180 vm: bool,
181 vl: u32,
182) -> [u8; VLENB] {
183 let mut buf = [0u8; VLENB];
184 if vm {
185 buf = [0xffu8; VLENB];
186 } else {
187 let mask_bytes = vl.div_ceil(u8::BITS) as usize;
188 unsafe {
190 buf.get_unchecked_mut(..mask_bytes)
191 .copy_from_slice(vreg[usize::from(VReg::V0.bits())].get_unchecked(..mask_bytes));
192 }
193 }
194 buf
195}
196
/// Reads one `sew_bytes`-wide little-endian element at logical index `elem_i`
/// from the register group starting at `base_reg`, zero-extended to `u64`.
///
/// # Safety
/// The caller must guarantee that `sew_bytes` divides `VLENB`, that
/// `sew_bytes <= 8`, and that the addressed element lies inside the
/// 32-register file (`base_reg + elem_i / (VLENB / sew_bytes) < 32`).
#[inline(always)]
unsafe fn read_element_u64<const VLENB: usize>(
    vreg: &[[u8; VLENB]; 32],
    base_reg: usize,
    elem_i: u32,
    sew_bytes: usize,
) -> u64 {
    let per_reg = VLENB / sew_bytes;
    let reg_idx = base_reg + elem_i as usize / per_reg;
    let start = (elem_i as usize % per_reg) * sew_bytes;
    let mut le_bytes = [0u8; 8];
    // SAFETY: the caller contract puts both the register index and the byte
    // range in bounds, and `sew_bytes <= 8` keeps the copy inside `le_bytes`.
    unsafe {
        let element = vreg
            .get_unchecked(reg_idx)
            .get_unchecked(start..start + sew_bytes);
        le_bytes.get_unchecked_mut(..sew_bytes).copy_from_slice(element);
    }
    u64::from_le_bytes(le_bytes)
}
221
/// Stores the low `sew_bytes` bytes of `value` (little-endian) into logical
/// element `elem_i` of the register group starting at `base_reg`.
///
/// # Safety
/// The caller must guarantee that `sew_bytes` divides `VLENB`, that
/// `sew_bytes <= 8`, and that the addressed element lies inside the
/// 32-register file.
#[inline(always)]
unsafe fn write_element_u64<const VLENB: usize>(
    vreg: &mut [[u8; VLENB]; 32],
    base_reg: u8,
    elem_i: u32,
    sew_bytes: usize,
    value: u64,
) {
    let per_reg = VLENB / sew_bytes;
    let reg_idx = usize::from(base_reg) + elem_i as usize / per_reg;
    let start = (elem_i as usize % per_reg) * sew_bytes;
    let le_bytes = value.to_le_bytes();
    // SAFETY: the caller contract puts both the register index and the byte
    // range in bounds, and `sew_bytes <= 8` keeps the source slice valid.
    unsafe {
        vreg.get_unchecked_mut(reg_idx)
            .get_unchecked_mut(start..start + sew_bytes)
            .copy_from_slice(le_bytes.get_unchecked(..sew_bytes));
    }
}
245
/// Sign-extends the low `sew_bits` bits of `val` to a full `i64` by shifting
/// the value to the top of the word and arithmetic-shifting it back down.
///
/// `sew_bits` must be in `1..=64`; a value of 0 would overflow the shift.
#[inline(always)]
#[doc(hidden)]
pub fn sign_extend_bits(val: u64, sew_bits: u32) -> i64 {
    let unused = u64::BITS - sew_bits;
    (val << unused).cast_signed() >> unused
}
253
/// Executes a widening arithmetic operation over a vector register group:
/// for each active element, `vd[2*SEW] = op(extend(vs2[SEW]), extend(src[SEW]))`.
///
/// Elements `vstart..vl` are processed; an element whose mask bit is clear
/// is skipped entirely, so the destination element keeps its previous
/// contents. After the loop the vector state is marked dirty and `vstart`
/// is reset to zero.
///
/// * `zero_extend_a` — zero- (true) vs sign-extend (false) the `vs2` operand
///   to 64 bits.
/// * `zero_extend_b` — same choice for the `src` operand; a scalar `src` is
///   first truncated to the low SEW bits before extension.
/// * `op` — combines the two extended 64-bit operands; only the low 2*SEW
///   bytes of its result are stored.
///
/// # Safety
///
/// The caller must have validated the instruction beforehand: the `vd` and
/// `vs2` (and any vector `src`) register groups must lie within the
/// 32-register file, `vl` must not exceed the element capacity of those
/// groups, and `sew` must be a narrow width (< 64 bits) — otherwise the
/// unchecked element accesses and the `1u64 << sew_bits` truncation mask are
/// unsound.
#[inline(always)]
#[expect(clippy::too_many_arguments, reason = "Internal API")]
#[doc(hidden)]
pub unsafe fn execute_widen_op<Reg, ExtState, Memory, PC, IH, CustomError, F>(
    state: &mut InterpreterState<Reg, ExtState, Memory, PC, IH, CustomError>,
    vd: VReg,
    vs2: VReg,
    src: OpSrc,
    vm: bool,
    vl: u32,
    vstart: u32,
    sew: Vsew,
    zero_extend_a: bool,
    zero_extend_b: bool,
    op: F,
) where
    Reg: Register,
    [(); Reg::N]:,
    ExtState: VectorRegistersExt<Reg, CustomError>,
    [(); ExtState::ELEN as usize]:,
    [(); ExtState::VLEN as usize]:,
    [(); ExtState::VLENB as usize]:,
    Memory: VirtualMemory,
    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
    CustomError: fmt::Debug,
    F: Fn(u64, u64) -> u64,
{
    // Sources are read at SEW, the destination is written at 2*SEW.
    let sew_bytes = usize::from(sew.bytes());
    let wide_sew_bytes = sew_bytes * 2;
    let sew_bits = u32::from(sew.bits());

    // Take the v0 mask once before the loop (all-ones when vm is set).
    let mask_buf = unsafe { snapshot_mask(state.ext_state.read_vreg(), vm, vl) };
    let vd_base = vd.bits();
    let vs2_base = vs2.bits();

    for i in vstart..vl {
        // Inactive element: leave the destination undisturbed.
        if !mask_bit(&mask_buf, i) {
            continue;
        }
        let raw_a = unsafe {
            read_element_u64(
                state.ext_state.read_vreg(),
                usize::from(vs2_base),
                i,
                sew_bytes,
            )
        };
        let wide_a = if zero_extend_a {
            raw_a
        } else {
            sign_extend_bits(raw_a, sew_bits).cast_unsigned()
        };
        let wide_b = match &src {
            OpSrc::Vreg(vs1_base) => {
                let raw_b = unsafe {
                    read_element_u64(
                        state.ext_state.read_vreg(),
                        usize::from(*vs1_base),
                        i,
                        sew_bytes,
                    )
                };
                if zero_extend_b {
                    raw_b
                } else {
                    sign_extend_bits(raw_b, sew_bits).cast_unsigned()
                }
            }
            OpSrc::Scalar(val) => {
                // Scalars carry a full 64-bit value; keep only the low SEW
                // bits before extending.
                let truncated = val & ((1u64 << sew_bits) - 1);
                if zero_extend_b {
                    truncated
                } else {
                    sign_extend_bits(truncated, sew_bits).cast_unsigned()
                }
            }
        };
        let result = op(wide_a, wide_b);
        unsafe {
            write_element_u64(
                state.ext_state.write_vreg(),
                vd_base,
                i,
                wide_sew_bytes,
                result,
            );
        }
    }
    state.ext_state.mark_vs_dirty();
    state.ext_state.reset_vstart();
}
371
/// Executes a wide-operand widening operation (the `.w*` variants): for each
/// active element, `vd[2*SEW] = op(vs2[2*SEW], extend(src[SEW]))`.
///
/// Unlike `execute_widen_op`, the first operand is read from `vs2` at the
/// doubled width and used as-is; only the second operand is narrow and gets
/// extended to 64 bits (`zero_extend_b` selects zero- vs sign-extension; a
/// scalar `src` is first truncated to the low SEW bits). Elements with a
/// clear mask bit are skipped (destination undisturbed). Afterwards the
/// vector state is marked dirty and `vstart` is reset to zero.
///
/// # Safety
///
/// The caller must have validated the instruction beforehand: the register
/// groups of `vd`, `vs2` (wide) and any vector `src` (narrow) must lie
/// within the 32-register file, `vl` must fit their element capacity, and
/// `sew` must be a narrow width (< 64 bits) so `1u64 << sew_bits` cannot
/// overflow.
#[inline(always)]
#[expect(clippy::too_many_arguments, reason = "Internal API")]
#[doc(hidden)]
pub unsafe fn execute_widen_w_op<Reg, ExtState, Memory, PC, IH, CustomError, F>(
    state: &mut InterpreterState<Reg, ExtState, Memory, PC, IH, CustomError>,
    vd: VReg,
    vs2: VReg,
    src: OpSrc,
    vm: bool,
    vl: u32,
    vstart: u32,
    sew: Vsew,
    zero_extend_b: bool,
    op: F,
) where
    Reg: Register,
    [(); Reg::N]:,
    ExtState: VectorRegistersExt<Reg, CustomError>,
    [(); ExtState::ELEN as usize]:,
    [(); ExtState::VLEN as usize]:,
    [(); ExtState::VLENB as usize]:,
    Memory: VirtualMemory,
    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
    CustomError: fmt::Debug,
    F: Fn(u64, u64) -> u64,
{
    // `vs2` and `vd` are accessed at 2*SEW; only the `src` operand is narrow.
    let sew_bytes = usize::from(sew.bytes());
    let wide_sew_bytes = sew_bytes * 2;
    let sew_bits = u32::from(sew.bits());

    // Take the v0 mask once before the loop (all-ones when vm is set).
    let mask_buf = unsafe { snapshot_mask(state.ext_state.read_vreg(), vm, vl) };
    let vd_base = vd.bits();
    let vs2_base = vs2.bits();

    for i in vstart..vl {
        // Inactive element: leave the destination undisturbed.
        if !mask_bit(&mask_buf, i) {
            continue;
        }
        // First operand is already 2*SEW wide; no extension needed.
        let wide_a = unsafe {
            read_element_u64(
                state.ext_state.read_vreg(),
                usize::from(vs2_base),
                i,
                wide_sew_bytes,
            )
        };
        let wide_b = match &src {
            OpSrc::Vreg(vs1_base) => {
                let raw_b = unsafe {
                    read_element_u64(
                        state.ext_state.read_vreg(),
                        usize::from(*vs1_base),
                        i,
                        sew_bytes,
                    )
                };
                if zero_extend_b {
                    raw_b
                } else {
                    sign_extend_bits(raw_b, sew_bits).cast_unsigned()
                }
            }
            OpSrc::Scalar(val) => {
                // Scalars carry a full 64-bit value; keep only the low SEW
                // bits before extending.
                let truncated = val & ((1u64 << sew_bits) - 1);
                if zero_extend_b {
                    truncated
                } else {
                    sign_extend_bits(truncated, sew_bits).cast_unsigned()
                }
            }
        };
        let result = op(wide_a, wide_b);
        unsafe {
            write_element_u64(
                state.ext_state.write_vreg(),
                vd_base,
                i,
                wide_sew_bytes,
                result,
            );
        }
    }
    state.ext_state.mark_vs_dirty();
    state.ext_state.reset_vstart();
}
477
/// Executes a narrowing right shift (vnsrl/vnsra style): for each active
/// element, a 2*SEW-wide value from `vs2` is shifted right and the low SEW
/// bits of the result are stored into `vd`.
///
/// The shift amount comes from `src` (a vector element or a scalar) and is
/// taken modulo 2*SEW via `shamt_mask`, matching the narrowing-shift
/// semantics. With `arithmetic` set, the wide value is sign-extended before
/// shifting (arithmetic shift); otherwise the shift is logical. Masked-off
/// elements are skipped (destination undisturbed). Afterwards the vector
/// state is marked dirty and `vstart` is reset to zero.
///
/// # Safety
///
/// The caller must have validated the instruction beforehand: the wide `vs2`
/// group, the narrow `vd` group and any vector `src` group must lie within
/// the 32-register file and `vl` must fit their element capacity. `sew` must
/// satisfy 2*SEW <= 64, which also keeps the `1u64 << sew.bits()` truncation
/// mask from overflowing.
#[inline(always)]
#[expect(clippy::too_many_arguments, reason = "Internal API")]
#[doc(hidden)]
pub unsafe fn execute_narrow_shift<Reg, ExtState, Memory, PC, IH, CustomError>(
    state: &mut InterpreterState<Reg, ExtState, Memory, PC, IH, CustomError>,
    vd: VReg,
    vs2: VReg,
    src: OpSrc,
    vm: bool,
    vl: u32,
    vstart: u32,
    sew: Vsew,
    arithmetic: bool,
) where
    Reg: Register,
    [(); Reg::N]:,
    ExtState: VectorRegistersExt<Reg, CustomError>,
    [(); ExtState::ELEN as usize]:,
    [(); ExtState::VLEN as usize]:,
    [(); ExtState::VLENB as usize]:,
    Memory: VirtualMemory,
    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
    CustomError: fmt::Debug,
{
    // Source elements are 2*SEW wide; results are truncated back to SEW.
    let sew_bytes = usize::from(sew.bytes());
    let wide_sew_bytes = sew_bytes * 2;
    let wide_sew_bits = u32::from(sew.bits()) * 2;
    // Shift amounts are taken modulo the wide element width (2*SEW).
    let shamt_mask = u64::from(wide_sew_bits - 1);

    // Take the v0 mask once before the loop (all-ones when vm is set).
    let mask_buf = unsafe { snapshot_mask(state.ext_state.read_vreg(), vm, vl) };
    let vd_base = vd.bits();
    let vs2_base = vs2.bits();

    for i in vstart..vl {
        // Inactive element: leave the destination undisturbed.
        if !mask_bit(&mask_buf, i) {
            continue;
        }
        let wide_val = unsafe {
            read_element_u64(
                state.ext_state.read_vreg(),
                usize::from(vs2_base),
                i,
                wide_sew_bytes,
            )
        };
        let shamt = match &src {
            OpSrc::Vreg(vs1_base) => {
                let raw = unsafe {
                    read_element_u64(
                        state.ext_state.read_vreg(),
                        usize::from(*vs1_base),
                        i,
                        sew_bytes,
                    )
                };
                raw & shamt_mask
            }
            OpSrc::Scalar(val) => val & shamt_mask,
        };
        let result_wide = if arithmetic {
            // Arithmetic shift: sign-extend the 2*SEW value first so the
            // i64 shift replicates its sign bit.
            (sign_extend_bits(wide_val, wide_sew_bits) >> shamt).cast_unsigned()
        } else {
            wide_val >> shamt
        };
        // Narrow back down: keep only the low SEW bits.
        let result = result_wide & ((1u64 << sew.bits()) - 1);
        unsafe {
            write_element_u64(state.ext_state.write_vreg(), vd_base, i, sew_bytes, result);
        }
    }
    state.ext_state.mark_vs_dirty();
    state.ext_state.reset_vstart();
}
578
/// Executes an integer extension instruction (vzext/vsext style): each active
/// SEW-wide destination element is produced by extending a source element of
/// width SEW / `factor` read from the `vs2` group.
///
/// `sign` selects sign-extension of the low SEW/factor bits; otherwise the
/// raw value (already zero-padded to 64 bits by the element read) is stored.
/// Masked-off elements are skipped (destination undisturbed). Afterwards the
/// vector state is marked dirty and `vstart` is reset to zero.
///
/// # Safety
///
/// The caller must have validated the instruction beforehand: `factor` must
/// evenly divide the SEW width (presumably 2, 4, or 8 for the vf2/vf4/vf8
/// encodings — confirm against callers), the `vd` and `vs2` groups must lie
/// within the 32-register file, and `vl` must fit their element capacity.
#[inline(always)]
#[expect(clippy::too_many_arguments, reason = "Internal API")]
#[doc(hidden)]
pub unsafe fn execute_extension<Reg, ExtState, Memory, PC, IH, CustomError>(
    state: &mut InterpreterState<Reg, ExtState, Memory, PC, IH, CustomError>,
    vd: VReg,
    vs2: VReg,
    vm: bool,
    vl: u32,
    vstart: u32,
    sew: Vsew,
    factor: u8,
    sign: bool,
) where
    Reg: Register,
    [(); Reg::N]:,
    ExtState: VectorRegistersExt<Reg, CustomError>,
    [(); ExtState::ELEN as usize]:,
    [(); ExtState::VLEN as usize]:,
    [(); ExtState::VLENB as usize]:,
    Memory: VirtualMemory,
    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
    CustomError: fmt::Debug,
{
    // Source elements are a `factor`-th of the destination element width.
    let sew_bytes = usize::from(sew.bytes());
    let src_sew_bytes = sew_bytes / usize::from(factor);
    let src_sew_bits = (u32::from(sew.bits())) / u32::from(factor);

    // Take the v0 mask once before the loop (all-ones when vm is set).
    let mask_buf = unsafe { snapshot_mask(state.ext_state.read_vreg(), vm, vl) };
    let vd_base = vd.bits();
    let vs2_base = vs2.bits();

    for i in vstart..vl {
        // Inactive element: leave the destination undisturbed.
        if !mask_bit(&mask_buf, i) {
            continue;
        }
        let raw = unsafe {
            read_element_u64(
                state.ext_state.read_vreg(),
                usize::from(vs2_base),
                i,
                src_sew_bytes,
            )
        };
        let result = if sign {
            sign_extend_bits(raw, src_sew_bits).cast_unsigned()
        } else {
            raw
        };
        unsafe {
            write_element_u64(state.ext_state.write_vreg(), vd_base, i, sew_bytes, result);
        }
    }
    state.ext_state.mark_vs_dirty();
    state.ext_state.reset_vstart();
}