1use crate::v::vector_registers::VectorRegistersExt;
4pub use crate::v::zve64x::arith::zve64x_arith_helpers::check_vreg_group_alignment;
5use crate::v::zve64x::arith::zve64x_arith_helpers::{read_element_u64, write_element_u64};
6use crate::v::zve64x::load::zve64x_load_helpers::{mask_bit, snapshot_mask};
7use crate::v::zve64x::zve64x_helpers::INSTRUCTION_SIZE;
8use crate::{ExecutionError, InterpreterState, ProgramCounter, VirtualMemory};
9use ab_riscv_primitives::instructions::v::Vsew;
10use ab_riscv_primitives::registers::general_purpose::Register;
11use ab_riscv_primitives::registers::vector::VReg;
12use core::fmt;
13
14#[inline(always)]
19#[doc(hidden)]
20pub fn check_no_overlap<Reg, ExtState, Memory, PC, IH, CustomError>(
21 state: &InterpreterState<Reg, ExtState, Memory, PC, IH, CustomError>,
22 a: VReg,
23 b: VReg,
24 count: u8,
25) -> Result<(), ExecutionError<Reg::Type, CustomError>>
26where
27 Reg: Register,
28 [(); Reg::N]:,
29 PC: ProgramCounter<Reg::Type, Memory, CustomError>,
30{
31 let a_start = u16::from(a.bits());
32 let b_start = u16::from(b.bits());
33 let count = u16::from(count);
34 if a_start < b_start + count && b_start < a_start + count {
38 return Err(ExecutionError::IllegalInstruction {
39 address: state.instruction_fetcher.old_pc(INSTRUCTION_SIZE),
40 });
41 }
42 Ok(())
43}
44
45#[inline(always)]
51#[doc(hidden)]
52pub fn check_no_overlap_asymmetric<Reg, ExtState, Memory, PC, IH, CustomError>(
53 state: &InterpreterState<Reg, ExtState, Memory, PC, IH, CustomError>,
54 a: VReg,
55 a_count: u8,
56 b: VReg,
57 b_count: u8,
58) -> Result<(), ExecutionError<Reg::Type, CustomError>>
59where
60 Reg: Register,
61 [(); Reg::N]:,
62 PC: ProgramCounter<Reg::Type, Memory, CustomError>,
63{
64 let a_start = u16::from(a.bits());
65 let b_start = u16::from(b.bits());
66 let a_count = u16::from(a_count);
67 let b_count = u16::from(b_count);
68 if a_start < b_start + b_count && b_start < a_start + a_count {
71 return Err(ExecutionError::IllegalInstruction {
72 address: state.instruction_fetcher.old_pc(INSTRUCTION_SIZE),
73 });
74 }
75 Ok(())
76}
77
/// Reads element 0 of vector register `base_reg` as a little-endian value,
/// zero-extended to `u64`.
///
/// # Safety
///
/// Caller must guarantee `base_reg < 32` and that `sew.bytes()` is at most
/// 8 and at most `VLENB`.
#[inline(always)]
pub unsafe fn read_element_0_u64<const VLENB: usize>(
    vreg: &[[u8; VLENB]; 32],
    base_reg: u8,
    sew: Vsew,
) -> u64 {
    let sew_bytes = usize::from(sew.bytes());
    // SAFETY: caller guarantees `base_reg < 32`.
    let reg = unsafe { vreg.get_unchecked(usize::from(base_reg)) };
    // Upper bytes of `buf` stay zero, which gives the zero-extension.
    let mut buf = [0u8; 8];
    // SAFETY: caller guarantees `sew_bytes <= 8` (for `buf`) and
    // `sew_bytes <= VLENB` (for `reg`).
    unsafe { buf.get_unchecked_mut(..sew_bytes) }
        .copy_from_slice(unsafe { reg.get_unchecked(..sew_bytes) });
    u64::from_le_bytes(buf)
}
97
/// Writes `value` (truncated to the element width) into element 0 of vector
/// register `base_reg`, little-endian.
///
/// # Safety
///
/// Caller must guarantee `base_reg < 32` and that `sew.bytes()` is at most
/// 8 and at most `VLENB`.
#[inline(always)]
pub unsafe fn write_element_0_u64<const VLENB: usize>(
    vreg: &mut [[u8; VLENB]; 32],
    base_reg: u8,
    sew: Vsew,
    value: u64,
) {
    let sew_bytes = usize::from(sew.bytes());
    let buf = value.to_le_bytes();
    // SAFETY: caller guarantees `base_reg < 32`.
    let reg = unsafe { vreg.get_unchecked_mut(usize::from(base_reg)) };
    // SAFETY: caller guarantees `sew_bytes <= VLENB` (for `reg`) and
    // `sew_bytes <= 8` (for `buf`).
    unsafe { reg.get_unchecked_mut(..sew_bytes) }
        .copy_from_slice(unsafe { buf.get_unchecked(..sew_bytes) });
}
117
118#[inline(always)]
133pub fn sign_extend_to_reg<Reg>(val: u64, sew: Vsew) -> Reg::Type
134where
135 Reg: Register,
136{
137 let sew_bits = u32::from(sew.bits());
138 let shift = u64::BITS - sew_bits;
140 let sign_extended = (val.cast_signed() << shift) >> shift;
142 let raw = sign_extended.cast_unsigned();
143 if Reg::XLEN == u64::BITS as u8 {
144 let lo = Reg::Type::from(raw as u32);
146 let hi = Reg::Type::from((raw >> u32::BITS) as u32);
147 lo | (hi << 32u8)
148 } else {
149 Reg::Type::from(raw as u32)
151 }
152}
153
/// Slide-up helper: every active element `i >= offset` gets
/// `vd[i] = vs2[i - offset]`; elements below `offset` and masked-off
/// elements of `vd` are left untouched.
///
/// # Safety
///
/// Caller must uphold the index invariants of [`snapshot_mask`],
/// [`read_element_u64`] and [`write_element_u64`] for `vd`, `vs2`, `vl` and
/// `sew` (register/element indices in bounds for the register file).
#[inline(always)]
#[expect(clippy::too_many_arguments, reason = "Internal API")]
#[doc(hidden)]
pub unsafe fn execute_slideup<Reg, ExtState, Memory, PC, IH, CustomError>(
    state: &mut InterpreterState<Reg, ExtState, Memory, PC, IH, CustomError>,
    vd: VReg,
    vs2: VReg,
    vm: bool,
    vl: u32,
    vstart: u32,
    sew: Vsew,
    offset: u64,
) where
    Reg: Register,
    [(); Reg::N]:,
    ExtState: VectorRegistersExt<Reg, CustomError>,
    [(); ExtState::ELEN as usize]:,
    [(); ExtState::VLEN as usize]:,
    [(); ExtState::VLENB as usize]:,
    Memory: VirtualMemory,
    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
    CustomError: fmt::Debug,
{
    // Snapshot the mask before writing any elements so later writes to `vd`
    // cannot change the bits consulted below (NOTE(review): relies on
    // `snapshot_mask` copying the mask state — confirm in its definition).
    let mask_buf = unsafe { snapshot_mask(state.ext_state.read_vreg(), vm, vl) };
    let vd_base = vd.bits();
    let vs2_base = vs2.bits();
    // First destination index. `offset` is clamped to `u32::MAX` so the cast
    // cannot wrap; any offset >= `vl` simply makes the loop empty.
    let start = vstart.max(offset.min(u64::from(u32::MAX)) as u32);
    for i in start..vl {
        // Masked-off elements keep their previous `vd` value.
        if !mask_bit(&mask_buf, i) {
            continue;
        }
        // Cannot underflow: `i >= start >= offset` whenever the loop runs.
        let src_idx = i as u64 - offset;
        let val = unsafe {
            read_element_u64(
                state.ext_state.read_vreg(),
                usize::from(vs2_base),
                src_idx as u32,
                sew,
            )
        };
        unsafe {
            write_element_u64(state.ext_state.write_vreg(), vd_base, i, sew, val);
        }
    }
    state.ext_state.mark_vs_dirty();
    state.ext_state.reset_vstart();
}
215
/// Slide-down helper: every active element `i` gets
/// `vd[i] = vs2[i + offset]` when `i + offset < vlmax`, and 0 otherwise.
///
/// # Safety
///
/// Caller must uphold the index invariants of [`snapshot_mask`],
/// [`read_element_u64`] and [`write_element_u64`] for `vd`, `vs2`, `vl`,
/// `vlmax` and `sew`.
#[inline(always)]
#[expect(clippy::too_many_arguments, reason = "Internal API")]
#[doc(hidden)]
pub unsafe fn execute_slidedown<Reg, ExtState, Memory, PC, IH, CustomError>(
    state: &mut InterpreterState<Reg, ExtState, Memory, PC, IH, CustomError>,
    vd: VReg,
    vs2: VReg,
    vm: bool,
    vl: u32,
    vstart: u32,
    sew: Vsew,
    vlmax: u32,
    offset: u64,
) where
    Reg: Register,
    [(); Reg::N]:,
    ExtState: VectorRegistersExt<Reg, CustomError>,
    [(); ExtState::ELEN as usize]:,
    [(); ExtState::VLEN as usize]:,
    [(); ExtState::VLENB as usize]:,
    Memory: VirtualMemory,
    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
    CustomError: fmt::Debug,
{
    // Mask snapshot taken before any `vd` writes (see `snapshot_mask`).
    let mask_buf = unsafe { snapshot_mask(state.ext_state.read_vreg(), vm, vl) };
    let vd_base = vd.bits();
    let vs2_base = vs2.bits();
    for i in vstart..vl {
        // Masked-off elements keep their previous `vd` value.
        if !mask_bit(&mask_buf, i) {
            continue;
        }
        // `checked_add` guards u64 overflow of `i + offset`; sources past
        // `vlmax` (or overflowed) read as 0.
        let val = if let Some(src_idx) = u64::from(i).checked_add(offset)
            && src_idx < u64::from(vlmax)
        {
            unsafe {
                read_element_u64(
                    state.ext_state.read_vreg(),
                    usize::from(vs2_base),
                    src_idx as u32,
                    sew,
                )
            }
        } else {
            0
        };
        unsafe {
            write_element_u64(state.ext_state.write_vreg(), vd_base, i, sew, val);
        }
    }
    state.ext_state.mark_vs_dirty();
    state.ext_state.reset_vstart();
}
281
/// Slide-1-up helper: active element 0 receives `scalar`; every other
/// active element `i` receives `vs2[i - 1]`.
///
/// # Safety
///
/// Caller must uphold the index invariants of [`snapshot_mask`],
/// [`read_element_u64`] and [`write_element_u64`] for `vd`, `vs2`, `vl`
/// and `sew`.
#[inline(always)]
#[expect(clippy::too_many_arguments, reason = "Internal API")]
#[doc(hidden)]
pub unsafe fn execute_slide1up<Reg, ExtState, Memory, PC, IH, CustomError>(
    state: &mut InterpreterState<Reg, ExtState, Memory, PC, IH, CustomError>,
    vd: VReg,
    vs2: VReg,
    vm: bool,
    vl: u32,
    vstart: u32,
    sew: Vsew,
    scalar: u64,
) where
    Reg: Register,
    [(); Reg::N]:,
    ExtState: VectorRegistersExt<Reg, CustomError>,
    [(); ExtState::ELEN as usize]:,
    [(); ExtState::VLEN as usize]:,
    [(); ExtState::VLENB as usize]:,
    Memory: VirtualMemory,
    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
    CustomError: fmt::Debug,
{
    // Mask snapshot taken before any `vd` writes (see `snapshot_mask`).
    let mask_buf = unsafe { snapshot_mask(state.ext_state.read_vreg(), vm, vl) };
    let vd_base = vd.bits();
    let vs2_base = vs2.bits();
    for i in vstart..vl {
        // Masked-off elements keep their previous `vd` value.
        if !mask_bit(&mask_buf, i) {
            continue;
        }
        let val = if i == 0 {
            // The vacated element 0 is filled from the scalar operand.
            scalar
        } else {
            // `i > 0` here, so `i - 1` cannot underflow.
            unsafe {
                read_element_u64(
                    state.ext_state.read_vreg(),
                    usize::from(vs2_base),
                    i - 1,
                    sew,
                )
            }
        };
        unsafe {
            write_element_u64(state.ext_state.write_vreg(), vd_base, i, sew, val);
        }
    }
    state.ext_state.mark_vs_dirty();
    state.ext_state.reset_vstart();
}
344
/// Slide-1-down helper: each active element `i` receives `vs2[i + 1]`,
/// except the last element (`i + 1 == vl`), which receives `scalar`.
///
/// # Safety
///
/// Caller must uphold the index invariants of [`snapshot_mask`],
/// [`read_element_u64`] and [`write_element_u64`] for `vd`, `vs2`, `vl`
/// and `sew`.
#[inline(always)]
#[expect(clippy::too_many_arguments, reason = "Internal API")]
#[doc(hidden)]
pub unsafe fn execute_slide1down<Reg, ExtState, Memory, PC, IH, CustomError>(
    state: &mut InterpreterState<Reg, ExtState, Memory, PC, IH, CustomError>,
    vd: VReg,
    vs2: VReg,
    vm: bool,
    vl: u32,
    vstart: u32,
    sew: Vsew,
    scalar: u64,
) where
    Reg: Register,
    [(); Reg::N]:,
    ExtState: VectorRegistersExt<Reg, CustomError>,
    [(); ExtState::ELEN as usize]:,
    [(); ExtState::VLEN as usize]:,
    [(); ExtState::VLENB as usize]:,
    Memory: VirtualMemory,
    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
    CustomError: fmt::Debug,
{
    // Mask snapshot taken before any `vd` writes (see `snapshot_mask`).
    let mask_buf = unsafe { snapshot_mask(state.ext_state.read_vreg(), vm, vl) };
    let vd_base = vd.bits();
    let vs2_base = vs2.bits();
    for i in vstart..vl {
        // Masked-off elements keep their previous `vd` value.
        if !mask_bit(&mask_buf, i) {
            continue;
        }
        // `i < vl`, so `i + 1` cannot overflow a u32.
        let val = if i + 1 < vl {
            unsafe {
                read_element_u64(
                    state.ext_state.read_vreg(),
                    usize::from(vs2_base),
                    i + 1,
                    sew,
                )
            }
        } else {
            // The vacated last element is filled from the scalar operand.
            scalar
        };
        unsafe {
            write_element_u64(state.ext_state.write_vreg(), vd_base, i, sew, val);
        }
    }
    state.ext_state.mark_vs_dirty();
    state.ext_state.reset_vstart();
}
411
/// Register-gather helper: `vd[i] = vs2[vs1[i]]` for each active element,
/// with indices read from `vs1` at the current SEW; any index `>= vlmax`
/// yields 0.
///
/// # Safety
///
/// Caller must uphold the index invariants of [`snapshot_mask`],
/// [`read_element_u64`] and [`write_element_u64`] for `vd`, `vs2`, `vs1`,
/// `vl`, `vlmax` and `sew`.
#[inline(always)]
#[expect(clippy::too_many_arguments, reason = "Internal API")]
#[doc(hidden)]
pub unsafe fn execute_rgather_vv<Reg, ExtState, Memory, PC, IH, CustomError>(
    state: &mut InterpreterState<Reg, ExtState, Memory, PC, IH, CustomError>,
    vd: VReg,
    vs2: VReg,
    vs1: VReg,
    vm: bool,
    vl: u32,
    vstart: u32,
    sew: Vsew,
    vlmax: u32,
) where
    Reg: Register,
    [(); Reg::N]:,
    ExtState: VectorRegistersExt<Reg, CustomError>,
    [(); ExtState::ELEN as usize]:,
    [(); ExtState::VLEN as usize]:,
    [(); ExtState::VLENB as usize]:,
    Memory: VirtualMemory,
    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
    CustomError: fmt::Debug,
{
    // Mask snapshot taken before any `vd` writes (see `snapshot_mask`).
    let mask_buf = unsafe { snapshot_mask(state.ext_state.read_vreg(), vm, vl) };
    let vd_base = vd.bits();
    let vs2_base = vs2.bits();
    let vs1_base = vs1.bits();
    for i in vstart..vl {
        // Masked-off elements keep their previous `vd` value.
        if !mask_bit(&mask_buf, i) {
            continue;
        }
        // The gather index comes from element `i` of `vs1`.
        let index =
            unsafe { read_element_u64(state.ext_state.read_vreg(), usize::from(vs1_base), i, sew) };
        // Out-of-range indices produce 0; in-range ones fit in u32 since
        // `index < vlmax` and `vlmax` is a u32, so the cast is lossless.
        let val = if index < u64::from(vlmax) {
            unsafe {
                read_element_u64(
                    state.ext_state.read_vreg(),
                    usize::from(vs2_base),
                    index as u32,
                    sew,
                )
            }
        } else {
            0u64
        };
        unsafe {
            write_element_u64(state.ext_state.write_vreg(), vd_base, i, sew, val);
        }
    }
    state.ext_state.mark_vs_dirty();
    state.ext_state.reset_vstart();
}
475
/// Register-gather with a single scalar index: every active element of `vd`
/// receives `vs2[index]`, or 0 when `index >= vlmax`.
///
/// # Safety
///
/// Caller must uphold the index invariants of [`snapshot_mask`],
/// [`read_element_u64`] and [`write_element_u64`] for `vd`, `vs2`, `vl`,
/// `vlmax` and `sew`.
#[inline(always)]
#[expect(clippy::too_many_arguments, reason = "Internal API")]
#[doc(hidden)]
pub unsafe fn execute_rgather_scalar<Reg, ExtState, Memory, PC, IH, CustomError>(
    state: &mut InterpreterState<Reg, ExtState, Memory, PC, IH, CustomError>,
    vd: VReg,
    vs2: VReg,
    vm: bool,
    vl: u32,
    vstart: u32,
    sew: Vsew,
    vlmax: u32,
    index: u64,
) where
    Reg: Register,
    [(); Reg::N]:,
    ExtState: VectorRegistersExt<Reg, CustomError>,
    [(); ExtState::ELEN as usize]:,
    [(); ExtState::VLEN as usize]:,
    [(); ExtState::VLENB as usize]:,
    Memory: VirtualMemory,
    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
    CustomError: fmt::Debug,
{
    // Mask snapshot taken before any `vd` writes (see `snapshot_mask`).
    let mask_buf = unsafe { snapshot_mask(state.ext_state.read_vreg(), vm, vl) };
    let vd_base = vd.bits();
    let vs2_base = vs2.bits();
    // The source element is the same for every destination element, so it is
    // read exactly once, before any write to `vd` happens.
    let val = if index < u64::from(vlmax) {
        unsafe {
            read_element_u64(
                state.ext_state.read_vreg(),
                usize::from(vs2_base),
                index as u32,
                sew,
            )
        }
    } else {
        // Out-of-range scalar index broadcasts 0.
        0u64
    };
    for i in vstart..vl {
        // Masked-off elements keep their previous `vd` value.
        if !mask_bit(&mask_buf, i) {
            continue;
        }
        unsafe {
            write_element_u64(state.ext_state.write_vreg(), vd_base, i, sew, val);
        }
    }
    state.ext_state.mark_vs_dirty();
    state.ext_state.reset_vstart();
}
536
/// Register-gather with 16-bit indices: `vd[i] = vs2[vs1[i]]` for each
/// active element, where the indices are read from `vs1` at E16 regardless
/// of the data SEW; any index `>= vlmax` yields 0.
///
/// # Safety
///
/// Caller must guarantee `vl <= vlmax` and that the `vs1` index group
/// (`index_group_regs` registers) holds at least `vl` 16-bit elements, in
/// addition to the index invariants of [`snapshot_mask`],
/// [`read_element_u64`] and [`write_element_u64`].
#[inline(always)]
#[expect(clippy::too_many_arguments, reason = "Internal API")]
#[doc(hidden)]
pub unsafe fn execute_rgatherei16<Reg, ExtState, Memory, PC, IH, CustomError>(
    state: &mut InterpreterState<Reg, ExtState, Memory, PC, IH, CustomError>,
    vd: VReg,
    vs2: VReg,
    vs1: VReg,
    vm: bool,
    vl: u32,
    vstart: u32,
    sew: Vsew,
    vlmax: u32,
    index_group_regs: u8,
) where
    Reg: Register,
    [(); Reg::N]:,
    ExtState: VectorRegistersExt<Reg, CustomError>,
    [(); ExtState::ELEN as usize]:,
    [(); ExtState::VLEN as usize]:,
    [(); ExtState::VLENB as usize]:,
    Memory: VirtualMemory,
    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
    CustomError: fmt::Debug,
{
    // Each register holds `VLENB / 2` 16-bit index elements.
    let index_capacity = u32::from(index_group_regs) * (ExtState::VLENB / 2);
    // Caller-supplied precondition: the E16 reads below index `vs1` up to
    // element `vl - 1`, so `vl` must fit inside the index group.
    debug_assert!(
        vl <= vlmax && vl <= index_capacity,
        "vl={vl} exceeds vlmax={vlmax} or index_capacity={index_capacity}"
    );
    // Mask snapshot taken before any `vd` writes (see `snapshot_mask`).
    let mask_buf = unsafe { snapshot_mask(state.ext_state.read_vreg(), vm, vl) };
    let vd_base = vd.bits();
    let vs2_base = vs2.bits();
    let vs1_base = vs1.bits();
    for i in vstart..vl {
        // Masked-off elements keep their previous `vd` value.
        if !mask_bit(&mask_buf, i) {
            continue;
        }
        // Indices are always 16-bit (E16), independent of the data SEW.
        let index = unsafe {
            read_element_u64(
                state.ext_state.read_vreg(),
                usize::from(vs1_base),
                i,
                Vsew::E16,
            )
        };
        // Out-of-range indices produce 0.
        let val = if index < u64::from(vlmax) {
            unsafe {
                read_element_u64(
                    state.ext_state.read_vreg(),
                    usize::from(vs2_base),
                    index as u32,
                    sew,
                )
            }
        } else {
            0u64
        };
        unsafe {
            write_element_u64(state.ext_state.write_vreg(), vd_base, i, sew, val);
        }
    }
    state.ext_state.mark_vs_dirty();
    state.ext_state.reset_vstart();
}
622
623#[inline(always)]
634#[expect(clippy::too_many_arguments, reason = "Internal API")]
635#[doc(hidden)]
636pub unsafe fn execute_merge_vv<Reg, ExtState, Memory, PC, IH, CustomError>(
637 state: &mut InterpreterState<Reg, ExtState, Memory, PC, IH, CustomError>,
638 vd: VReg,
639 vs2: VReg,
640 vs1: VReg,
641 vm: bool,
642 vl: u32,
643 vstart: u32,
644 sew: Vsew,
645) where
646 Reg: Register,
647 [(); Reg::N]:,
648 ExtState: VectorRegistersExt<Reg, CustomError>,
649 [(); ExtState::ELEN as usize]:,
650 [(); ExtState::VLEN as usize]:,
651 [(); ExtState::VLENB as usize]:,
652 Memory: VirtualMemory,
653 PC: ProgramCounter<Reg::Type, Memory, CustomError>,
654 CustomError: fmt::Debug,
655{
656 let mask_buf = unsafe { snapshot_mask(state.ext_state.read_vreg(), vm, vl) };
659 let vd_base = vd.bits();
660 let vs1_base = vs1.bits();
661 let vs2_base = vs2.bits();
662 for i in vstart..vl {
663 let mask_set = mask_bit(&mask_buf, i);
664 let val = if mask_set {
665 unsafe { read_element_u64(state.ext_state.read_vreg(), usize::from(vs1_base), i, sew) }
667 } else {
668 unsafe { read_element_u64(state.ext_state.read_vreg(), usize::from(vs2_base), i, sew) }
671 };
672 unsafe { write_element_u64(state.ext_state.write_vreg(), vd_base, i, sew, val) };
674 }
675 state.ext_state.mark_vs_dirty();
676 state.ext_state.reset_vstart();
677}
678
679#[inline(always)]
690#[expect(clippy::too_many_arguments, reason = "Internal API")]
691#[doc(hidden)]
692pub unsafe fn execute_merge_scalar<Reg, ExtState, Memory, PC, IH, CustomError>(
693 state: &mut InterpreterState<Reg, ExtState, Memory, PC, IH, CustomError>,
694 vd: VReg,
695 vs2: VReg,
696 vm: bool,
697 vl: u32,
698 vstart: u32,
699 sew: Vsew,
700 scalar: u64,
701) where
702 Reg: Register,
703 [(); Reg::N]:,
704 ExtState: VectorRegistersExt<Reg, CustomError>,
705 [(); ExtState::ELEN as usize]:,
706 [(); ExtState::VLEN as usize]:,
707 [(); ExtState::VLENB as usize]:,
708 Memory: VirtualMemory,
709 PC: ProgramCounter<Reg::Type, Memory, CustomError>,
710 CustomError: fmt::Debug,
711{
712 let mask_buf = unsafe { snapshot_mask(state.ext_state.read_vreg(), vm, vl) };
714 let vd_base = vd.bits();
715 let vs2_base = vs2.bits();
716 for i in vstart..vl {
717 let val = if mask_bit(&mask_buf, i) {
718 scalar
719 } else {
720 unsafe { read_element_u64(state.ext_state.read_vreg(), usize::from(vs2_base), i, sew) }
722 };
723 unsafe { write_element_u64(state.ext_state.write_vreg(), vd_base, i, sew, val) };
725 }
726 state.ext_state.mark_vs_dirty();
727 state.ext_state.reset_vstart();
728}
729
/// Compress helper: packs the elements of `vs2` whose corresponding bit in
/// `vs1` (used as a mask register) is set into consecutive low elements of
/// `vd`, preserving element order.
///
/// # Safety
///
/// Caller must guarantee `vs1.bits() < 32` and
/// `vl.div_ceil(8) <= ExtState::VLENB`, in addition to the index invariants
/// of [`read_element_u64`] and [`write_element_u64`].
#[inline(always)]
#[doc(hidden)]
pub unsafe fn execute_compress<Reg, ExtState, Memory, PC, IH, CustomError>(
    state: &mut InterpreterState<Reg, ExtState, Memory, PC, IH, CustomError>,
    vd: VReg,
    vs2: VReg,
    vs1: VReg,
    vl: u32,
    vstart: u32,
    sew: Vsew,
) where
    Reg: Register,
    [(); Reg::N]:,
    ExtState: VectorRegistersExt<Reg, CustomError>,
    [(); ExtState::ELEN as usize]:,
    [(); ExtState::VLEN as usize]:,
    [(); ExtState::VLENB as usize]:,
    Memory: VirtualMemory,
    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
    CustomError: fmt::Debug,
{
    let vd_base = vd.bits();
    let vs2_base = vs2.bits();
    let vs1_base = vs1.bits();
    // Number of mask bytes covering `vl` elements (one bit per element).
    let mask_bytes = vl.div_ceil(u8::BITS) as usize;
    let vreg = state.ext_state.read_vreg();
    // Snapshot the mask bytes of `vs1` before writing to `vd` so the writes
    // cannot corrupt the mask being consulted (presumably vd/vs1 overlap is
    // already rejected by the caller's overlap check — verify).
    let mut vs1_buf = [0u8; { ExtState::VLENB as usize }];
    // SAFETY: caller guarantees `vs1_base < 32` and `mask_bytes <= VLENB`.
    unsafe {
        vs1_buf.get_unchecked_mut(..mask_bytes).copy_from_slice(
            vreg.get_unchecked(usize::from(vs1_base))
                .get_unchecked(..mask_bytes),
        );
    }
    // Nothing to move; still update dirty/vstart bookkeeping and bail out.
    if vstart >= vl {
        state.ext_state.mark_vs_dirty();
        state.ext_state.reset_vstart();
        return;
    }
    // When resuming at `vstart > 0`, the destination index continues from
    // the number of set mask bits below `vstart`.
    let mut out_idx = (0..vstart).filter(|&j| mask_bit(&vs1_buf, j)).count() as u32;
    for i in vstart..vl {
        // Elements with a clear mask bit are dropped, not copied.
        if !mask_bit(&vs1_buf, i) {
            continue;
        }
        let val =
            unsafe { read_element_u64(state.ext_state.read_vreg(), usize::from(vs2_base), i, sew) };
        unsafe {
            write_element_u64(state.ext_state.write_vreg(), vd_base, out_idx, sew, val);
        }
        out_idx += 1;
    }
    state.ext_state.mark_vs_dirty();
    state.ext_state.reset_vstart();
}
808
/// Copies `count` whole vector registers starting at `src_base` into the
/// registers starting at `dst_base` (the `vmv<N>r.v` family).
///
/// `copy_within` has `memmove` semantics, so overlapping source and
/// destination ranges are handled correctly without the temporary staging
/// buffer (and its extra full copy of up to `8 * VLENB` bytes) that the
/// previous implementation used.
///
/// # Safety
///
/// The body now uses only checked slice operations, so it has no
/// memory-safety preconditions of its own; it stays `unsafe fn` to preserve
/// the existing caller contract (`src_base + count <= 32` and
/// `dst_base + count <= 32` — violations now panic instead of being UB).
#[inline(always)]
#[doc(hidden)]
pub unsafe fn execute_whole_reg_move<const VLENB: usize>(
    vreg: &mut [[u8; VLENB]; 32],
    dst_base: u8,
    src_base: u8,
    count: u8,
) {
    let count = usize::from(count);
    let src = usize::from(src_base);
    let dst = usize::from(dst_base);
    debug_assert!(count <= 8, "count must be <= 8 for vmvNr");
    debug_assert!(src + count <= 32, "source register range out of bounds");
    debug_assert!(dst + count <= 32, "destination register range out of bounds");
    // Single overlap-safe move of `count` whole registers.
    vreg.copy_within(src..src + count, dst);
}