1use crate::v::vector_registers::VectorRegistersExt;
4pub use crate::v::zve64x::arith::zve64x_arith_helpers::check_vreg_group_alignment;
5use crate::v::zve64x::arith::zve64x_arith_helpers::{read_element_u64, write_element_u64};
6use crate::v::zve64x::load::zve64x_load_helpers::{mask_bit, snapshot_mask};
7use crate::v::zve64x::zve64x_helpers::INSTRUCTION_SIZE;
8use crate::{ExecutionError, ProgramCounter};
9use ab_riscv_primitives::prelude::*;
10use core::fmt;
11
12#[inline(always)]
17#[doc(hidden)]
18pub fn check_no_overlap<Reg, Memory, PC, CustomError>(
19 program_counter: &PC,
20 a: VReg,
21 b: VReg,
22 count: u8,
23) -> Result<(), ExecutionError<Reg::Type, CustomError>>
24where
25 Reg: Register,
26 PC: ProgramCounter<Reg::Type, Memory, CustomError>,
27{
28 let a_start = u16::from(a.bits());
29 let b_start = u16::from(b.bits());
30 let count = u16::from(count);
31 if a_start < b_start + count && b_start < a_start + count {
35 return Err(ExecutionError::IllegalInstruction {
36 address: program_counter.old_pc(INSTRUCTION_SIZE),
37 });
38 }
39 Ok(())
40}
41
42#[inline(always)]
48#[doc(hidden)]
49pub fn check_no_overlap_asymmetric<Reg, Memory, PC, CustomError>(
50 program_counter: &PC,
51 a: VReg,
52 a_count: u8,
53 b: VReg,
54 b_count: u8,
55) -> Result<(), ExecutionError<Reg::Type, CustomError>>
56where
57 Reg: Register,
58 PC: ProgramCounter<Reg::Type, Memory, CustomError>,
59{
60 let a_start = u16::from(a.bits());
61 let b_start = u16::from(b.bits());
62 let a_count = u16::from(a_count);
63 let b_count = u16::from(b_count);
64 if a_start < b_start + b_count && b_start < a_start + a_count {
67 return Err(ExecutionError::IllegalInstruction {
68 address: program_counter.old_pc(INSTRUCTION_SIZE),
69 });
70 }
71 Ok(())
72}
73
/// Reads element 0 of vector register `base_reg` as a zero-extended `u64`.
///
/// Copies the first `sew.bytes()` bytes of the register into a zeroed
/// 8-byte buffer and decodes it little-endian, so values narrower than
/// 64 bits come back zero-extended.
///
/// # Safety
///
/// `base_reg` must be a valid register index (`< 32`), and `sew.bytes()`
/// must be at most 8 and at most `VLENB`; otherwise the unchecked
/// indexing/slicing below is undefined behavior.
#[inline(always)]
pub unsafe fn read_element_0_u64<const VLENB: usize>(
    vreg: &[[u8; VLENB]; 32],
    base_reg: u8,
    sew: Vsew,
) -> u64 {
    let sew_bytes = usize::from(sew.bytes());
    // SAFETY: `base_reg < 32` per this function's safety contract.
    let reg = unsafe { vreg.get_unchecked(usize::from(base_reg)) };
    let mut buf = [0u8; 8];
    // SAFETY: `sew_bytes <= 8` and `sew_bytes <= VLENB` per the contract,
    // so both slices are in bounds and of equal length.
    unsafe { buf.get_unchecked_mut(..sew_bytes) }
        .copy_from_slice(unsafe { reg.get_unchecked(..sew_bytes) });
    u64::from_le_bytes(buf)
}
93
/// Writes `value` into element 0 of vector register `base_reg`.
///
/// Stores the low `sew.bytes()` bytes of `value` (little-endian) into the
/// start of the register; bytes of the register beyond the element width
/// are left untouched.
///
/// # Safety
///
/// `base_reg` must be a valid register index (`< 32`), and `sew.bytes()`
/// must be at most 8 and at most `VLENB`; otherwise the unchecked
/// indexing/slicing below is undefined behavior.
#[inline(always)]
pub unsafe fn write_element_0_u64<const VLENB: usize>(
    vreg: &mut [[u8; VLENB]; 32],
    base_reg: u8,
    sew: Vsew,
    value: u64,
) {
    let sew_bytes = usize::from(sew.bytes());
    let buf = value.to_le_bytes();
    // SAFETY: `base_reg < 32` per this function's safety contract.
    let reg = unsafe { vreg.get_unchecked_mut(usize::from(base_reg)) };
    // SAFETY: `sew_bytes <= 8` and `sew_bytes <= VLENB` per the contract,
    // so both slices are in bounds and of equal length.
    unsafe { reg.get_unchecked_mut(..sew_bytes) }
        .copy_from_slice(unsafe { buf.get_unchecked(..sew_bytes) });
}
113
114#[inline(always)]
129pub fn sign_extend_to_reg<Reg>(val: u64, sew: Vsew) -> Reg::Type
130where
131 Reg: Register,
132{
133 let sew_bits = u32::from(sew.bits());
134 let shift = u64::BITS - sew_bits;
136 let sign_extended = (val.cast_signed() << shift) >> shift;
138 let raw = sign_extended.cast_unsigned();
139 if Reg::XLEN == u64::BITS as u8 {
140 let lo = Reg::Type::from(raw as u32);
142 let hi = Reg::Type::from((raw >> u32::BITS) as u32);
143 lo | (hi << 32u8)
144 } else {
145 Reg::Type::from(raw as u32)
147 }
148}
149
/// Executes a vector slide-up (`vslideup.vx`/`vslideup.vi`): for each
/// active element `i` in `[max(vstart, offset), vl)`, copies
/// `vs2[i - offset]` into `vd[i]`.
///
/// Destination elements below `offset`, below `vstart`, or masked off are
/// left unchanged. `offset` is clamped to `u32::MAX` before the `max`, so
/// an offset at or beyond `vl` writes no elements. On completion marks the
/// vector state dirty and resets `vstart`.
///
/// # Safety
///
/// Caller must uphold the contracts of `snapshot_mask`,
/// `read_element_u64`, and `write_element_u64`: register and element
/// indices derived from `vd`/`vs2`/`vl`/`sew` must be in bounds for the
/// register file — TODO confirm the exact helper contracts.
#[inline(always)]
#[expect(clippy::too_many_arguments, reason = "Internal API")]
#[doc(hidden)]
pub unsafe fn execute_slideup<Reg, ExtState, CustomError>(
    ext_state: &mut ExtState,
    vd: VReg,
    vs2: VReg,
    vm: bool,
    vl: u32,
    vstart: u32,
    sew: Vsew,
    offset: u64,
) where
    Reg: Register,
    ExtState: VectorRegistersExt<Reg, CustomError>,
    [(); ExtState::ELEN as usize]:,
    [(); ExtState::VLEN as usize]:,
    [(); ExtState::VLENB as usize]:,
    CustomError: fmt::Debug,
{
    // Snapshot the mask up front so later writes to `vd` cannot affect it.
    // SAFETY: caller guarantees valid register state (see `# Safety`).
    let mask_buf = unsafe { snapshot_mask(ext_state.read_vreg(), vm, vl) };
    let vd_base = vd.bits();
    let vs2_base = vs2.bits();
    // First destination index: nothing below `offset` (or `vstart`) moves.
    let start = vstart.max(offset.min(u64::from(u32::MAX)) as u32);
    for i in start..vl {
        // Masked-off elements are left undisturbed.
        if !mask_bit(&mask_buf, i) {
            continue;
        }
        // `i >= offset` by construction of `start`, so no underflow.
        let src_idx = i as u64 - offset;
        // SAFETY: indices derive from caller-validated operands (see `# Safety`).
        let val = unsafe {
            read_element_u64(
                ext_state.read_vreg(),
                usize::from(vs2_base),
                src_idx as u32,
                sew,
            )
        };
        // SAFETY: same contract as the read above.
        unsafe {
            write_element_u64(ext_state.write_vreg(), vd_base, i, sew, val);
        }
    }
    ext_state.mark_vs_dirty();
    ext_state.reset_vstart();
}
208
/// Executes a vector slide-down (`vslidedown.vx`/`vslidedown.vi`): for
/// each active element `i` in `[vstart, vl)`, copies `vs2[i + offset]`
/// into `vd[i]`, or zero when the source index overflows or reaches
/// `vlmax`.
///
/// Masked-off elements are left unchanged. On completion marks the vector
/// state dirty and resets `vstart`.
///
/// # Safety
///
/// Caller must uphold the contracts of `snapshot_mask`,
/// `read_element_u64`, and `write_element_u64`: register and element
/// indices derived from `vd`/`vs2`/`vl`/`vlmax`/`sew` must be in bounds
/// for the register file — TODO confirm the exact helper contracts.
#[inline(always)]
#[expect(clippy::too_many_arguments, reason = "Internal API")]
#[doc(hidden)]
pub unsafe fn execute_slidedown<Reg, ExtState, CustomError>(
    ext_state: &mut ExtState,
    vd: VReg,
    vs2: VReg,
    vm: bool,
    vl: u32,
    vstart: u32,
    sew: Vsew,
    vlmax: u32,
    offset: u64,
) where
    Reg: Register,
    ExtState: VectorRegistersExt<Reg, CustomError>,
    [(); ExtState::ELEN as usize]:,
    [(); ExtState::VLEN as usize]:,
    [(); ExtState::VLENB as usize]:,
    CustomError: fmt::Debug,
{
    // Snapshot the mask up front so later writes to `vd` cannot affect it.
    // SAFETY: caller guarantees valid register state (see `# Safety`).
    let mask_buf = unsafe { snapshot_mask(ext_state.read_vreg(), vm, vl) };
    let vd_base = vd.bits();
    let vs2_base = vs2.bits();
    for i in vstart..vl {
        // Masked-off elements are left undisturbed.
        if !mask_bit(&mask_buf, i) {
            continue;
        }
        // Sources past `vlmax` (or past u64 range) read as zero.
        let val = if let Some(src_idx) = u64::from(i).checked_add(offset)
            && src_idx < u64::from(vlmax)
        {
            // SAFETY: `src_idx < vlmax`; register indices derive from
            // caller-validated operands (see `# Safety`).
            unsafe {
                read_element_u64(
                    ext_state.read_vreg(),
                    usize::from(vs2_base),
                    src_idx as u32,
                    sew,
                )
            }
        } else {
            0
        };
        // SAFETY: same contract as the read above.
        unsafe {
            write_element_u64(ext_state.write_vreg(), vd_base, i, sew, val);
        }
    }
    ext_state.mark_vs_dirty();
    ext_state.reset_vstart();
}
271
/// Executes a slide-up by one with scalar insert (`vslide1up.vx`): element
/// 0 of `vd` receives `scalar`, and each other active element `i` in
/// `[vstart, vl)` receives `vs2[i - 1]`.
///
/// Masked-off elements are left unchanged. On completion marks the vector
/// state dirty and resets `vstart`.
///
/// # Safety
///
/// Caller must uphold the contracts of `snapshot_mask`,
/// `read_element_u64`, and `write_element_u64`: register and element
/// indices derived from `vd`/`vs2`/`vl`/`sew` must be in bounds for the
/// register file — TODO confirm the exact helper contracts.
#[inline(always)]
#[expect(clippy::too_many_arguments, reason = "Internal API")]
#[doc(hidden)]
pub unsafe fn execute_slide1up<Reg, ExtState, CustomError>(
    ext_state: &mut ExtState,
    vd: VReg,
    vs2: VReg,
    vm: bool,
    vl: u32,
    vstart: u32,
    sew: Vsew,
    scalar: u64,
) where
    Reg: Register,
    ExtState: VectorRegistersExt<Reg, CustomError>,
    [(); ExtState::ELEN as usize]:,
    [(); ExtState::VLEN as usize]:,
    [(); ExtState::VLENB as usize]:,
    CustomError: fmt::Debug,
{
    // Snapshot the mask up front so later writes to `vd` cannot affect it.
    // SAFETY: caller guarantees valid register state (see `# Safety`).
    let mask_buf = unsafe { snapshot_mask(ext_state.read_vreg(), vm, vl) };
    let vd_base = vd.bits();
    let vs2_base = vs2.bits();
    for i in vstart..vl {
        // Masked-off elements are left undisturbed.
        if !mask_bit(&mask_buf, i) {
            continue;
        }
        let val = if i == 0 {
            // The vacated bottom element takes the scalar operand.
            scalar
        } else {
            // SAFETY: `i >= 1` here; indices derive from caller-validated
            // operands (see `# Safety`).
            unsafe { read_element_u64(ext_state.read_vreg(), usize::from(vs2_base), i - 1, sew) }
        };
        // SAFETY: same contract as the read above.
        unsafe {
            write_element_u64(ext_state.write_vreg(), vd_base, i, sew, val);
        }
    }
    ext_state.mark_vs_dirty();
    ext_state.reset_vstart();
}
324
/// Executes a slide-down by one with scalar insert (`vslide1down.vx`):
/// each active element `i` in `[vstart, vl)` receives `vs2[i + 1]`, except
/// the last element (`i == vl - 1`), which receives `scalar`.
///
/// Masked-off elements are left unchanged. On completion marks the vector
/// state dirty and resets `vstart`.
///
/// # Safety
///
/// Caller must uphold the contracts of `snapshot_mask`,
/// `read_element_u64`, and `write_element_u64`: register and element
/// indices derived from `vd`/`vs2`/`vl`/`sew` must be in bounds for the
/// register file — TODO confirm the exact helper contracts.
#[inline(always)]
#[expect(clippy::too_many_arguments, reason = "Internal API")]
#[doc(hidden)]
pub unsafe fn execute_slide1down<Reg, ExtState, CustomError>(
    ext_state: &mut ExtState,
    vd: VReg,
    vs2: VReg,
    vm: bool,
    vl: u32,
    vstart: u32,
    sew: Vsew,
    scalar: u64,
) where
    Reg: Register,
    ExtState: VectorRegistersExt<Reg, CustomError>,
    [(); ExtState::ELEN as usize]:,
    [(); ExtState::VLEN as usize]:,
    [(); ExtState::VLENB as usize]:,
    CustomError: fmt::Debug,
{
    // Snapshot the mask up front so later writes to `vd` cannot affect it.
    // SAFETY: caller guarantees valid register state (see `# Safety`).
    let mask_buf = unsafe { snapshot_mask(ext_state.read_vreg(), vm, vl) };
    let vd_base = vd.bits();
    let vs2_base = vs2.bits();
    for i in vstart..vl {
        // Masked-off elements are left undisturbed.
        if !mask_bit(&mask_buf, i) {
            continue;
        }
        let val = if i + 1 < vl {
            // SAFETY: `i + 1 < vl`; indices derive from caller-validated
            // operands (see `# Safety`).
            unsafe { read_element_u64(ext_state.read_vreg(), usize::from(vs2_base), i + 1, sew) }
        } else {
            // The vacated top element takes the scalar operand.
            scalar
        };
        // SAFETY: same contract as the read above.
        unsafe {
            write_element_u64(ext_state.write_vreg(), vd_base, i, sew, val);
        }
    }
    ext_state.mark_vs_dirty();
    ext_state.reset_vstart();
}
381
/// Executes a register gather with vector indices (`vrgather.vv`): for
/// each active element `i` in `[vstart, vl)`, reads index `vs1[i]` and
/// writes `vd[i] = vs2[index]` when `index < vlmax`, else zero.
///
/// Masked-off elements are left unchanged. On completion marks the vector
/// state dirty and resets `vstart`.
///
/// # Safety
///
/// Caller must uphold the contracts of `snapshot_mask`,
/// `read_element_u64`, and `write_element_u64`: register and element
/// indices derived from `vd`/`vs2`/`vs1`/`vl`/`vlmax`/`sew` must be in
/// bounds for the register file — TODO confirm the exact helper contracts.
#[inline(always)]
#[expect(clippy::too_many_arguments, reason = "Internal API")]
#[doc(hidden)]
pub unsafe fn execute_rgather_vv<Reg, ExtState, CustomError>(
    ext_state: &mut ExtState,
    vd: VReg,
    vs2: VReg,
    vs1: VReg,
    vm: bool,
    vl: u32,
    vstart: u32,
    sew: Vsew,
    vlmax: u32,
) where
    Reg: Register,
    ExtState: VectorRegistersExt<Reg, CustomError>,
    [(); ExtState::ELEN as usize]:,
    [(); ExtState::VLEN as usize]:,
    [(); ExtState::VLENB as usize]:,
    CustomError: fmt::Debug,
{
    // Snapshot the mask up front so later writes to `vd` cannot affect it.
    // SAFETY: caller guarantees valid register state (see `# Safety`).
    let mask_buf = unsafe { snapshot_mask(ext_state.read_vreg(), vm, vl) };
    let vd_base = vd.bits();
    let vs2_base = vs2.bits();
    let vs1_base = vs1.bits();
    for i in vstart..vl {
        // Masked-off elements are left undisturbed.
        if !mask_bit(&mask_buf, i) {
            continue;
        }
        // Gather index comes from the same-SEW element of `vs1`.
        // SAFETY: indices derive from caller-validated operands (see `# Safety`).
        let index =
            unsafe { read_element_u64(ext_state.read_vreg(), usize::from(vs1_base), i, sew) };
        // Out-of-range indices read as zero, per gather semantics here.
        let val = if index < u64::from(vlmax) {
            // SAFETY: `index < vlmax`; same contract as above.
            unsafe {
                read_element_u64(
                    ext_state.read_vreg(),
                    usize::from(vs2_base),
                    index as u32,
                    sew,
                )
            }
        } else {
            0u64
        };
        // SAFETY: same contract as the reads above.
        unsafe {
            write_element_u64(ext_state.write_vreg(), vd_base, i, sew, val);
        }
    }
    ext_state.mark_vs_dirty();
    ext_state.reset_vstart();
}
442
/// Executes a register gather with a scalar index (`vrgather.vx`/`.vi`):
/// broadcasts `vs2[index]` (or zero when `index >= vlmax`) into every
/// active element of `vd` in `[vstart, vl)`.
///
/// The gathered value is computed once before the loop since the index is
/// the same for all elements. Masked-off elements are left unchanged. On
/// completion marks the vector state dirty and resets `vstart`.
///
/// # Safety
///
/// Caller must uphold the contracts of `snapshot_mask`,
/// `read_element_u64`, and `write_element_u64`: register and element
/// indices derived from `vd`/`vs2`/`vl`/`vlmax`/`sew` must be in bounds
/// for the register file — TODO confirm the exact helper contracts.
#[inline(always)]
#[expect(clippy::too_many_arguments, reason = "Internal API")]
#[doc(hidden)]
pub unsafe fn execute_rgather_scalar<Reg, ExtState, CustomError>(
    ext_state: &mut ExtState,
    vd: VReg,
    vs2: VReg,
    vm: bool,
    vl: u32,
    vstart: u32,
    sew: Vsew,
    vlmax: u32,
    index: u64,
) where
    Reg: Register,
    ExtState: VectorRegistersExt<Reg, CustomError>,
    [(); ExtState::ELEN as usize]:,
    [(); ExtState::VLEN as usize]:,
    [(); ExtState::VLENB as usize]:,
    CustomError: fmt::Debug,
{
    // Snapshot the mask up front so later writes to `vd` cannot affect it.
    // SAFETY: caller guarantees valid register state (see `# Safety`).
    let mask_buf = unsafe { snapshot_mask(ext_state.read_vreg(), vm, vl) };
    let vd_base = vd.bits();
    let vs2_base = vs2.bits();
    // Hoisted out of the loop: the scalar index selects one source element
    // for all destinations; out-of-range indices read as zero.
    let val = if index < u64::from(vlmax) {
        // SAFETY: `index < vlmax`; register indices derive from
        // caller-validated operands (see `# Safety`).
        unsafe {
            read_element_u64(
                ext_state.read_vreg(),
                usize::from(vs2_base),
                index as u32,
                sew,
            )
        }
    } else {
        0u64
    };
    for i in vstart..vl {
        // Masked-off elements are left undisturbed.
        if !mask_bit(&mask_buf, i) {
            continue;
        }
        // SAFETY: same contract as the read above.
        unsafe {
            write_element_u64(ext_state.write_vreg(), vd_base, i, sew, val);
        }
    }
    ext_state.mark_vs_dirty();
    ext_state.reset_vstart();
}
500
/// Executes a register gather with 16-bit indices (`vrgatherei16.vv`):
/// for each active element `i` in `[vstart, vl)`, reads a 16-bit index
/// from `vs1` and writes `vd[i] = vs2[index]` when `index < vlmax`, else
/// zero. Indices are always read at `Vsew::E16`, independent of the data
/// SEW.
///
/// `index_group_regs` is the number of registers in the index group; the
/// debug assertion checks `vl` fits both `vlmax` and the number of 16-bit
/// indices that group can hold (`VLENB / 2` per register). Masked-off
/// elements are left unchanged. On completion marks the vector state dirty
/// and resets `vstart`.
///
/// # Safety
///
/// Caller must uphold the contracts of `snapshot_mask`,
/// `read_element_u64`, and `write_element_u64`: register and element
/// indices derived from `vd`/`vs2`/`vs1`/`vl`/`vlmax`/`sew` must be in
/// bounds for the register file — TODO confirm the exact helper contracts.
#[inline(always)]
#[expect(clippy::too_many_arguments, reason = "Internal API")]
#[doc(hidden)]
pub unsafe fn execute_rgatherei16<Reg, ExtState, CustomError>(
    ext_state: &mut ExtState,
    vd: VReg,
    vs2: VReg,
    vs1: VReg,
    vm: bool,
    vl: u32,
    vstart: u32,
    sew: Vsew,
    vlmax: u32,
    index_group_regs: u8,
) where
    Reg: Register,
    ExtState: VectorRegistersExt<Reg, CustomError>,
    [(); ExtState::ELEN as usize]:,
    [(); ExtState::VLEN as usize]:,
    [(); ExtState::VLENB as usize]:,
    CustomError: fmt::Debug,
{
    // Each register holds VLENB / 2 sixteen-bit indices.
    let index_capacity = u32::from(index_group_regs) * (ExtState::VLENB / 2);
    debug_assert!(
        vl <= vlmax && vl <= index_capacity,
        "vl={vl} exceeds vlmax={vlmax} or index_capacity={index_capacity}"
    );
    // Snapshot the mask up front so later writes to `vd` cannot affect it.
    // SAFETY: caller guarantees valid register state (see `# Safety`).
    let mask_buf = unsafe { snapshot_mask(ext_state.read_vreg(), vm, vl) };
    let vd_base = vd.bits();
    let vs2_base = vs2.bits();
    let vs1_base = vs1.bits();
    for i in vstart..vl {
        // Masked-off elements are left undisturbed.
        if !mask_bit(&mask_buf, i) {
            continue;
        }
        // Index elements are always 16-bit, regardless of the data SEW.
        // SAFETY: indices derive from caller-validated operands (see `# Safety`).
        let index =
            unsafe { read_element_u64(ext_state.read_vreg(), usize::from(vs1_base), i, Vsew::E16) };
        // Out-of-range indices read as zero, per gather semantics here.
        let val = if index < u64::from(vlmax) {
            // SAFETY: `index < vlmax`; same contract as above.
            unsafe {
                read_element_u64(
                    ext_state.read_vreg(),
                    usize::from(vs2_base),
                    index as u32,
                    sew,
                )
            }
        } else {
            0u64
        };
        // SAFETY: same contract as the reads above.
        unsafe {
            write_element_u64(ext_state.write_vreg(), vd_base, i, sew, val);
        }
    }
    ext_state.mark_vs_dirty();
    ext_state.reset_vstart();
}
577
/// Executes a vector-vector merge (`vmerge.vvm`): for each element `i` in
/// `[vstart, vl)`, writes `vd[i] = vs1[i]` where the mask bit is set and
/// `vd[i] = vs2[i]` where it is clear.
///
/// Unlike the masked slide/gather helpers, every element in the range is
/// written — the mask selects the source rather than skipping elements.
/// On completion marks the vector state dirty and resets `vstart`.
///
/// # Safety
///
/// Caller must uphold the contracts of `snapshot_mask`,
/// `read_element_u64`, and `write_element_u64`: register and element
/// indices derived from `vd`/`vs2`/`vs1`/`vl`/`sew` must be in bounds for
/// the register file — TODO confirm the exact helper contracts.
#[inline(always)]
#[expect(clippy::too_many_arguments, reason = "Internal API")]
#[doc(hidden)]
pub unsafe fn execute_merge_vv<Reg, ExtState, CustomError>(
    ext_state: &mut ExtState,
    vd: VReg,
    vs2: VReg,
    vs1: VReg,
    vm: bool,
    vl: u32,
    vstart: u32,
    sew: Vsew,
) where
    Reg: Register,
    ExtState: VectorRegistersExt<Reg, CustomError>,
    [(); ExtState::ELEN as usize]:,
    [(); ExtState::VLEN as usize]:,
    [(); ExtState::VLENB as usize]:,
    CustomError: fmt::Debug,
{
    // Snapshot the mask up front so later writes to `vd` cannot affect it.
    // SAFETY: caller guarantees valid register state (see `# Safety`).
    let mask_buf = unsafe { snapshot_mask(ext_state.read_vreg(), vm, vl) };
    let vd_base = vd.bits();
    let vs1_base = vs1.bits();
    let vs2_base = vs2.bits();
    for i in vstart..vl {
        // The mask selects the source operand; every element is written.
        let mask_set = mask_bit(&mask_buf, i);
        let val = if mask_set {
            // SAFETY: indices derive from caller-validated operands (see `# Safety`).
            unsafe { read_element_u64(ext_state.read_vreg(), usize::from(vs1_base), i, sew) }
        } else {
            // SAFETY: same contract as above.
            unsafe { read_element_u64(ext_state.read_vreg(), usize::from(vs2_base), i, sew) }
        };
        // SAFETY: same contract as the reads above.
        unsafe { write_element_u64(ext_state.write_vreg(), vd_base, i, sew, val) };
    }
    ext_state.mark_vs_dirty();
    ext_state.reset_vstart();
}
630
/// Executes a vector-scalar merge (`vmerge.vxm`/`vmerge.vim`): for each
/// element `i` in `[vstart, vl)`, writes `vd[i] = scalar` where the mask
/// bit is set and `vd[i] = vs2[i]` where it is clear.
///
/// Every element in the range is written — the mask selects the source
/// rather than skipping elements. On completion marks the vector state
/// dirty and resets `vstart`.
///
/// # Safety
///
/// Caller must uphold the contracts of `snapshot_mask`,
/// `read_element_u64`, and `write_element_u64`: register and element
/// indices derived from `vd`/`vs2`/`vl`/`sew` must be in bounds for the
/// register file — TODO confirm the exact helper contracts.
#[inline(always)]
#[expect(clippy::too_many_arguments, reason = "Internal API")]
#[doc(hidden)]
pub unsafe fn execute_merge_scalar<Reg, ExtState, CustomError>(
    ext_state: &mut ExtState,
    vd: VReg,
    vs2: VReg,
    vm: bool,
    vl: u32,
    vstart: u32,
    sew: Vsew,
    scalar: u64,
) where
    Reg: Register,
    ExtState: VectorRegistersExt<Reg, CustomError>,
    [(); ExtState::ELEN as usize]:,
    [(); ExtState::VLEN as usize]:,
    [(); ExtState::VLENB as usize]:,
    CustomError: fmt::Debug,
{
    // Snapshot the mask up front so later writes to `vd` cannot affect it.
    // SAFETY: caller guarantees valid register state (see `# Safety`).
    let mask_buf = unsafe { snapshot_mask(ext_state.read_vreg(), vm, vl) };
    let vd_base = vd.bits();
    let vs2_base = vs2.bits();
    for i in vstart..vl {
        // The mask selects the source operand; every element is written.
        let val = if mask_bit(&mask_buf, i) {
            scalar
        } else {
            // SAFETY: indices derive from caller-validated operands (see `# Safety`).
            unsafe { read_element_u64(ext_state.read_vreg(), usize::from(vs2_base), i, sew) }
        };
        // SAFETY: same contract as the read above.
        unsafe { write_element_u64(ext_state.write_vreg(), vd_base, i, sew, val) };
    }
    ext_state.mark_vs_dirty();
    ext_state.reset_vstart();
}
678
/// Executes a vector compress (`vcompress.vm`): packs the elements of
/// `vs2` whose corresponding bit in the `vs1` mask register is set into
/// consecutive elements of `vd`, starting at destination element 0.
///
/// The relevant mask bytes of `vs1` are copied into a local buffer before
/// the loop so that writes to `vd` cannot corrupt the mask if the
/// registers alias. Destination elements past the packed count are left
/// unchanged. On completion marks the vector state dirty and resets
/// `vstart`.
///
/// # Safety
///
/// Caller must ensure `vs1` is a valid register index and that
/// `vl.div_ceil(8) <= VLENB`, and must uphold the contracts of
/// `read_element_u64` and `write_element_u64` for indices derived from
/// `vd`/`vs2`/`vl`/`sew` — TODO confirm the exact helper contracts.
#[inline(always)]
#[doc(hidden)]
pub unsafe fn execute_compress<Reg, ExtState, CustomError>(
    ext_state: &mut ExtState,
    vd: VReg,
    vs2: VReg,
    vs1: VReg,
    vl: u32,
    sew: Vsew,
) where
    Reg: Register,
    ExtState: VectorRegistersExt<Reg, CustomError>,
    [(); ExtState::ELEN as usize]:,
    [(); ExtState::VLEN as usize]:,
    [(); ExtState::VLENB as usize]:,
    CustomError: fmt::Debug,
{
    let vd_base = vd.bits();
    let vs2_base = vs2.bits();
    let vs1_base = vs1.bits();
    // One mask bit per element, packed into bytes.
    let mask_bytes = vl.div_ceil(u8::BITS) as usize;
    let vreg = ext_state.read_vreg();
    let mut vs1_buf = [0u8; { ExtState::VLENB as usize }];
    // SAFETY: `vs1_base < 32` and `mask_bytes <= VLENB` per the caller's
    // contract (see `# Safety`).
    unsafe {
        vs1_buf.get_unchecked_mut(..mask_bytes).copy_from_slice(
            vreg.get_unchecked(usize::from(vs1_base))
                .get_unchecked(..mask_bytes),
        );
    }
    // Next free destination element; only selected elements advance it.
    let mut out_idx = 0u32;
    for i in 0..vl {
        if !mask_bit(&vs1_buf, i) {
            continue;
        }
        // SAFETY: indices derive from caller-validated operands (see `# Safety`).
        let val = unsafe { read_element_u64(ext_state.read_vreg(), usize::from(vs2_base), i, sew) };
        // SAFETY: `out_idx <= i < vl`; same contract as the read above.
        unsafe {
            write_element_u64(ext_state.write_vreg(), vd_base, out_idx, sew, val);
        }
        out_idx += 1;
    }
    ext_state.mark_vs_dirty();
    ext_state.reset_vstart();
}
735
/// Copies `count` whole vector registers from the group starting at
/// `src_base` to the group starting at `dst_base` (`vmv<N>r.v`).
///
/// Overlapping source and destination groups are handled correctly:
/// `slice::copy_within` has `memmove` semantics, which makes the previous
/// implementation's intermediate 8-register stack buffer (and its second
/// copy pass) unnecessary.
///
/// # Safety
///
/// `count` must be at most 8, and both `src_base + count` and
/// `dst_base + count` must not exceed 32 (the register-file size).
/// Out-of-range inputs now panic via `copy_within`'s bounds check rather
/// than causing undefined behavior, which is backward-compatible for all
/// callers that uphold the contract.
#[inline(always)]
#[doc(hidden)]
pub unsafe fn execute_whole_reg_move<const VLENB: usize>(
    vreg: &mut [[u8; VLENB]; 32],
    dst_base: u8,
    src_base: u8,
    count: u8,
) {
    let count = usize::from(count);
    debug_assert!(count <= 8, "count must be <= 8 for vmvNr");
    let src = usize::from(src_base);
    let dst = usize::from(dst_base);
    // `[u8; VLENB]` is `Copy`, so this compiles to a single memmove of
    // `count` whole registers, correct even when the groups overlap.
    vreg.copy_within(src..src + count, dst);
}