1use crate::v::vector_registers::{VectorRegisterFile, VectorRegistersExt};
4pub use crate::v::zvexx::arith::zvexx_arith_helpers::check_vreg_group_alignment;
5use crate::v::zvexx::arith::zvexx_arith_helpers::{read_element_u64, write_element_u64};
6use crate::v::zvexx::load::zvexx_load_helpers::{mask_bit, snapshot_mask};
7use crate::v::zvexx::zvexx_helpers::INSTRUCTION_SIZE;
8use crate::{ExecutionError, ProgramCounter};
9use ab_riscv_primitives::prelude::*;
10use core::fmt;
11use core::hint::cold_path;
12
13#[inline(always)]
18#[doc(hidden)]
19pub fn check_no_overlap<Reg, Memory, PC, CustomError>(
20 program_counter: &PC,
21 a: VReg,
22 b: VReg,
23 count: u8,
24) -> Result<(), ExecutionError<Reg::Type, CustomError>>
25where
26 Reg: Register,
27 PC: ProgramCounter<Reg::Type, Memory, CustomError>,
28{
29 let a_start = u16::from(a.to_bits());
30 let b_start = u16::from(b.to_bits());
31 let count = u16::from(count);
32 if a_start < b_start + count && b_start < a_start + count {
36 cold_path();
37 return Err(ExecutionError::IllegalInstruction {
38 address: program_counter.old_pc(INSTRUCTION_SIZE),
39 });
40 }
41 Ok(())
42}
43
44#[inline(always)]
50#[doc(hidden)]
51pub fn check_no_overlap_asymmetric<Reg, Memory, PC, CustomError>(
52 program_counter: &PC,
53 a: VReg,
54 a_count: u8,
55 b: VReg,
56 b_count: u8,
57) -> Result<(), ExecutionError<Reg::Type, CustomError>>
58where
59 Reg: Register,
60 PC: ProgramCounter<Reg::Type, Memory, CustomError>,
61{
62 let a_start = u16::from(a.to_bits());
63 let b_start = u16::from(b.to_bits());
64 let a_count = u16::from(a_count);
65 let b_count = u16::from(b_count);
66 if a_start < b_start + b_count && b_start < a_start + a_count {
69 cold_path();
70 return Err(ExecutionError::IllegalInstruction {
71 address: program_counter.old_pc(INSTRUCTION_SIZE),
72 });
73 }
74 Ok(())
75}
76
77#[inline(always)]
82pub unsafe fn read_element_0_u64<const VLENB: usize>(
83 vregs: &VectorRegisterFile<VLENB>,
84 base_reg: VReg,
85 sew: Vsew,
86) -> u64 {
87 let sew_bytes = usize::from(sew.bytes_width());
88 let reg = vregs.get(base_reg);
89 let mut buf = [0u8; 8];
90 unsafe {
92 buf.get_unchecked_mut(..sew_bytes)
93 .copy_from_slice(reg.get_unchecked(..sew_bytes));
94 }
95 u64::from_le_bytes(buf)
96}
97
98#[inline(always)]
103pub unsafe fn write_element_0_u64<const VLENB: usize>(
104 vregs: &mut VectorRegisterFile<VLENB>,
105 base_reg: VReg,
106 sew: Vsew,
107 value: u64,
108) {
109 let sew_bytes = usize::from(sew.bytes_width());
110 let buf = value.to_le_bytes();
111 let reg = vregs.get_mut(base_reg);
112 unsafe {
114 reg.get_unchecked_mut(..sew_bytes)
115 .copy_from_slice(buf.get_unchecked(..sew_bytes));
116 }
117}
118
119#[inline(always)]
134pub fn sign_extend_to_reg<Reg>(val: u64, sew: Vsew) -> Reg::Type
135where
136 Reg: Register,
137{
138 let sew_bits = u32::from(sew.bits_width());
139 let shift = u64::BITS - sew_bits;
141 let sign_extended = (val.cast_signed() << shift) >> shift;
143 let raw = sign_extended.cast_unsigned();
144 if Reg::XLEN == u64::BITS as u8 {
145 let lo = Reg::Type::from(raw as u32);
147 let hi = Reg::Type::from((raw >> u32::BITS) as u32);
148 lo | (hi << 32u8)
149 } else {
150 Reg::Type::from(raw as u32)
152 }
153}
154
155#[inline(always)]
165#[doc(hidden)]
166pub unsafe fn execute_slideup<Reg, ExtState, CustomError>(
167 ext_state: &mut ExtState,
168 vd: VReg,
169 vs2: VReg,
170 vm: bool,
171 sew: Vsew,
172 offset: u64,
173) where
174 Reg: Register,
175 ExtState: VectorRegistersExt<Reg, CustomError>,
176 [(); ExtState::ELEN as usize]:,
177 [(); ExtState::VLEN as usize]:,
178 [(); ExtState::VLENB as usize]:,
179 CustomError: fmt::Debug,
180{
181 let vl = ext_state.vl();
182 let vstart = ext_state.vstart();
183 let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
185 let start = u32::from(vstart).max(offset.min(u64::from(u32::MAX)) as u32);
188 for i in start..vl {
189 if !mask_bit(&mask_buf, i) {
190 continue;
191 }
192 let src_idx = u64::from(i) - offset;
193 let val = unsafe { read_element_u64(ext_state.read_vregs(), vs2, src_idx as u32, sew) };
195 unsafe {
197 write_element_u64(ext_state.write_vregs(), vd, i, sew, val);
198 }
199 }
200 ext_state.mark_vs_dirty();
201 ext_state.reset_vstart();
202}
203
204#[inline(always)]
213#[doc(hidden)]
214pub unsafe fn execute_slidedown<Reg, ExtState, CustomError>(
215 ext_state: &mut ExtState,
216 vd: VReg,
217 vs2: VReg,
218 vm: bool,
219 sew: Vsew,
220 vlmax: u32,
221 offset: u64,
222) where
223 Reg: Register,
224 ExtState: VectorRegistersExt<Reg, CustomError>,
225 [(); ExtState::ELEN as usize]:,
226 [(); ExtState::VLEN as usize]:,
227 [(); ExtState::VLENB as usize]:,
228 CustomError: fmt::Debug,
229{
230 let vl = ext_state.vl();
231 let vstart = ext_state.vstart();
232 let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
234 for i in u32::from(vstart)..vl {
235 if !mask_bit(&mask_buf, i) {
236 continue;
237 }
238 let val = if let Some(src_idx) = u64::from(i).checked_add(offset)
241 && src_idx < u64::from(vlmax)
242 {
243 unsafe { read_element_u64(ext_state.read_vregs(), vs2, src_idx as u32, sew) }
245 } else {
246 0
247 };
248 unsafe {
250 write_element_u64(ext_state.write_vregs(), vd, i, sew, val);
251 }
252 }
253 ext_state.mark_vs_dirty();
254 ext_state.reset_vstart();
255}
256
257#[inline(always)]
268#[doc(hidden)]
269pub unsafe fn execute_slide1up<Reg, ExtState, CustomError>(
270 ext_state: &mut ExtState,
271 vd: VReg,
272 vs2: VReg,
273 vm: bool,
274 sew: Vsew,
275 scalar: u64,
276) where
277 Reg: Register,
278 ExtState: VectorRegistersExt<Reg, CustomError>,
279 [(); ExtState::ELEN as usize]:,
280 [(); ExtState::VLEN as usize]:,
281 [(); ExtState::VLENB as usize]:,
282 CustomError: fmt::Debug,
283{
284 let vl = ext_state.vl();
285 let vstart = ext_state.vstart();
286 let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
288 for i in u32::from(vstart)..vl {
289 if !mask_bit(&mask_buf, i) {
290 continue;
291 }
292 let val = if i == 0 {
293 scalar
294 } else {
295 unsafe { read_element_u64(ext_state.read_vregs(), vs2, i - 1, sew) }
297 };
298 unsafe {
300 write_element_u64(ext_state.write_vregs(), vd, i, sew, val);
301 }
302 }
303 ext_state.mark_vs_dirty();
304 ext_state.reset_vstart();
305}
306
307#[inline(always)]
322#[doc(hidden)]
323pub unsafe fn execute_slide1down<Reg, ExtState, CustomError>(
324 ext_state: &mut ExtState,
325 vd: VReg,
326 vs2: VReg,
327 vm: bool,
328 sew: Vsew,
329 scalar: u64,
330) where
331 Reg: Register,
332 ExtState: VectorRegistersExt<Reg, CustomError>,
333 [(); ExtState::ELEN as usize]:,
334 [(); ExtState::VLEN as usize]:,
335 [(); ExtState::VLENB as usize]:,
336 CustomError: fmt::Debug,
337{
338 let vl = ext_state.vl();
339 let vstart = ext_state.vstart();
340 let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
342 for i in u32::from(vstart)..vl {
343 if !mask_bit(&mask_buf, i) {
344 continue;
345 }
346 let val = if i + 1 < vl {
347 unsafe { read_element_u64(ext_state.read_vregs(), vs2, i + 1, sew) }
349 } else {
350 scalar
351 };
352 unsafe {
354 write_element_u64(ext_state.write_vregs(), vd, i, sew, val);
355 }
356 }
357 ext_state.mark_vs_dirty();
358 ext_state.reset_vstart();
359}
360
361#[inline(always)]
368#[doc(hidden)]
369pub unsafe fn execute_rgather_vv<Reg, ExtState, CustomError>(
370 ext_state: &mut ExtState,
371 vd: VReg,
372 vs2: VReg,
373 vs1: VReg,
374 vm: bool,
375 sew: Vsew,
376 vlmax: u32,
377) where
378 Reg: Register,
379 ExtState: VectorRegistersExt<Reg, CustomError>,
380 [(); ExtState::ELEN as usize]:,
381 [(); ExtState::VLEN as usize]:,
382 [(); ExtState::VLENB as usize]:,
383 CustomError: fmt::Debug,
384{
385 let vl = ext_state.vl();
386 let vstart = ext_state.vstart();
387 let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
389 for i in u32::from(vstart)..vl {
390 if !mask_bit(&mask_buf, i) {
391 continue;
392 }
393 let index = unsafe { read_element_u64(ext_state.read_vregs(), vs1, i, sew) };
395 let val = if index < u64::from(vlmax) {
396 unsafe { read_element_u64(ext_state.read_vregs(), vs2, index as u32, sew) }
398 } else {
399 0u64
400 };
401 unsafe {
403 write_element_u64(ext_state.write_vregs(), vd, i, sew, val);
404 }
405 }
406 ext_state.mark_vs_dirty();
407 ext_state.reset_vstart();
408}
409
410#[inline(always)]
417#[doc(hidden)]
418pub unsafe fn execute_rgather_scalar<Reg, ExtState, CustomError>(
419 ext_state: &mut ExtState,
420 vd: VReg,
421 vs2: VReg,
422 vm: bool,
423 sew: Vsew,
424 vlmax: u32,
425 index: u64,
426) where
427 Reg: Register,
428 ExtState: VectorRegistersExt<Reg, CustomError>,
429 [(); ExtState::ELEN as usize]:,
430 [(); ExtState::VLEN as usize]:,
431 [(); ExtState::VLENB as usize]:,
432 CustomError: fmt::Debug,
433{
434 let vl = ext_state.vl();
435 let vstart = ext_state.vstart();
436 let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
438 let val = if index < u64::from(vlmax) {
440 unsafe { read_element_u64(ext_state.read_vregs(), vs2, index as u32, sew) }
442 } else {
443 0u64
444 };
445 for i in u32::from(vstart)..vl {
446 if !mask_bit(&mask_buf, i) {
447 continue;
448 }
449 unsafe {
451 write_element_u64(ext_state.write_vregs(), vd, i, sew, val);
452 }
453 }
454 ext_state.mark_vs_dirty();
455 ext_state.reset_vstart();
456}
457
458#[inline(always)]
469#[expect(clippy::too_many_arguments, reason = "Internal API")]
470#[doc(hidden)]
471pub unsafe fn execute_rgatherei16<Reg, ExtState, CustomError>(
472 ext_state: &mut ExtState,
473 vd: VReg,
474 vs2: VReg,
475 vs1: VReg,
476 vm: bool,
477 sew: Vsew,
478 vlmax: u32,
479 index_group_regs: u8,
480) where
481 Reg: Register,
482 ExtState: VectorRegistersExt<Reg, CustomError>,
483 [(); ExtState::ELEN as usize]:,
484 [(); ExtState::VLEN as usize]:,
485 [(); ExtState::VLENB as usize]:,
486 CustomError: fmt::Debug,
487{
488 let vl = ext_state.vl();
489 let vstart = ext_state.vstart();
490 let index_capacity = u32::from(index_group_regs) * (ExtState::VLENB / 2);
493 debug_assert!(
496 vl <= vlmax && vl <= index_capacity,
497 "vl={vl} exceeds vlmax={vlmax} or index_capacity={index_capacity}"
498 );
499 let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
501 for i in u32::from(vstart)..vl {
502 if !mask_bit(&mask_buf, i) {
503 continue;
504 }
505 let index = unsafe { read_element_u64(ext_state.read_vregs(), vs1, i, Vsew::E16) };
509 let val = if index < u64::from(vlmax) {
510 unsafe { read_element_u64(ext_state.read_vregs(), vs2, index as u32, sew) }
512 } else {
513 0u64
514 };
515 unsafe {
517 write_element_u64(ext_state.write_vregs(), vd, i, sew, val);
518 }
519 }
520 ext_state.mark_vs_dirty();
521 ext_state.reset_vstart();
522}
523
524#[inline(always)]
535#[doc(hidden)]
536pub unsafe fn execute_merge_vv<Reg, ExtState, CustomError>(
537 ext_state: &mut ExtState,
538 vd: VReg,
539 vs2: VReg,
540 vs1: VReg,
541 vm: bool,
542 sew: Vsew,
543) where
544 Reg: Register,
545 ExtState: VectorRegistersExt<Reg, CustomError>,
546 [(); ExtState::ELEN as usize]:,
547 [(); ExtState::VLEN as usize]:,
548 [(); ExtState::VLENB as usize]:,
549 CustomError: fmt::Debug,
550{
551 let vl = ext_state.vl();
552 let vstart = ext_state.vstart();
553 let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
556 for i in u32::from(vstart)..vl {
557 let mask_set = mask_bit(&mask_buf, i);
558 let val = if mask_set {
559 unsafe { read_element_u64(ext_state.read_vregs(), vs1, i, sew) }
561 } else {
562 unsafe { read_element_u64(ext_state.read_vregs(), vs2, i, sew) }
565 };
566 unsafe {
568 write_element_u64(ext_state.write_vregs(), vd, i, sew, val);
569 }
570 }
571 ext_state.mark_vs_dirty();
572 ext_state.reset_vstart();
573}
574
575#[inline(always)]
586#[doc(hidden)]
587pub unsafe fn execute_merge_scalar<Reg, ExtState, CustomError>(
588 ext_state: &mut ExtState,
589 vd: VReg,
590 vs2: VReg,
591 vm: bool,
592 sew: Vsew,
593 scalar: u64,
594) where
595 Reg: Register,
596 ExtState: VectorRegistersExt<Reg, CustomError>,
597 [(); ExtState::ELEN as usize]:,
598 [(); ExtState::VLEN as usize]:,
599 [(); ExtState::VLENB as usize]:,
600 CustomError: fmt::Debug,
601{
602 let vl = ext_state.vl();
603 let vstart = ext_state.vstart();
604 let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
606
607 for i in u32::from(vstart)..vl {
608 let val = if mask_bit(&mask_buf, i) {
609 scalar
610 } else {
611 unsafe { read_element_u64(ext_state.read_vregs(), vs2, i, sew) }
613 };
614 unsafe {
616 write_element_u64(ext_state.write_vregs(), vd, i, sew, val);
617 }
618 }
619 ext_state.mark_vs_dirty();
620 ext_state.reset_vstart();
621}
622
623#[inline(always)]
634#[doc(hidden)]
635pub unsafe fn execute_compress<Reg, ExtState, CustomError>(
636 ext_state: &mut ExtState,
637 vd: VReg,
638 vs2: VReg,
639 vs1: VReg,
640 vl: u32,
641 sew: Vsew,
642) where
643 Reg: Register,
644 ExtState: VectorRegistersExt<Reg, CustomError>,
645 [(); ExtState::ELEN as usize]:,
646 [(); ExtState::VLEN as usize]:,
647 [(); ExtState::VLENB as usize]:,
648 CustomError: fmt::Debug,
649{
650 let mask_bytes = vl.div_ceil(u8::BITS) as usize;
651 let vreg = ext_state.read_vregs();
652 let mut vs1_buf = [0u8; { ExtState::VLENB as usize }];
653 unsafe {
655 vs1_buf
656 .get_unchecked_mut(..mask_bytes)
657 .copy_from_slice(vreg.get(vs1).get_unchecked(..mask_bytes));
658 }
659 let mut out_idx = 0u32;
660 for i in 0..vl {
661 if !mask_bit(&vs1_buf, i) {
662 continue;
663 }
664 let val = unsafe { read_element_u64(ext_state.read_vregs(), vs2, i, sew) };
666 unsafe {
668 write_element_u64(ext_state.write_vregs(), vd, out_idx, sew, val);
669 }
670 out_idx += 1;
671 }
672 ext_state.mark_vs_dirty();
673 ext_state.reset_vstart();
674}
675
676#[inline(always)]
687#[doc(hidden)]
688pub unsafe fn execute_whole_reg_move<const COUNT: usize, const VLENB: usize>(
689 vregs: &mut VectorRegisterFile<VLENB>,
690 dst_base: VReg,
691 src_base: VReg,
692) {
693 let mut tmp = [[0u8; VLENB]; COUNT];
696 for (k, item) in tmp.iter_mut().enumerate() {
697 let src = unsafe { VReg::from_bits(src_base.to_bits() + k as u8).unwrap_unchecked() };
699 *item = *vregs.get(src);
700 }
701 for (k, item) in tmp.iter().enumerate() {
702 let dst = unsafe { VReg::from_bits(dst_base.to_bits() + k as u8).unwrap_unchecked() };
704 *vregs.get_mut(dst) = *item;
705 }
706}