1use crate::v::vector_registers::{VectorRegisterFile, VectorRegistersExt};
4pub use crate::v::zvexx::arith::zvexx_arith_helpers::check_vreg_group_alignment;
5use crate::v::zvexx::arith::zvexx_arith_helpers::{read_element_u64, write_element_u64};
6use crate::v::zvexx::load::zvexx_load_helpers::{mask_bit, snapshot_mask};
7use crate::v::zvexx::zvexx_helpers::INSTRUCTION_SIZE;
8use crate::{ExecutionError, ProgramCounter};
9use ab_riscv_primitives::prelude::*;
10use core::fmt;
11
12#[inline(always)]
17#[doc(hidden)]
18pub fn check_no_overlap<Reg, Memory, PC, CustomError>(
19 program_counter: &PC,
20 a: VReg,
21 b: VReg,
22 count: u8,
23) -> Result<(), ExecutionError<Reg::Type, CustomError>>
24where
25 Reg: Register,
26 PC: ProgramCounter<Reg::Type, Memory, CustomError>,
27{
28 let a_start = u16::from(a.to_bits());
29 let b_start = u16::from(b.to_bits());
30 let count = u16::from(count);
31 if a_start < b_start + count && b_start < a_start + count {
35 return Err(ExecutionError::IllegalInstruction {
36 address: program_counter.old_pc(INSTRUCTION_SIZE),
37 });
38 }
39 Ok(())
40}
41
42#[inline(always)]
48#[doc(hidden)]
49pub fn check_no_overlap_asymmetric<Reg, Memory, PC, CustomError>(
50 program_counter: &PC,
51 a: VReg,
52 a_count: u8,
53 b: VReg,
54 b_count: u8,
55) -> Result<(), ExecutionError<Reg::Type, CustomError>>
56where
57 Reg: Register,
58 PC: ProgramCounter<Reg::Type, Memory, CustomError>,
59{
60 let a_start = u16::from(a.to_bits());
61 let b_start = u16::from(b.to_bits());
62 let a_count = u16::from(a_count);
63 let b_count = u16::from(b_count);
64 if a_start < b_start + b_count && b_start < a_start + a_count {
67 return Err(ExecutionError::IllegalInstruction {
68 address: program_counter.old_pc(INSTRUCTION_SIZE),
69 });
70 }
71 Ok(())
72}
73
74#[inline(always)]
79pub unsafe fn read_element_0_u64<const VLENB: usize>(
80 vregs: &VectorRegisterFile<VLENB>,
81 base_reg: VReg,
82 sew: Vsew,
83) -> u64 {
84 let sew_bytes = usize::from(sew.bytes_width());
85 let reg = vregs.get(base_reg);
86 let mut buf = [0u8; 8];
87 unsafe {
89 buf.get_unchecked_mut(..sew_bytes)
90 .copy_from_slice(reg.get_unchecked(..sew_bytes));
91 }
92 u64::from_le_bytes(buf)
93}
94
95#[inline(always)]
100pub unsafe fn write_element_0_u64<const VLENB: usize>(
101 vregs: &mut VectorRegisterFile<VLENB>,
102 base_reg: VReg,
103 sew: Vsew,
104 value: u64,
105) {
106 let sew_bytes = usize::from(sew.bytes_width());
107 let buf = value.to_le_bytes();
108 let reg = vregs.get_mut(base_reg);
109 unsafe {
111 reg.get_unchecked_mut(..sew_bytes)
112 .copy_from_slice(buf.get_unchecked(..sew_bytes));
113 }
114}
115
116#[inline(always)]
131pub fn sign_extend_to_reg<Reg>(val: u64, sew: Vsew) -> Reg::Type
132where
133 Reg: Register,
134{
135 let sew_bits = u32::from(sew.bits_width());
136 let shift = u64::BITS - sew_bits;
138 let sign_extended = (val.cast_signed() << shift) >> shift;
140 let raw = sign_extended.cast_unsigned();
141 if Reg::XLEN == u64::BITS as u8 {
142 let lo = Reg::Type::from(raw as u32);
144 let hi = Reg::Type::from((raw >> u32::BITS) as u32);
145 lo | (hi << 32u8)
146 } else {
147 Reg::Type::from(raw as u32)
149 }
150}
151
152#[inline(always)]
162#[doc(hidden)]
163pub unsafe fn execute_slideup<Reg, ExtState, CustomError>(
164 ext_state: &mut ExtState,
165 vd: VReg,
166 vs2: VReg,
167 vm: bool,
168 sew: Vsew,
169 offset: u64,
170) where
171 Reg: Register,
172 ExtState: VectorRegistersExt<Reg, CustomError>,
173 [(); ExtState::ELEN as usize]:,
174 [(); ExtState::VLEN as usize]:,
175 [(); ExtState::VLENB as usize]:,
176 CustomError: fmt::Debug,
177{
178 let vl = ext_state.vl();
179 let vstart = ext_state.vstart();
180 let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
182 let start = u32::from(vstart).max(offset.min(u64::from(u32::MAX)) as u32);
185 for i in start..vl {
186 if !mask_bit(&mask_buf, i) {
187 continue;
188 }
189 let src_idx = u64::from(i) - offset;
190 let val = unsafe { read_element_u64(ext_state.read_vregs(), vs2, src_idx as u32, sew) };
192 unsafe {
194 write_element_u64(ext_state.write_vregs(), vd, i, sew, val);
195 }
196 }
197 ext_state.mark_vs_dirty();
198 ext_state.reset_vstart();
199}
200
201#[inline(always)]
210#[doc(hidden)]
211pub unsafe fn execute_slidedown<Reg, ExtState, CustomError>(
212 ext_state: &mut ExtState,
213 vd: VReg,
214 vs2: VReg,
215 vm: bool,
216 sew: Vsew,
217 vlmax: u32,
218 offset: u64,
219) where
220 Reg: Register,
221 ExtState: VectorRegistersExt<Reg, CustomError>,
222 [(); ExtState::ELEN as usize]:,
223 [(); ExtState::VLEN as usize]:,
224 [(); ExtState::VLENB as usize]:,
225 CustomError: fmt::Debug,
226{
227 let vl = ext_state.vl();
228 let vstart = ext_state.vstart();
229 let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
231 for i in u32::from(vstart)..vl {
232 if !mask_bit(&mask_buf, i) {
233 continue;
234 }
235 let val = if let Some(src_idx) = u64::from(i).checked_add(offset)
238 && src_idx < u64::from(vlmax)
239 {
240 unsafe { read_element_u64(ext_state.read_vregs(), vs2, src_idx as u32, sew) }
242 } else {
243 0
244 };
245 unsafe {
247 write_element_u64(ext_state.write_vregs(), vd, i, sew, val);
248 }
249 }
250 ext_state.mark_vs_dirty();
251 ext_state.reset_vstart();
252}
253
254#[inline(always)]
265#[doc(hidden)]
266pub unsafe fn execute_slide1up<Reg, ExtState, CustomError>(
267 ext_state: &mut ExtState,
268 vd: VReg,
269 vs2: VReg,
270 vm: bool,
271 sew: Vsew,
272 scalar: u64,
273) where
274 Reg: Register,
275 ExtState: VectorRegistersExt<Reg, CustomError>,
276 [(); ExtState::ELEN as usize]:,
277 [(); ExtState::VLEN as usize]:,
278 [(); ExtState::VLENB as usize]:,
279 CustomError: fmt::Debug,
280{
281 let vl = ext_state.vl();
282 let vstart = ext_state.vstart();
283 let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
285 for i in u32::from(vstart)..vl {
286 if !mask_bit(&mask_buf, i) {
287 continue;
288 }
289 let val = if i == 0 {
290 scalar
291 } else {
292 unsafe { read_element_u64(ext_state.read_vregs(), vs2, i - 1, sew) }
294 };
295 unsafe {
297 write_element_u64(ext_state.write_vregs(), vd, i, sew, val);
298 }
299 }
300 ext_state.mark_vs_dirty();
301 ext_state.reset_vstart();
302}
303
304#[inline(always)]
319#[doc(hidden)]
320pub unsafe fn execute_slide1down<Reg, ExtState, CustomError>(
321 ext_state: &mut ExtState,
322 vd: VReg,
323 vs2: VReg,
324 vm: bool,
325 sew: Vsew,
326 scalar: u64,
327) where
328 Reg: Register,
329 ExtState: VectorRegistersExt<Reg, CustomError>,
330 [(); ExtState::ELEN as usize]:,
331 [(); ExtState::VLEN as usize]:,
332 [(); ExtState::VLENB as usize]:,
333 CustomError: fmt::Debug,
334{
335 let vl = ext_state.vl();
336 let vstart = ext_state.vstart();
337 let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
339 for i in u32::from(vstart)..vl {
340 if !mask_bit(&mask_buf, i) {
341 continue;
342 }
343 let val = if i + 1 < vl {
344 unsafe { read_element_u64(ext_state.read_vregs(), vs2, i + 1, sew) }
346 } else {
347 scalar
348 };
349 unsafe {
351 write_element_u64(ext_state.write_vregs(), vd, i, sew, val);
352 }
353 }
354 ext_state.mark_vs_dirty();
355 ext_state.reset_vstart();
356}
357
358#[inline(always)]
365#[doc(hidden)]
366pub unsafe fn execute_rgather_vv<Reg, ExtState, CustomError>(
367 ext_state: &mut ExtState,
368 vd: VReg,
369 vs2: VReg,
370 vs1: VReg,
371 vm: bool,
372 sew: Vsew,
373 vlmax: u32,
374) where
375 Reg: Register,
376 ExtState: VectorRegistersExt<Reg, CustomError>,
377 [(); ExtState::ELEN as usize]:,
378 [(); ExtState::VLEN as usize]:,
379 [(); ExtState::VLENB as usize]:,
380 CustomError: fmt::Debug,
381{
382 let vl = ext_state.vl();
383 let vstart = ext_state.vstart();
384 let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
386 for i in u32::from(vstart)..vl {
387 if !mask_bit(&mask_buf, i) {
388 continue;
389 }
390 let index = unsafe { read_element_u64(ext_state.read_vregs(), vs1, i, sew) };
392 let val = if index < u64::from(vlmax) {
393 unsafe { read_element_u64(ext_state.read_vregs(), vs2, index as u32, sew) }
395 } else {
396 0u64
397 };
398 unsafe {
400 write_element_u64(ext_state.write_vregs(), vd, i, sew, val);
401 }
402 }
403 ext_state.mark_vs_dirty();
404 ext_state.reset_vstart();
405}
406
407#[inline(always)]
414#[doc(hidden)]
415pub unsafe fn execute_rgather_scalar<Reg, ExtState, CustomError>(
416 ext_state: &mut ExtState,
417 vd: VReg,
418 vs2: VReg,
419 vm: bool,
420 sew: Vsew,
421 vlmax: u32,
422 index: u64,
423) where
424 Reg: Register,
425 ExtState: VectorRegistersExt<Reg, CustomError>,
426 [(); ExtState::ELEN as usize]:,
427 [(); ExtState::VLEN as usize]:,
428 [(); ExtState::VLENB as usize]:,
429 CustomError: fmt::Debug,
430{
431 let vl = ext_state.vl();
432 let vstart = ext_state.vstart();
433 let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
435 let val = if index < u64::from(vlmax) {
437 unsafe { read_element_u64(ext_state.read_vregs(), vs2, index as u32, sew) }
439 } else {
440 0u64
441 };
442 for i in u32::from(vstart)..vl {
443 if !mask_bit(&mask_buf, i) {
444 continue;
445 }
446 unsafe {
448 write_element_u64(ext_state.write_vregs(), vd, i, sew, val);
449 }
450 }
451 ext_state.mark_vs_dirty();
452 ext_state.reset_vstart();
453}
454
455#[inline(always)]
466#[expect(clippy::too_many_arguments, reason = "Internal API")]
467#[doc(hidden)]
468pub unsafe fn execute_rgatherei16<Reg, ExtState, CustomError>(
469 ext_state: &mut ExtState,
470 vd: VReg,
471 vs2: VReg,
472 vs1: VReg,
473 vm: bool,
474 sew: Vsew,
475 vlmax: u32,
476 index_group_regs: u8,
477) where
478 Reg: Register,
479 ExtState: VectorRegistersExt<Reg, CustomError>,
480 [(); ExtState::ELEN as usize]:,
481 [(); ExtState::VLEN as usize]:,
482 [(); ExtState::VLENB as usize]:,
483 CustomError: fmt::Debug,
484{
485 let vl = ext_state.vl();
486 let vstart = ext_state.vstart();
487 let index_capacity = u32::from(index_group_regs) * (ExtState::VLENB / 2);
490 debug_assert!(
493 vl <= vlmax && vl <= index_capacity,
494 "vl={vl} exceeds vlmax={vlmax} or index_capacity={index_capacity}"
495 );
496 let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
498 for i in u32::from(vstart)..vl {
499 if !mask_bit(&mask_buf, i) {
500 continue;
501 }
502 let index = unsafe { read_element_u64(ext_state.read_vregs(), vs1, i, Vsew::E16) };
506 let val = if index < u64::from(vlmax) {
507 unsafe { read_element_u64(ext_state.read_vregs(), vs2, index as u32, sew) }
509 } else {
510 0u64
511 };
512 unsafe {
514 write_element_u64(ext_state.write_vregs(), vd, i, sew, val);
515 }
516 }
517 ext_state.mark_vs_dirty();
518 ext_state.reset_vstart();
519}
520
521#[inline(always)]
532#[doc(hidden)]
533pub unsafe fn execute_merge_vv<Reg, ExtState, CustomError>(
534 ext_state: &mut ExtState,
535 vd: VReg,
536 vs2: VReg,
537 vs1: VReg,
538 vm: bool,
539 sew: Vsew,
540) where
541 Reg: Register,
542 ExtState: VectorRegistersExt<Reg, CustomError>,
543 [(); ExtState::ELEN as usize]:,
544 [(); ExtState::VLEN as usize]:,
545 [(); ExtState::VLENB as usize]:,
546 CustomError: fmt::Debug,
547{
548 let vl = ext_state.vl();
549 let vstart = ext_state.vstart();
550 let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
553 for i in u32::from(vstart)..vl {
554 let mask_set = mask_bit(&mask_buf, i);
555 let val = if mask_set {
556 unsafe { read_element_u64(ext_state.read_vregs(), vs1, i, sew) }
558 } else {
559 unsafe { read_element_u64(ext_state.read_vregs(), vs2, i, sew) }
562 };
563 unsafe {
565 write_element_u64(ext_state.write_vregs(), vd, i, sew, val);
566 }
567 }
568 ext_state.mark_vs_dirty();
569 ext_state.reset_vstart();
570}
571
572#[inline(always)]
583#[doc(hidden)]
584pub unsafe fn execute_merge_scalar<Reg, ExtState, CustomError>(
585 ext_state: &mut ExtState,
586 vd: VReg,
587 vs2: VReg,
588 vm: bool,
589 sew: Vsew,
590 scalar: u64,
591) where
592 Reg: Register,
593 ExtState: VectorRegistersExt<Reg, CustomError>,
594 [(); ExtState::ELEN as usize]:,
595 [(); ExtState::VLEN as usize]:,
596 [(); ExtState::VLENB as usize]:,
597 CustomError: fmt::Debug,
598{
599 let vl = ext_state.vl();
600 let vstart = ext_state.vstart();
601 let mask_buf = unsafe { snapshot_mask(ext_state.read_vregs(), vm, vl) };
603
604 for i in u32::from(vstart)..vl {
605 let val = if mask_bit(&mask_buf, i) {
606 scalar
607 } else {
608 unsafe { read_element_u64(ext_state.read_vregs(), vs2, i, sew) }
610 };
611 unsafe {
613 write_element_u64(ext_state.write_vregs(), vd, i, sew, val);
614 }
615 }
616 ext_state.mark_vs_dirty();
617 ext_state.reset_vstart();
618}
619
620#[inline(always)]
631#[doc(hidden)]
632pub unsafe fn execute_compress<Reg, ExtState, CustomError>(
633 ext_state: &mut ExtState,
634 vd: VReg,
635 vs2: VReg,
636 vs1: VReg,
637 vl: u32,
638 sew: Vsew,
639) where
640 Reg: Register,
641 ExtState: VectorRegistersExt<Reg, CustomError>,
642 [(); ExtState::ELEN as usize]:,
643 [(); ExtState::VLEN as usize]:,
644 [(); ExtState::VLENB as usize]:,
645 CustomError: fmt::Debug,
646{
647 let mask_bytes = vl.div_ceil(u8::BITS) as usize;
648 let vreg = ext_state.read_vregs();
649 let mut vs1_buf = [0u8; { ExtState::VLENB as usize }];
650 unsafe {
652 vs1_buf
653 .get_unchecked_mut(..mask_bytes)
654 .copy_from_slice(vreg.get(vs1).get_unchecked(..mask_bytes));
655 }
656 let mut out_idx = 0u32;
657 for i in 0..vl {
658 if !mask_bit(&vs1_buf, i) {
659 continue;
660 }
661 let val = unsafe { read_element_u64(ext_state.read_vregs(), vs2, i, sew) };
663 unsafe {
665 write_element_u64(ext_state.write_vregs(), vd, out_idx, sew, val);
666 }
667 out_idx += 1;
668 }
669 ext_state.mark_vs_dirty();
670 ext_state.reset_vstart();
671}
672
673#[inline(always)]
686#[doc(hidden)]
687pub unsafe fn execute_whole_reg_move<const VLENB: usize>(
688 vregs: &mut VectorRegisterFile<VLENB>,
689 dst_base: VReg,
690 src_base: VReg,
691 count: u8,
692) {
693 let count = usize::from(count);
694 debug_assert!(count <= 8, "count must be <= 8 for vmvNr");
695 let mut tmp = [[0u8; VLENB]; 8];
698 for (k, item) in tmp.iter_mut().enumerate().take(count) {
699 let src = unsafe { VReg::from_bits(src_base.to_bits() + k as u8).unwrap_unchecked() };
701 *item = *vregs.get(src);
702 }
703 for (k, item) in tmp.iter().enumerate().take(count) {
704 let dst = unsafe { VReg::from_bits(dst_base.to_bits() + k as u8).unwrap_unchecked() };
706 *vregs.get_mut(dst) = *item;
707 }
708}