1#[cfg(test)]
4mod tests;
5pub mod zve64x_reduction_helpers;
6
7use crate::v::vector_registers::VectorRegistersExt;
8use crate::v::zve64x::arith::zve64x_arith_helpers;
9use crate::v::zve64x::zve64x_helpers;
10use crate::{ExecutableInstruction, ExecutionError, ProgramCounter, RegisterFile, VirtualMemory};
11use ab_riscv_macros::instruction_execution;
12use ab_riscv_primitives::prelude::*;
13use core::fmt;
14use core::ops::ControlFlow;
15
16#[instruction_execution]
17impl<Reg, Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
18 ExecutableInstruction<Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
19 for Zve64xReductionInstruction<Reg>
20where
21 Reg: Register,
22 Regs: RegisterFile<Reg>,
23 ExtState: VectorRegistersExt<Reg, CustomError>,
24 [(); ExtState::ELEN as usize]:,
25 [(); ExtState::VLEN as usize]:,
26 [(); ExtState::VLENB as usize]:,
27 Memory: VirtualMemory,
28 PC: ProgramCounter<Reg::Type, Memory, CustomError>,
29 CustomError: fmt::Debug,
30{
31 #[inline(always)]
32 fn execute(
33 self,
34 _regs: &mut Regs,
35 ext_state: &mut ExtState,
36 _memory: &mut Memory,
37 program_counter: &mut PC,
38 _system_instruction_handler: &mut InstructionHandler,
39 ) -> Result<ControlFlow<()>, ExecutionError<Reg::Type, CustomError>> {
40 match self {
41 Self::Vredsum { vd, vs2, vs1, vm } => {
42 if !ext_state.vector_instructions_allowed() {
43 Err(ExecutionError::IllegalInstruction {
44 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
45 })?;
46 }
47 let vtype = ext_state
48 .vtype()
49 .ok_or(ExecutionError::IllegalInstruction {
50 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
51 })?;
52 if u32::from(ext_state.vstart()) != 0 {
54 Err(ExecutionError::IllegalInstruction {
55 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
56 })?;
57 }
58 let group_regs = vtype.vlmul().register_count();
59 zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
60 program_counter,
61 vs2,
62 group_regs,
63 )?;
64 let sew = vtype.vsew();
65 let vl = ext_state.vl();
66 unsafe {
69 zve64x_reduction_helpers::execute_reduce_op(
70 ext_state,
71 vd,
72 vs2,
73 vs1,
74 vm,
75 vl,
76 sew,
77 |acc, elem, _sew| acc.wrapping_add(elem),
78 );
79 }
80 }
81 Self::Vredand { vd, vs2, vs1, vm } => {
82 if !ext_state.vector_instructions_allowed() {
83 Err(ExecutionError::IllegalInstruction {
84 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
85 })?;
86 }
87 let vtype = ext_state
88 .vtype()
89 .ok_or(ExecutionError::IllegalInstruction {
90 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
91 })?;
92 if u32::from(ext_state.vstart()) != 0 {
93 Err(ExecutionError::IllegalInstruction {
94 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
95 })?;
96 }
97 let group_regs = vtype.vlmul().register_count();
98 zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
99 program_counter,
100 vs2,
101 group_regs,
102 )?;
103 let sew = vtype.vsew();
104 let vl = ext_state.vl();
105 unsafe {
107 zve64x_reduction_helpers::execute_reduce_op(
108 ext_state,
109 vd,
110 vs2,
111 vs1,
112 vm,
113 vl,
114 sew,
115 |acc, elem, _sew| acc & elem,
116 );
117 }
118 }
119 Self::Vredor { vd, vs2, vs1, vm } => {
120 if !ext_state.vector_instructions_allowed() {
121 Err(ExecutionError::IllegalInstruction {
122 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
123 })?;
124 }
125 let vtype = ext_state
126 .vtype()
127 .ok_or(ExecutionError::IllegalInstruction {
128 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
129 })?;
130 if u32::from(ext_state.vstart()) != 0 {
131 Err(ExecutionError::IllegalInstruction {
132 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
133 })?;
134 }
135 let group_regs = vtype.vlmul().register_count();
136 zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
137 program_counter,
138 vs2,
139 group_regs,
140 )?;
141 let sew = vtype.vsew();
142 let vl = ext_state.vl();
143 unsafe {
145 zve64x_reduction_helpers::execute_reduce_op(
146 ext_state,
147 vd,
148 vs2,
149 vs1,
150 vm,
151 vl,
152 sew,
153 |acc, elem, _sew| acc | elem,
154 );
155 }
156 }
157 Self::Vredxor { vd, vs2, vs1, vm } => {
158 if !ext_state.vector_instructions_allowed() {
159 Err(ExecutionError::IllegalInstruction {
160 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
161 })?;
162 }
163 let vtype = ext_state
164 .vtype()
165 .ok_or(ExecutionError::IllegalInstruction {
166 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
167 })?;
168 if u32::from(ext_state.vstart()) != 0 {
169 Err(ExecutionError::IllegalInstruction {
170 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
171 })?;
172 }
173 let group_regs = vtype.vlmul().register_count();
174 zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
175 program_counter,
176 vs2,
177 group_regs,
178 )?;
179 let sew = vtype.vsew();
180 let vl = ext_state.vl();
181 unsafe {
183 zve64x_reduction_helpers::execute_reduce_op(
184 ext_state,
185 vd,
186 vs2,
187 vs1,
188 vm,
189 vl,
190 sew,
191 |acc, elem, _sew| acc ^ elem,
192 );
193 }
194 }
195 Self::Vredminu { vd, vs2, vs1, vm } => {
196 if !ext_state.vector_instructions_allowed() {
197 Err(ExecutionError::IllegalInstruction {
198 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
199 })?;
200 }
201 let vtype = ext_state
202 .vtype()
203 .ok_or(ExecutionError::IllegalInstruction {
204 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
205 })?;
206 if u32::from(ext_state.vstart()) != 0 {
207 Err(ExecutionError::IllegalInstruction {
208 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
209 })?;
210 }
211 let group_regs = vtype.vlmul().register_count();
212 zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
213 program_counter,
214 vs2,
215 group_regs,
216 )?;
217 let sew = vtype.vsew();
218 let vl = ext_state.vl();
219 unsafe {
221 zve64x_reduction_helpers::execute_reduce_op(
222 ext_state,
223 vd,
224 vs2,
225 vs1,
226 vm,
227 vl,
228 sew,
229 |acc, elem, sew| {
230 let mask = zve64x_arith_helpers::sew_mask(sew);
231 if elem & mask < acc & mask { elem } else { acc }
232 },
233 );
234 }
235 }
236 Self::Vredmin { vd, vs2, vs1, vm } => {
237 if !ext_state.vector_instructions_allowed() {
238 Err(ExecutionError::IllegalInstruction {
239 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
240 })?;
241 }
242 let vtype = ext_state
243 .vtype()
244 .ok_or(ExecutionError::IllegalInstruction {
245 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
246 })?;
247 if u32::from(ext_state.vstart()) != 0 {
248 Err(ExecutionError::IllegalInstruction {
249 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
250 })?;
251 }
252 let group_regs = vtype.vlmul().register_count();
253 zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
254 program_counter,
255 vs2,
256 group_regs,
257 )?;
258 let sew = vtype.vsew();
259 let vl = ext_state.vl();
260 unsafe {
262 zve64x_reduction_helpers::execute_reduce_op(
263 ext_state,
264 vd,
265 vs2,
266 vs1,
267 vm,
268 vl,
269 sew,
270 |acc, elem, sew| {
271 if zve64x_arith_helpers::sign_extend(elem, sew)
272 < zve64x_arith_helpers::sign_extend(acc, sew)
273 {
274 elem
275 } else {
276 acc
277 }
278 },
279 );
280 }
281 }
282 Self::Vredmaxu { vd, vs2, vs1, vm } => {
283 if !ext_state.vector_instructions_allowed() {
284 Err(ExecutionError::IllegalInstruction {
285 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
286 })?;
287 }
288 let vtype = ext_state
289 .vtype()
290 .ok_or(ExecutionError::IllegalInstruction {
291 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
292 })?;
293 if u32::from(ext_state.vstart()) != 0 {
294 Err(ExecutionError::IllegalInstruction {
295 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
296 })?;
297 }
298 let group_regs = vtype.vlmul().register_count();
299 zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
300 program_counter,
301 vs2,
302 group_regs,
303 )?;
304 let sew = vtype.vsew();
305 let vl = ext_state.vl();
306 unsafe {
308 zve64x_reduction_helpers::execute_reduce_op(
309 ext_state,
310 vd,
311 vs2,
312 vs1,
313 vm,
314 vl,
315 sew,
316 |acc, elem, sew| {
317 let mask = zve64x_arith_helpers::sew_mask(sew);
318 if elem & mask > acc & mask { elem } else { acc }
319 },
320 );
321 }
322 }
323 Self::Vredmax { vd, vs2, vs1, vm } => {
324 if !ext_state.vector_instructions_allowed() {
325 Err(ExecutionError::IllegalInstruction {
326 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
327 })?;
328 }
329 let vtype = ext_state
330 .vtype()
331 .ok_or(ExecutionError::IllegalInstruction {
332 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
333 })?;
334 if u32::from(ext_state.vstart()) != 0 {
335 Err(ExecutionError::IllegalInstruction {
336 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
337 })?;
338 }
339 let group_regs = vtype.vlmul().register_count();
340 zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
341 program_counter,
342 vs2,
343 group_regs,
344 )?;
345 let sew = vtype.vsew();
346 let vl = ext_state.vl();
347 unsafe {
349 zve64x_reduction_helpers::execute_reduce_op(
350 ext_state,
351 vd,
352 vs2,
353 vs1,
354 vm,
355 vl,
356 sew,
357 |acc, elem, sew| {
358 if zve64x_arith_helpers::sign_extend(elem, sew)
359 > zve64x_arith_helpers::sign_extend(acc, sew)
360 {
361 elem
362 } else {
363 acc
364 }
365 },
366 );
367 }
368 }
369 Self::Vwredsumu { vd, vs2, vs1, vm } => {
370 if !ext_state.vector_instructions_allowed() {
371 Err(ExecutionError::IllegalInstruction {
372 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
373 })?;
374 }
375 let vtype = ext_state
376 .vtype()
377 .ok_or(ExecutionError::IllegalInstruction {
378 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
379 })?;
380 if u32::from(ext_state.vstart()) != 0 {
381 Err(ExecutionError::IllegalInstruction {
382 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
383 })?;
384 }
385 if u32::from(vtype.vsew().bits()) * 2 > ExtState::ELEN {
387 Err(ExecutionError::IllegalInstruction {
388 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
389 })?;
390 }
391 let group_regs = vtype.vlmul().register_count();
392 zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
393 program_counter,
394 vs2,
395 group_regs,
396 )?;
397 let sew = vtype.vsew();
398 let vl = ext_state.vl();
399 unsafe {
402 zve64x_reduction_helpers::execute_widening_reduce_op(
403 ext_state,
404 vd,
405 vs2,
406 vs1,
407 vm,
408 vl,
409 sew,
410 |acc, elem, _sew| acc.wrapping_add(elem),
412 false,
413 );
414 }
415 }
416 Self::Vwredsum { vd, vs2, vs1, vm } => {
417 if !ext_state.vector_instructions_allowed() {
418 Err(ExecutionError::IllegalInstruction {
419 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
420 })?;
421 }
422 let vtype = ext_state
423 .vtype()
424 .ok_or(ExecutionError::IllegalInstruction {
425 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
426 })?;
427 if u32::from(ext_state.vstart()) != 0 {
428 Err(ExecutionError::IllegalInstruction {
429 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
430 })?;
431 }
432 if u32::from(vtype.vsew().bits()) * 2 > ExtState::ELEN {
433 Err(ExecutionError::IllegalInstruction {
434 address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
435 })?;
436 }
437 let group_regs = vtype.vlmul().register_count();
438 zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
439 program_counter,
440 vs2,
441 group_regs,
442 )?;
443 let sew = vtype.vsew();
444 let vl = ext_state.vl();
445 unsafe {
447 zve64x_reduction_helpers::execute_widening_reduce_op(
448 ext_state,
449 vd,
450 vs2,
451 vs1,
452 vm,
453 vl,
454 sew,
455 |acc, elem, _sew| acc.wrapping_add(elem),
457 true,
458 );
459 }
460 }
461 Self::PhantomZve64xReduction(_) => unreachable!("Never constructed"),
462 }
463
464 Ok(ControlFlow::Continue(()))
465 }
466}