Skip to main content

ab_riscv_interpreter/v/zve64x/
reduction.rs

//! Zve64x integer reduction instructions
2
3#[cfg(test)]
4mod tests;
5pub mod zve64x_reduction_helpers;
6
7use crate::v::vector_registers::VectorRegistersExt;
8use crate::v::zve64x::arith::zve64x_arith_helpers;
9use crate::v::zve64x::zve64x_helpers;
10use crate::{
11    ExecutableInstruction, ExecutableInstructionCsr, ExecutableInstructionOperands, ExecutionError,
12    ProgramCounter, RegisterFile, Rs1Rs2OperandValues, Rs1Rs2Operands, VirtualMemory,
13};
14use ab_riscv_macros::instruction_execution;
15use ab_riscv_primitives::prelude::*;
16use core::fmt;
17use core::ops::ControlFlow;
18
19#[instruction_execution]
20impl<Reg> ExecutableInstructionOperands for Zve64xReductionInstruction<Reg> where Reg: Register {}
21
22#[instruction_execution]
23impl<Reg, ExtState, CustomError> ExecutableInstructionCsr<ExtState, CustomError>
24    for Zve64xReductionInstruction<Reg>
25where
26    Reg: Register,
27{
28}
29
30#[instruction_execution]
31impl<Reg, Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
32    ExecutableInstruction<Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
33    for Zve64xReductionInstruction<Reg>
34where
35    Reg: Register,
36    Regs: RegisterFile<Reg>,
37    ExtState: VectorRegistersExt<Reg, CustomError>,
38    [(); ExtState::ELEN as usize]:,
39    [(); ExtState::VLEN as usize]:,
40    [(); ExtState::VLENB as usize]:,
41    Memory: VirtualMemory,
42    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
43    CustomError: fmt::Debug,
44{
45    #[inline(always)]
46    fn execute(
47        self,
48        Rs1Rs2OperandValues {
49            rs1_value: _,
50            rs2_value: _,
51        }: Rs1Rs2OperandValues<<Self::Reg as Register>::Type>,
52        _regs: &mut Regs,
53        ext_state: &mut ExtState,
54        _memory: &mut Memory,
55        program_counter: &mut PC,
56        _system_instruction_handler: &mut InstructionHandler,
57    ) -> Result<
58        ControlFlow<(), (Self::Reg, <Self::Reg as Register>::Type)>,
59        ExecutionError<Reg::Type, CustomError>,
60    > {
61        match self {
62            Self::Vredsum { vd, vs2, vs1, vm } => {
63                if !ext_state.vector_instructions_allowed() {
64                    Err(ExecutionError::IllegalInstruction {
65                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
66                    })?;
67                }
68                let vtype = ext_state
69                    .vtype()
70                    .ok_or(ExecutionError::IllegalInstruction {
71                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
72                    })?;
73                // Spec ยง14: reductions with vstart > 0 are reserved; raise illegal instruction
74                if u32::from(ext_state.vstart()) != 0 {
75                    Err(ExecutionError::IllegalInstruction {
76                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
77                    })?;
78                }
79                let group_regs = vtype.vlmul().register_count();
80                zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
81                    program_counter,
82                    vs2,
83                    group_regs,
84                )?;
85                let sew = vtype.vsew();
86                let vl = ext_state.vl();
87                // SAFETY: `vs2` alignment checked; `vstart == 0` checked;
88                // `vs1` and `vd` are single-register scalar operands
89                unsafe {
90                    zve64x_reduction_helpers::execute_reduce_op(
91                        ext_state,
92                        vd,
93                        vs2,
94                        vs1,
95                        vm,
96                        vl,
97                        sew,
98                        |acc, elem, _sew| acc.wrapping_add(elem),
99                    );
100                }
101            }
102            Self::Vredand { vd, vs2, vs1, vm } => {
103                if !ext_state.vector_instructions_allowed() {
104                    Err(ExecutionError::IllegalInstruction {
105                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
106                    })?;
107                }
108                let vtype = ext_state
109                    .vtype()
110                    .ok_or(ExecutionError::IllegalInstruction {
111                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
112                    })?;
113                if u32::from(ext_state.vstart()) != 0 {
114                    Err(ExecutionError::IllegalInstruction {
115                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
116                    })?;
117                }
118                let group_regs = vtype.vlmul().register_count();
119                zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
120                    program_counter,
121                    vs2,
122                    group_regs,
123                )?;
124                let sew = vtype.vsew();
125                let vl = ext_state.vl();
126                // SAFETY: see `Vredsum`
127                unsafe {
128                    zve64x_reduction_helpers::execute_reduce_op(
129                        ext_state,
130                        vd,
131                        vs2,
132                        vs1,
133                        vm,
134                        vl,
135                        sew,
136                        |acc, elem, _sew| acc & elem,
137                    );
138                }
139            }
140            Self::Vredor { vd, vs2, vs1, vm } => {
141                if !ext_state.vector_instructions_allowed() {
142                    Err(ExecutionError::IllegalInstruction {
143                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
144                    })?;
145                }
146                let vtype = ext_state
147                    .vtype()
148                    .ok_or(ExecutionError::IllegalInstruction {
149                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
150                    })?;
151                if u32::from(ext_state.vstart()) != 0 {
152                    Err(ExecutionError::IllegalInstruction {
153                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
154                    })?;
155                }
156                let group_regs = vtype.vlmul().register_count();
157                zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
158                    program_counter,
159                    vs2,
160                    group_regs,
161                )?;
162                let sew = vtype.vsew();
163                let vl = ext_state.vl();
164                // SAFETY: see `Vredsum`
165                unsafe {
166                    zve64x_reduction_helpers::execute_reduce_op(
167                        ext_state,
168                        vd,
169                        vs2,
170                        vs1,
171                        vm,
172                        vl,
173                        sew,
174                        |acc, elem, _sew| acc | elem,
175                    );
176                }
177            }
178            Self::Vredxor { vd, vs2, vs1, vm } => {
179                if !ext_state.vector_instructions_allowed() {
180                    Err(ExecutionError::IllegalInstruction {
181                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
182                    })?;
183                }
184                let vtype = ext_state
185                    .vtype()
186                    .ok_or(ExecutionError::IllegalInstruction {
187                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
188                    })?;
189                if u32::from(ext_state.vstart()) != 0 {
190                    Err(ExecutionError::IllegalInstruction {
191                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
192                    })?;
193                }
194                let group_regs = vtype.vlmul().register_count();
195                zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
196                    program_counter,
197                    vs2,
198                    group_regs,
199                )?;
200                let sew = vtype.vsew();
201                let vl = ext_state.vl();
202                // SAFETY: see `Vredsum`
203                unsafe {
204                    zve64x_reduction_helpers::execute_reduce_op(
205                        ext_state,
206                        vd,
207                        vs2,
208                        vs1,
209                        vm,
210                        vl,
211                        sew,
212                        |acc, elem, _sew| acc ^ elem,
213                    );
214                }
215            }
216            Self::Vredminu { vd, vs2, vs1, vm } => {
217                if !ext_state.vector_instructions_allowed() {
218                    Err(ExecutionError::IllegalInstruction {
219                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
220                    })?;
221                }
222                let vtype = ext_state
223                    .vtype()
224                    .ok_or(ExecutionError::IllegalInstruction {
225                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
226                    })?;
227                if u32::from(ext_state.vstart()) != 0 {
228                    Err(ExecutionError::IllegalInstruction {
229                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
230                    })?;
231                }
232                let group_regs = vtype.vlmul().register_count();
233                zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
234                    program_counter,
235                    vs2,
236                    group_regs,
237                )?;
238                let sew = vtype.vsew();
239                let vl = ext_state.vl();
240                // SAFETY: see `Vredsum`
241                unsafe {
242                    zve64x_reduction_helpers::execute_reduce_op(
243                        ext_state,
244                        vd,
245                        vs2,
246                        vs1,
247                        vm,
248                        vl,
249                        sew,
250                        |acc, elem, sew| {
251                            let mask = zve64x_arith_helpers::sew_mask(sew);
252                            if elem & mask < acc & mask { elem } else { acc }
253                        },
254                    );
255                }
256            }
257            Self::Vredmin { vd, vs2, vs1, vm } => {
258                if !ext_state.vector_instructions_allowed() {
259                    Err(ExecutionError::IllegalInstruction {
260                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
261                    })?;
262                }
263                let vtype = ext_state
264                    .vtype()
265                    .ok_or(ExecutionError::IllegalInstruction {
266                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
267                    })?;
268                if u32::from(ext_state.vstart()) != 0 {
269                    Err(ExecutionError::IllegalInstruction {
270                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
271                    })?;
272                }
273                let group_regs = vtype.vlmul().register_count();
274                zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
275                    program_counter,
276                    vs2,
277                    group_regs,
278                )?;
279                let sew = vtype.vsew();
280                let vl = ext_state.vl();
281                // SAFETY: see `Vredsum`
282                unsafe {
283                    zve64x_reduction_helpers::execute_reduce_op(
284                        ext_state,
285                        vd,
286                        vs2,
287                        vs1,
288                        vm,
289                        vl,
290                        sew,
291                        |acc, elem, sew| {
292                            if zve64x_arith_helpers::sign_extend(elem, sew)
293                                < zve64x_arith_helpers::sign_extend(acc, sew)
294                            {
295                                elem
296                            } else {
297                                acc
298                            }
299                        },
300                    );
301                }
302            }
303            Self::Vredmaxu { vd, vs2, vs1, vm } => {
304                if !ext_state.vector_instructions_allowed() {
305                    Err(ExecutionError::IllegalInstruction {
306                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
307                    })?;
308                }
309                let vtype = ext_state
310                    .vtype()
311                    .ok_or(ExecutionError::IllegalInstruction {
312                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
313                    })?;
314                if u32::from(ext_state.vstart()) != 0 {
315                    Err(ExecutionError::IllegalInstruction {
316                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
317                    })?;
318                }
319                let group_regs = vtype.vlmul().register_count();
320                zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
321                    program_counter,
322                    vs2,
323                    group_regs,
324                )?;
325                let sew = vtype.vsew();
326                let vl = ext_state.vl();
327                // SAFETY: see `Vredsum`
328                unsafe {
329                    zve64x_reduction_helpers::execute_reduce_op(
330                        ext_state,
331                        vd,
332                        vs2,
333                        vs1,
334                        vm,
335                        vl,
336                        sew,
337                        |acc, elem, sew| {
338                            let mask = zve64x_arith_helpers::sew_mask(sew);
339                            if elem & mask > acc & mask { elem } else { acc }
340                        },
341                    );
342                }
343            }
344            Self::Vredmax { vd, vs2, vs1, vm } => {
345                if !ext_state.vector_instructions_allowed() {
346                    Err(ExecutionError::IllegalInstruction {
347                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
348                    })?;
349                }
350                let vtype = ext_state
351                    .vtype()
352                    .ok_or(ExecutionError::IllegalInstruction {
353                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
354                    })?;
355                if u32::from(ext_state.vstart()) != 0 {
356                    Err(ExecutionError::IllegalInstruction {
357                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
358                    })?;
359                }
360                let group_regs = vtype.vlmul().register_count();
361                zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
362                    program_counter,
363                    vs2,
364                    group_regs,
365                )?;
366                let sew = vtype.vsew();
367                let vl = ext_state.vl();
368                // SAFETY: see `Vredsum`
369                unsafe {
370                    zve64x_reduction_helpers::execute_reduce_op(
371                        ext_state,
372                        vd,
373                        vs2,
374                        vs1,
375                        vm,
376                        vl,
377                        sew,
378                        |acc, elem, sew| {
379                            if zve64x_arith_helpers::sign_extend(elem, sew)
380                                > zve64x_arith_helpers::sign_extend(acc, sew)
381                            {
382                                elem
383                            } else {
384                                acc
385                            }
386                        },
387                    );
388                }
389            }
390            Self::Vwredsumu { vd, vs2, vs1, vm } => {
391                if !ext_state.vector_instructions_allowed() {
392                    Err(ExecutionError::IllegalInstruction {
393                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
394                    })?;
395                }
396                let vtype = ext_state
397                    .vtype()
398                    .ok_or(ExecutionError::IllegalInstruction {
399                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
400                    })?;
401                if u32::from(ext_state.vstart()) != 0 {
402                    Err(ExecutionError::IllegalInstruction {
403                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
404                    })?;
405                }
406                // Widening: 2*SEW must fit in ELEN
407                if u32::from(vtype.vsew().bits()) * 2 > ExtState::ELEN {
408                    Err(ExecutionError::IllegalInstruction {
409                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
410                    })?;
411                }
412                let group_regs = vtype.vlmul().register_count();
413                zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
414                    program_counter,
415                    vs2,
416                    group_regs,
417                )?;
418                let sew = vtype.vsew();
419                let vl = ext_state.vl();
420                // SAFETY: `vs2` alignment checked; widening SEW constraint checked above;
421                // `vstart == 0` checked; `vd` and `vs1` are single-register 2*SEW scalar operands
422                unsafe {
423                    zve64x_reduction_helpers::execute_widening_reduce_op(
424                        ext_state,
425                        vd,
426                        vs2,
427                        vs1,
428                        vm,
429                        vl,
430                        sew,
431                        // Zero-extend vs2 elements then accumulate
432                        |acc, elem, _sew| acc.wrapping_add(elem),
433                        false,
434                    );
435                }
436            }
437            Self::Vwredsum { vd, vs2, vs1, vm } => {
438                if !ext_state.vector_instructions_allowed() {
439                    Err(ExecutionError::IllegalInstruction {
440                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
441                    })?;
442                }
443                let vtype = ext_state
444                    .vtype()
445                    .ok_or(ExecutionError::IllegalInstruction {
446                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
447                    })?;
448                if u32::from(ext_state.vstart()) != 0 {
449                    Err(ExecutionError::IllegalInstruction {
450                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
451                    })?;
452                }
453                if u32::from(vtype.vsew().bits()) * 2 > ExtState::ELEN {
454                    Err(ExecutionError::IllegalInstruction {
455                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
456                    })?;
457                }
458                let group_regs = vtype.vlmul().register_count();
459                zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
460                    program_counter,
461                    vs2,
462                    group_regs,
463                )?;
464                let sew = vtype.vsew();
465                let vl = ext_state.vl();
466                // SAFETY: see `Vwredsumu`
467                unsafe {
468                    zve64x_reduction_helpers::execute_widening_reduce_op(
469                        ext_state,
470                        vd,
471                        vs2,
472                        vs1,
473                        vm,
474                        vl,
475                        sew,
476                        // Sign-extend vs2 elements then accumulate
477                        |acc, elem, _sew| acc.wrapping_add(elem),
478                        true,
479                    );
480                }
481            }
482        }
483
484        Ok(ControlFlow::Continue(Default::default()))
485    }
486}