Skip to main content

ab_riscv_interpreter/v/zve64x/
reduction.rs

1//! Zve64x integer reduction instructions
2
3#[cfg(test)]
4mod tests;
5pub mod zve64x_reduction_helpers;
6
7use crate::v::vector_registers::VectorRegistersExt;
8use crate::v::zve64x::arith::zve64x_arith_helpers;
9use crate::v::zve64x::zve64x_helpers;
10use crate::{ExecutableInstruction, ExecutionError, ProgramCounter, RegisterFile, VirtualMemory};
11use ab_riscv_macros::instruction_execution;
12use ab_riscv_primitives::prelude::*;
13use core::fmt;
14use core::ops::ControlFlow;
15
// Execution of the Zve64x integer reduction instructions.
//
// Every arm of the `match` in `execute` runs the same sequence of checks before touching vector
// state. Checks 1-4 fail with `ExecutionError::IllegalInstruction`, whose address is
// `program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE)`:
//   1. `ext_state.vector_instructions_allowed()` must be true,
//   2. `ext_state.vtype()` must yield a value,
//   3. `ext_state.vstart()` must be zero,
//   4. (widening arms only) `2 * SEW` must not exceed `ExtState::ELEN`,
//   5. `check_vreg_group_alignment` must succeed for `vs2` with the register count of the
//      current LMUL (its error is propagated unchanged with `?`).
// Only then is the unsafe reduction helper called with the arm-specific combining closure.
//
// NOTE(review): the arms are written out in full instead of sharing a helper; presumably the
// `#[instruction_execution]` macro depends on this shape - confirm before refactoring.
16#[instruction_execution]
17impl<Reg, Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
18    ExecutableInstruction<Regs, ExtState, Memory, PC, InstructionHandler, CustomError>
19    for Zve64xReductionInstruction<Reg>
20where
21    Reg: Register,
22    Regs: RegisterFile<Reg>,
23    ExtState: VectorRegistersExt<Reg, CustomError>,
24    [(); ExtState::ELEN as usize]:,
25    [(); ExtState::VLEN as usize]:,
26    [(); ExtState::VLENB as usize]:,
27    Memory: VirtualMemory,
28    PC: ProgramCounter<Reg::Type, Memory, CustomError>,
29    CustomError: fmt::Debug,
30{
    // Executes one reduction instruction against the vector state held in `ext_state`.
    //
    // `_regs`, `_memory` and `_system_instruction_handler` are not used by any arm.
    // `program_counter` is read for the address reported in `IllegalInstruction` errors and is
    // handed to `check_vreg_group_alignment`.
    //
    // Returns `Ok(ControlFlow::Continue(()))` once the selected arm completes; a failed check is
    // propagated with `?` as an `ExecutionError`.
31    #[inline(always)]
32    fn execute(
33        self,
34        _regs: &mut Regs,
35        ext_state: &mut ExtState,
36        _memory: &mut Memory,
37        program_counter: &mut PC,
38        _system_instruction_handler: &mut InstructionHandler,
39    ) -> Result<ControlFlow<()>, ExecutionError<Reg::Type, CustomError>> {
40        match self {
            // Single-width sum reduction
41            Self::Vredsum { vd, vs2, vs1, vm } => {
                // Check 1: vector instructions must currently be allowed
42                if !ext_state.vector_instructions_allowed() {
43                    Err(ExecutionError::IllegalInstruction {
44                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
45                    })?;
46                }
                // Check 2: `vtype()` must yield a value. `ok_or` takes its argument eagerly, so
                // `old_pc` is evaluated here even when `vtype()` succeeds.
47                let vtype = ext_state
48                    .vtype()
49                    .ok_or(ExecutionError::IllegalInstruction {
50                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
51                    })?;
52                // Spec §14: reductions with vstart > 0 are reserved; raise illegal instruction
53                if u32::from(ext_state.vstart()) != 0 {
54                    Err(ExecutionError::IllegalInstruction {
55                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
56                    })?;
57                }
                // Only `vs2` is checked as a register group sized by the current LMUL
58                let group_regs = vtype.vlmul().register_count();
59                zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
60                    program_counter,
61                    vs2,
62                    group_regs,
63                )?;
64                let sew = vtype.vsew();
65                let vl = ext_state.vl();
66                // SAFETY: `vs2` alignment checked; `vstart == 0` checked;
67                // `vs1` and `vd` are single-register scalar operands
68                unsafe {
69                    zve64x_reduction_helpers::execute_reduce_op(
70                        ext_state,
71                        vd,
72                        vs2,
73                        vs1,
74                        vm,
75                        vl,
76                        sew,
                        // Combining step: wrapping addition; the SEW argument is not needed
77                        |acc, elem, _sew| acc.wrapping_add(elem),
78                    );
79                }
80            }
            // Bitwise AND reduction; same checks as `Vredsum`, only the closure differs
81            Self::Vredand { vd, vs2, vs1, vm } => {
82                if !ext_state.vector_instructions_allowed() {
83                    Err(ExecutionError::IllegalInstruction {
84                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
85                    })?;
86                }
87                let vtype = ext_state
88                    .vtype()
89                    .ok_or(ExecutionError::IllegalInstruction {
90                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
91                    })?;
92                if u32::from(ext_state.vstart()) != 0 {
93                    Err(ExecutionError::IllegalInstruction {
94                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
95                    })?;
96                }
97                let group_regs = vtype.vlmul().register_count();
98                zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
99                    program_counter,
100                    vs2,
101                    group_regs,
102                )?;
103                let sew = vtype.vsew();
104                let vl = ext_state.vl();
105                // SAFETY: see `Vredsum`
106                unsafe {
107                    zve64x_reduction_helpers::execute_reduce_op(
108                        ext_state,
109                        vd,
110                        vs2,
111                        vs1,
112                        vm,
113                        vl,
114                        sew,
                        // Combining step: bitwise AND
115                        |acc, elem, _sew| acc & elem,
116                    );
117                }
118            }
            // Bitwise OR reduction; same checks as `Vredsum`, only the closure differs
119            Self::Vredor { vd, vs2, vs1, vm } => {
120                if !ext_state.vector_instructions_allowed() {
121                    Err(ExecutionError::IllegalInstruction {
122                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
123                    })?;
124                }
125                let vtype = ext_state
126                    .vtype()
127                    .ok_or(ExecutionError::IllegalInstruction {
128                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
129                    })?;
130                if u32::from(ext_state.vstart()) != 0 {
131                    Err(ExecutionError::IllegalInstruction {
132                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
133                    })?;
134                }
135                let group_regs = vtype.vlmul().register_count();
136                zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
137                    program_counter,
138                    vs2,
139                    group_regs,
140                )?;
141                let sew = vtype.vsew();
142                let vl = ext_state.vl();
143                // SAFETY: see `Vredsum`
144                unsafe {
145                    zve64x_reduction_helpers::execute_reduce_op(
146                        ext_state,
147                        vd,
148                        vs2,
149                        vs1,
150                        vm,
151                        vl,
152                        sew,
                        // Combining step: bitwise OR
153                        |acc, elem, _sew| acc | elem,
154                    );
155                }
156            }
            // Bitwise XOR reduction; same checks as `Vredsum`, only the closure differs
157            Self::Vredxor { vd, vs2, vs1, vm } => {
158                if !ext_state.vector_instructions_allowed() {
159                    Err(ExecutionError::IllegalInstruction {
160                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
161                    })?;
162                }
163                let vtype = ext_state
164                    .vtype()
165                    .ok_or(ExecutionError::IllegalInstruction {
166                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
167                    })?;
168                if u32::from(ext_state.vstart()) != 0 {
169                    Err(ExecutionError::IllegalInstruction {
170                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
171                    })?;
172                }
173                let group_regs = vtype.vlmul().register_count();
174                zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
175                    program_counter,
176                    vs2,
177                    group_regs,
178                )?;
179                let sew = vtype.vsew();
180                let vl = ext_state.vl();
181                // SAFETY: see `Vredsum`
182                unsafe {
183                    zve64x_reduction_helpers::execute_reduce_op(
184                        ext_state,
185                        vd,
186                        vs2,
187                        vs1,
188                        vm,
189                        vl,
190                        sew,
                        // Combining step: bitwise XOR
191                        |acc, elem, _sew| acc ^ elem,
192                    );
193                }
194            }
            // Unsigned minimum reduction; same checks as `Vredsum`, only the closure differs
195            Self::Vredminu { vd, vs2, vs1, vm } => {
196                if !ext_state.vector_instructions_allowed() {
197                    Err(ExecutionError::IllegalInstruction {
198                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
199                    })?;
200                }
201                let vtype = ext_state
202                    .vtype()
203                    .ok_or(ExecutionError::IllegalInstruction {
204                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
205                    })?;
206                if u32::from(ext_state.vstart()) != 0 {
207                    Err(ExecutionError::IllegalInstruction {
208                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
209                    })?;
210                }
211                let group_regs = vtype.vlmul().register_count();
212                zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
213                    program_counter,
214                    vs2,
215                    group_regs,
216                )?;
217                let sew = vtype.vsew();
218                let vl = ext_state.vl();
219                // SAFETY: see `Vredsum`
220                unsafe {
221                    zve64x_reduction_helpers::execute_reduce_op(
222                        ext_state,
223                        vd,
224                        vs2,
225                        vs1,
226                        vm,
227                        vl,
228                        sew,
                        // Combining step: keep `elem` only when its masked value is strictly
                        // smaller, so ties keep `acc`. In Rust `&` binds tighter than `<`, so
                        // this compares `(elem & mask)` with `(acc & mask)`; the unmasked operand
                        // is what gets returned.
                        // NOTE(review): `sew_mask` presumably yields the low-SEW-bits mask -
                        // confirm in `zve64x_arith_helpers`.
229                        |acc, elem, sew| {
230                            let mask = zve64x_arith_helpers::sew_mask(sew);
231                            if elem & mask < acc & mask { elem } else { acc }
232                        },
233                    );
234                }
235            }
            // Signed minimum reduction; same checks as `Vredsum`, only the closure differs
236            Self::Vredmin { vd, vs2, vs1, vm } => {
237                if !ext_state.vector_instructions_allowed() {
238                    Err(ExecutionError::IllegalInstruction {
239                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
240                    })?;
241                }
242                let vtype = ext_state
243                    .vtype()
244                    .ok_or(ExecutionError::IllegalInstruction {
245                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
246                    })?;
247                if u32::from(ext_state.vstart()) != 0 {
248                    Err(ExecutionError::IllegalInstruction {
249                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
250                    })?;
251                }
252                let group_regs = vtype.vlmul().register_count();
253                zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
254                    program_counter,
255                    vs2,
256                    group_regs,
257                )?;
258                let sew = vtype.vsew();
259                let vl = ext_state.vl();
260                // SAFETY: see `Vredsum`
261                unsafe {
262                    zve64x_reduction_helpers::execute_reduce_op(
263                        ext_state,
264                        vd,
265                        vs2,
266                        vs1,
267                        vm,
268                        vl,
269                        sew,
                        // Combining step: compare the `sign_extend`ed values, but return the
                        // original (un-extended) operand; ties keep `acc`
270                        |acc, elem, sew| {
271                            if zve64x_arith_helpers::sign_extend(elem, sew)
272                                < zve64x_arith_helpers::sign_extend(acc, sew)
273                            {
274                                elem
275                            } else {
276                                acc
277                            }
278                        },
279                    );
280                }
281            }
            // Unsigned maximum reduction; same checks as `Vredsum`, only the closure differs
282            Self::Vredmaxu { vd, vs2, vs1, vm } => {
283                if !ext_state.vector_instructions_allowed() {
284                    Err(ExecutionError::IllegalInstruction {
285                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
286                    })?;
287                }
288                let vtype = ext_state
289                    .vtype()
290                    .ok_or(ExecutionError::IllegalInstruction {
291                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
292                    })?;
293                if u32::from(ext_state.vstart()) != 0 {
294                    Err(ExecutionError::IllegalInstruction {
295                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
296                    })?;
297                }
298                let group_regs = vtype.vlmul().register_count();
299                zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
300                    program_counter,
301                    vs2,
302                    group_regs,
303                )?;
304                let sew = vtype.vsew();
305                let vl = ext_state.vl();
306                // SAFETY: see `Vredsum`
307                unsafe {
308                    zve64x_reduction_helpers::execute_reduce_op(
309                        ext_state,
310                        vd,
311                        vs2,
312                        vs1,
313                        vm,
314                        vl,
315                        sew,
                        // Combining step: keep `elem` only when its masked value is strictly
                        // larger, so ties keep `acc`. `&` binds tighter than `>`, so this compares
                        // `(elem & mask)` with `(acc & mask)`.
316                        |acc, elem, sew| {
317                            let mask = zve64x_arith_helpers::sew_mask(sew);
318                            if elem & mask > acc & mask { elem } else { acc }
319                        },
320                    );
321                }
322            }
            // Signed maximum reduction; same checks as `Vredsum`, only the closure differs
323            Self::Vredmax { vd, vs2, vs1, vm } => {
324                if !ext_state.vector_instructions_allowed() {
325                    Err(ExecutionError::IllegalInstruction {
326                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
327                    })?;
328                }
329                let vtype = ext_state
330                    .vtype()
331                    .ok_or(ExecutionError::IllegalInstruction {
332                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
333                    })?;
334                if u32::from(ext_state.vstart()) != 0 {
335                    Err(ExecutionError::IllegalInstruction {
336                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
337                    })?;
338                }
339                let group_regs = vtype.vlmul().register_count();
340                zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
341                    program_counter,
342                    vs2,
343                    group_regs,
344                )?;
345                let sew = vtype.vsew();
346                let vl = ext_state.vl();
347                // SAFETY: see `Vredsum`
348                unsafe {
349                    zve64x_reduction_helpers::execute_reduce_op(
350                        ext_state,
351                        vd,
352                        vs2,
353                        vs1,
354                        vm,
355                        vl,
356                        sew,
                        // Combining step: compare the `sign_extend`ed values, but return the
                        // original (un-extended) operand; ties keep `acc`
357                        |acc, elem, sew| {
358                            if zve64x_arith_helpers::sign_extend(elem, sew)
359                                > zve64x_arith_helpers::sign_extend(acc, sew)
360                            {
361                                elem
362                            } else {
363                                acc
364                            }
365                        },
366                    );
367                }
368            }
            // Widening unsigned sum reduction: the `Vredsum` checks plus the 2*SEW limit
369            Self::Vwredsumu { vd, vs2, vs1, vm } => {
370                if !ext_state.vector_instructions_allowed() {
371                    Err(ExecutionError::IllegalInstruction {
372                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
373                    })?;
374                }
375                let vtype = ext_state
376                    .vtype()
377                    .ok_or(ExecutionError::IllegalInstruction {
378                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
379                    })?;
380                if u32::from(ext_state.vstart()) != 0 {
381                    Err(ExecutionError::IllegalInstruction {
382                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
383                    })?;
384                }
385                // Widening: 2*SEW must fit in ELEN
386                if u32::from(vtype.vsew().bits()) * 2 > ExtState::ELEN {
387                    Err(ExecutionError::IllegalInstruction {
388                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
389                    })?;
390                }
391                let group_regs = vtype.vlmul().register_count();
392                zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
393                    program_counter,
394                    vs2,
395                    group_regs,
396                )?;
397                let sew = vtype.vsew();
398                let vl = ext_state.vl();
399                // SAFETY: `vs2` alignment checked; widening SEW constraint checked above;
400                // `vstart == 0` checked; `vd` and `vs1` are single-register 2*SEW scalar operands
401                unsafe {
402                    zve64x_reduction_helpers::execute_widening_reduce_op(
403                        ext_state,
404                        vd,
405                        vs2,
406                        vs1,
407                        vm,
408                        vl,
409                        sew,
410                        // Zero-extend vs2 elements then accumulate
411                        |acc, elem, _sew| acc.wrapping_add(elem),
                        // NOTE(review): presumably the helper's sign-extension selector (`false`
                        // matching the zero-extension described above) - confirm against
                        // `execute_widening_reduce_op`
412                        false,
413                    );
414                }
415            }
            // Widening signed sum reduction: identical to `Vwredsumu` except for the final flag
416            Self::Vwredsum { vd, vs2, vs1, vm } => {
417                if !ext_state.vector_instructions_allowed() {
418                    Err(ExecutionError::IllegalInstruction {
419                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
420                    })?;
421                }
422                let vtype = ext_state
423                    .vtype()
424                    .ok_or(ExecutionError::IllegalInstruction {
425                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
426                    })?;
427                if u32::from(ext_state.vstart()) != 0 {
428                    Err(ExecutionError::IllegalInstruction {
429                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
430                    })?;
431                }
                // Widening: 2*SEW must fit in ELEN
432                if u32::from(vtype.vsew().bits()) * 2 > ExtState::ELEN {
433                    Err(ExecutionError::IllegalInstruction {
434                        address: program_counter.old_pc(zve64x_helpers::INSTRUCTION_SIZE),
435                    })?;
436                }
437                let group_regs = vtype.vlmul().register_count();
438                zve64x_arith_helpers::check_vreg_group_alignment::<Reg, _, _, _>(
439                    program_counter,
440                    vs2,
441                    group_regs,
442                )?;
443                let sew = vtype.vsew();
444                let vl = ext_state.vl();
445                // SAFETY: see `Vwredsumu`
446                unsafe {
447                    zve64x_reduction_helpers::execute_widening_reduce_op(
448                        ext_state,
449                        vd,
450                        vs2,
451                        vs1,
452                        vm,
453                        vl,
454                        sew,
455                        // Sign-extend vs2 elements then accumulate
456                        |acc, elem, _sew| acc.wrapping_add(elem),
                        // NOTE(review): presumably the helper's sign-extension selector (`true`
                        // matching the sign-extension described above) - confirm against
                        // `execute_widening_reduce_op`
457                        true,
458                    );
459                }
460            }
            // Panics if this variant is ever matched; the message states it is never constructed
461            Self::PhantomZve64xReduction(_) => unreachable!("Never constructed"),
462        }
463
        // Every arm that reaches this point completed without error
464        Ok(ControlFlow::Continue(()))
465    }
466}