Skip to main content

ab_chacha8/
lib.rs

1//! Small GPU-friendly ([rust-gpu]) software implementation of ChaCha8
2//!
3//! [rust-gpu]: https://github.com/rust-gpu/rust-gpu
4//!
5//! Does not require a standard library (`no_std`) or an allocator.
6
7#![no_std]
8
9#[cfg(test)]
10mod tests;
11
/// A single ChaCha8 block: sixteen 32-bit words (64 bytes in total).
///
/// The same layout is used for the state stored inside [`ChaCha8State`] and for the value
/// returned by [`ChaCha8State::compute_block`].
pub type ChaCha8Block = [u32; 16];
14
15/// Convert block to bytes
16#[inline(always)]
17#[cfg_attr(feature = "no-panic", no_panic::no_panic)]
18pub fn block_to_bytes(block: &ChaCha8Block) -> [u8; 64] {
19    // SAFETY: Same size and no alignment requirements
20    unsafe { block.as_ptr().cast::<[u8; 64]>().read() }
21}
22
23/// Create an instance from internal representation
24#[inline(always)]
25#[cfg_attr(feature = "no-panic", no_panic::no_panic)]
26pub fn bytes_to_block(bytes: &[u8; 64]) -> ChaCha8Block {
27    // SAFETY: Same size, all bit patterns are valid
28    unsafe { bytes.as_ptr().cast::<ChaCha8Block>().read_unaligned() }
29}
30
31/// State of ChaCha8 cipher
32#[derive(Debug, Copy, Clone)]
33pub struct ChaCha8State {
34    data: ChaCha8Block,
35}
36
37impl ChaCha8State {
38    const ROUNDS: usize = 8;
39
40    /// Initialize ChaCha8 state
41    #[inline]
42    #[cfg_attr(feature = "no-panic", no_panic::no_panic)]
43    pub fn init(key: &[u8; 32], nonce: &[u8; 12]) -> Self {
44        let mut data = [0u32; 16];
45        data[0] = 0x61707865;
46        data[1] = 0x3320646e;
47        data[2] = 0x79622d32;
48        data[3] = 0x6b206574;
49
50        for (i, &chunk) in key.as_chunks::<4>().0.iter().enumerate() {
51            data[4 + i] = u32::from_le_bytes(chunk);
52        }
53
54        // `data[12]` and `data[13]` is counter specific to each block, thus not set here
55
56        for (i, &chunk) in nonce.as_chunks::<4>().0.iter().enumerate() {
57            data[13 + i] = u32::from_le_bytes(chunk);
58        }
59
60        Self { data }
61    }
62
63    /// Convert to internal representation
64    #[inline(always)]
65    #[cfg_attr(feature = "no-panic", no_panic::no_panic)]
66    pub fn to_repr(self) -> ChaCha8Block {
67        self.data
68    }
69
70    /// Create an instance from internal representation
71    #[inline(always)]
72    #[cfg_attr(feature = "no-panic", no_panic::no_panic)]
73    pub fn from_repr(data: ChaCha8Block) -> Self {
74        Self { data }
75    }
76
77    /// Compute block for specified counter.
78    ///
79    /// Counter is only 32-bit because that is all that is needed for target use case.
80    #[inline(always)]
81    #[cfg_attr(feature = "no-panic", no_panic::no_panic)]
82    pub fn compute_block(mut self, counter: u32) -> ChaCha8Block {
83        self.data[12] = counter;
84        // Not setting `data[13]` due to counter being limited to `u32`
85
86        let initial = self.data;
87
88        for _ in 0..Self::ROUNDS / 2 {
89            self.quarter_round(0, 4, 8, 12);
90            self.quarter_round(1, 5, 9, 13);
91            self.quarter_round(2, 6, 10, 14);
92            self.quarter_round(3, 7, 11, 15);
93
94            self.quarter_round(0, 5, 10, 15);
95            self.quarter_round(1, 6, 11, 12);
96            self.quarter_round(2, 7, 8, 13);
97            self.quarter_round(3, 4, 9, 14);
98        }
99
100        // TODO: More idiomatic version currently doesn't compile:
101        //  https://github.com/Rust-GPU/rust-gpu/issues/241#issuecomment-3005693043
102        #[expect(clippy::needless_range_loop)]
103        // for (d, initial) in self.data.iter_mut().zip(initial) {
104        //     *d = d.wrapping_add(initial);
105        // }
106        for i in 0..16 {
107            self.data[i] = self.data[i].wrapping_add(initial[i]);
108        }
109
110        self.data
111    }
112
113    #[inline(always)]
114    #[cfg_attr(feature = "no-panic", no_panic::no_panic)]
115    fn quarter_round(&mut self, a: usize, b: usize, c: usize, d: usize) {
116        self.data[a] = self.data[a].wrapping_add(self.data[b]);
117        self.data[d] ^= self.data[a];
118        self.data[d] = self.data[d].rotate_left(16);
119
120        self.data[c] = self.data[c].wrapping_add(self.data[d]);
121        self.data[b] ^= self.data[c];
122        self.data[b] = self.data[b].rotate_left(12);
123
124        self.data[a] = self.data[a].wrapping_add(self.data[b]);
125        self.data[d] ^= self.data[a];
126        self.data[d] = self.data[d].rotate_left(8);
127
128        self.data[c] = self.data[c].wrapping_add(self.data[d]);
129        self.data[b] ^= self.data[c];
130        self.data[b] = self.data[b].rotate_left(7);
131    }
132}