ab_merkle_tree/balanced.rs

use crate::{hash_pair, hash_pair_block, hash_pairs};
use ab_blake3::{BLOCK_LEN, OUT_LEN};
#[cfg(feature = "alloc")]
use alloc::boxed::Box;
use core::iter::TrustedLen;
use core::mem;
use core::mem::MaybeUninit;
use core::num::NonZero;

/// Optimal number of blocks for hashing at once to saturate BLAKE3 SIMD on any hardware
const BATCH_HASH_NUM_BLOCKS: usize = 16;
/// Number of leaves that corresponds to [`BATCH_HASH_NUM_BLOCKS`]
const BATCH_HASH_NUM_LEAVES: usize = BATCH_HASH_NUM_BLOCKS * BLOCK_LEN / OUT_LEN;

/// Inner function used in [`BalancedMerkleTree::compute_root_only()`] for stack allocation, only
/// public due to use in generic bounds
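///
/// For example (illustrative numbers only, derived from the constants above, where
/// `BATCH_HASH_NUM_LEAVES == 32` for BLAKE3's 64-byte blocks and 32-byte outputs): a tree with
/// `n == 256` leaves is hashed in `256 / 32 == 8` batches, so the per-level stack used by
/// [`BalancedMerkleTree::compute_root_only()`] needs `(256 / 32).ilog2() + 1 == 4` entries.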
pub const fn compute_root_only_large_stack_size(n: usize) -> usize {
    // For small trees the large stack is not used, so the returned value does not matter as long as
    // it compiles
    if n < BATCH_HASH_NUM_LEAVES {
        return 1;
    }

    (n / BATCH_HASH_NUM_LEAVES).ilog2() as usize + 1
}

/// Ensuring only supported `N` can be specified for [`BalancedMerkleTree`].
///
/// This is essentially a workaround for the current Rust type system constraints that do not allow
/// a nicer way to do the same thing at compile time.
pub const fn ensure_supported_n(n: usize) -> usize {
    assert!(
        n.is_power_of_two(),
        "Balanced Merkle Tree must have a number of leaves that is a power of 2"
    );

    assert!(
        n > 1,
        "A Balanced Merkle Tree must have more than one leaf"
    );

    0
}

/// Merkle Tree variant that has hash-sized leaves and is fully balanced according to the
/// configured generic parameter.
///
/// This can be considered a general case of [`UnbalancedMerkleTree`]. When the number of leaves is
/// a power of two, the root and proofs are identical for both. Even then, [`UnbalancedMerkleTree`]
/// is useful when only a single proof needs to be generated and the number of leaves is very large
/// (it can generate proofs with very little RAM usage compared to this version).
///
/// [`UnbalancedMerkleTree`]: crate::unbalanced::UnbalancedMerkleTree
///
/// This Merkle Tree implementation is best suited for use cases where proofs for all (or most) of
/// the elements need to be generated and the whole tree easily fits into memory. It can also be
/// constructed, and proofs can be generated, efficiently without heap allocations.
///
/// With all parameters of the tree known statically, it results in the most efficient version of
/// the code being generated for a given set of parameters.
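///
/// A minimal usage sketch (illustrative only, not from the crate's own docs; `OUT_LEN` is the
/// 32-byte BLAKE3 output length):
///
/// ```ignore
/// use ab_merkle_tree::balanced::BalancedMerkleTree;
///
/// let leaves = [[0u8; 32]; 4];
/// let tree = BalancedMerkleTree::new(&leaves);
/// let root = tree.root();
///
/// for (leaf_index, proof) in tree.all_proofs().enumerate() {
///     assert!(BalancedMerkleTree::<4>::verify(
///         &root,
///         &proof,
///         leaf_index,
///         leaves[leaf_index],
///     ));
/// }
/// ```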
#[derive(Debug)]
pub struct BalancedMerkleTree<'a, const N: usize>
where
    [(); N - 1]:,
{
    leaves: &'a [[u8; OUT_LEN]],
    // This tree doesn't include leaves because we have them in the `leaves` field
    tree: [[u8; OUT_LEN]; N - 1],
}

// TODO: Optimize by implementing SIMD-accelerated hashing of multiple values:
//  https://github.com/BLAKE3-team/BLAKE3/issues/478
impl<'a, const N: usize> BalancedMerkleTree<'a, N>
where
    [(); N - 1]:,
    [(); ensure_supported_n(N)]:,
{
    /// Create a new tree from a fixed set of elements.
    ///
    /// The data structure is statically allocated and might be too large to fit on the stack!
    /// If that is the case, use the `new_boxed()` method.
    // TODO: Unlock on RISC-V, it started failing since https://github.com/nazar-pc/abundance/pull/551
    //  for unknown reason
    #[cfg_attr(
        all(feature = "no-panic", not(target_arch = "riscv64")),
        no_panic::no_panic
    )]
    pub fn new(leaves: &'a [[u8; OUT_LEN]; N]) -> Self {
        let mut tree = [MaybeUninit::<[u8; OUT_LEN]>::uninit(); _];

        Self::init_internal(leaves, &mut tree);

        Self {
            leaves,
            // SAFETY: Statically guaranteed for all elements to be initialized
            tree: unsafe { tree.transpose().assume_init() },
        }
    }

    /// Like [`Self::new()`], but uses pre-allocated memory for instantiation
    // TODO: Unlock on RISC-V, it started failing since https://github.com/nazar-pc/abundance/pull/551
    //  for unknown reason
    #[cfg_attr(
        all(feature = "no-panic", not(target_arch = "riscv64")),
        no_panic::no_panic
    )]
    pub fn new_in<'b>(
        instance: &'b mut MaybeUninit<Self>,
        leaves: &'a [[u8; OUT_LEN]; N],
    ) -> &'b mut Self {
        let instance_ptr = instance.as_mut_ptr();
        // SAFETY: Valid and correctly aligned non-null pointer
        unsafe {
            (&raw mut (*instance_ptr).leaves).write(leaves);
        }
        let tree = {
            // SAFETY: Valid and correctly aligned non-null pointer
            let tree_ptr = unsafe { &raw mut (*instance_ptr).tree };
            // SAFETY: Allocated and correctly aligned uninitialized data
            unsafe {
                tree_ptr
                    .cast::<[MaybeUninit<[u8; OUT_LEN]>; N - 1]>()
                    .as_mut_unchecked()
            }
        };

        Self::init_internal(leaves, tree);

        // SAFETY: Initialized field by field above
        unsafe { instance.assume_init_mut() }
    }

    /// Like [`Self::new()`], but creates a heap-allocated instance, avoiding excessive stack usage
    /// for large trees
    #[cfg(feature = "alloc")]
    pub fn new_boxed(leaves: &'a [[u8; OUT_LEN]; N]) -> Box<Self> {
        let mut instance = Box::<Self>::new_uninit();

        Self::new_in(&mut instance, leaves);

        // SAFETY: Initialized by constructor above
        unsafe { instance.assume_init() }
    }

    // TODO: Unlock on RISC-V, it started failing since https://github.com/nazar-pc/abundance/pull/551
    //  for unknown reason
    #[cfg_attr(
        all(feature = "no-panic", not(target_arch = "riscv64")),
        no_panic::no_panic
    )]
    fn init_internal(leaves: &[[u8; OUT_LEN]; N], tree: &mut [MaybeUninit<[u8; OUT_LEN]>; N - 1]) {
        let mut tree_hashes = tree.as_mut_slice();
        let mut level_hashes = leaves.as_slice();

        while level_hashes.len() > 1 {
            let num_pairs = level_hashes.len() / 2;
            let parent_hashes;
            // SAFETY: The size of the tree is statically known to match the number of leaves and
            // levels of hashes
            (parent_hashes, tree_hashes) = unsafe { tree_hashes.split_at_mut_unchecked(num_pairs) };

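            // Parent levels whose size is a multiple of `BATCH_HASH_NUM_BLOCKS` are hashed in
            // SIMD-friendly batches; the smaller levels near the root fall back to pairwise
            // hashing below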
            if parent_hashes.len().is_multiple_of(BATCH_HASH_NUM_BLOCKS) {
                // SAFETY: Just checked to be a multiple of chunk size and not empty
                let parent_hashes_chunks =
                    unsafe { parent_hashes.as_chunks_unchecked_mut::<BATCH_HASH_NUM_BLOCKS>() };
                for (pairs, hashes) in level_hashes
                    .as_chunks::<BATCH_HASH_NUM_LEAVES>()
                    .0
                    .iter()
                    .zip(parent_hashes_chunks)
                {
                    // TODO: Would be nice to have a convenient method for this:
                    //  https://github.com/rust-lang/rust/pull/145504#pullrequestreview-3788155275
                    // SAFETY: Identical layout
                    let hashes = unsafe {
                        mem::transmute::<
                            &mut [MaybeUninit<[u8; OUT_LEN]>; BATCH_HASH_NUM_BLOCKS],
                            &mut MaybeUninit<[[u8; OUT_LEN]; BATCH_HASH_NUM_BLOCKS]>,
                        >(hashes)
                    };

                    // TODO: This memory copy is unfortunate, make hashing write into this memory
                    //  directly once blake3 API improves
                    hashes.write(hash_pairs(pairs));
                }
            } else {
                for (pair, parent_hash) in level_hashes
                    .as_chunks()
                    .0
                    .iter()
                    .zip(parent_hashes.iter_mut())
                {
                    // SAFETY: Same size and alignment
                    let pair = unsafe {
                        mem::transmute::<&[[u8; OUT_LEN]; BLOCK_LEN / OUT_LEN], &[u8; BLOCK_LEN]>(
                            pair,
                        )
                    };
                    parent_hash.write(hash_pair_block(pair));
                }
            }

            // SAFETY: Just initialized
            level_hashes = unsafe { parent_hashes.assume_init_ref() };
        }
    }

    // TODO: Method that generates not only the root, but also a proof, like the Unbalanced Merkle
    //  Tree
    /// Compute the Merkle Tree root.
    ///
    /// This is functionally equivalent to creating an instance first and calling the
    /// [`Self::root()`] method, but is faster and avoids heap allocation when the root is the only
    /// thing that is needed.
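    ///
    /// Equivalence sketch (illustrative only):
    ///
    /// ```ignore
    /// let leaves = [[0u8; 32]; 32];
    /// assert_eq!(
    ///     BalancedMerkleTree::compute_root_only(&leaves),
    ///     BalancedMerkleTree::new(&leaves).root(),
    /// );
    /// ```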
    #[inline]
    #[cfg_attr(feature = "no-panic", no_panic::no_panic)]
    pub fn compute_root_only(leaves: &[[u8; OUT_LEN]; N]) -> [u8; OUT_LEN]
    where
        [(); N.ilog2() as usize + 1]:,
        [(); compute_root_only_large_stack_size(N)]:,
    {
        // Special case for small trees below optimal SIMD width
        match N {
            2 => {
                let [root] = hash_pairs(leaves);

                return root;
            }
            4 => {
                let hashes = hash_pairs::<2, _>(leaves);
                let [root] = hash_pairs(&hashes);

                return root;
            }
            8 => {
                let hashes = hash_pairs::<4, _>(leaves);
                let hashes = hash_pairs::<2, _>(&hashes);
                let [root] = hash_pairs(&hashes);

                return root;
            }
            16 => {
                let hashes = hash_pairs::<8, _>(leaves);
                let hashes = hash_pairs::<4, _>(&hashes);
                let hashes = hash_pairs::<2, _>(&hashes);
                let [root] = hash_pairs(&hashes);

                return root;
            }
            _ => {
                // All remaining supported sizes are at least `BATCH_HASH_NUM_LEAVES`
                assert!(N >= BATCH_HASH_NUM_LEAVES);
            }
        }

        // Stack of intermediate nodes per tree level. The logic here is the same as with a small
        // tree above, except we store `BATCH_HASH_NUM_BLOCKS` hashes per level and do a
        // post-processing step at the very end to collapse them into a single root hash.
        let mut stack =
            [[[0u8; OUT_LEN]; BATCH_HASH_NUM_BLOCKS]; compute_root_only_large_stack_size(N)];

        // This buffer is reused for both the parent and current halves, which reduces stack usage
        // compared to having a separate `current` variable
        let mut parent_current = [[0u8; OUT_LEN]; BATCH_HASH_NUM_LEAVES];
        for (chunk_index, chunk_leaves) in leaves
            .as_chunks::<BATCH_HASH_NUM_LEAVES>()
            .0
            .iter()
            .enumerate()
        {
            let current_half = &mut parent_current[BATCH_HASH_NUM_BLOCKS..];

            let current = hash_pairs::<BATCH_HASH_NUM_BLOCKS, _>(chunk_leaves);
            current_half.copy_from_slice(&current);

            // Every bit set to `1` in the chunk index corresponds to an active Merkle Tree level
            let lowest_active_levels = chunk_index.trailing_ones() as usize;
            for parent in &mut stack[..lowest_active_levels] {
                let parent_half = &mut parent_current[..BATCH_HASH_NUM_BLOCKS];
                parent_half.copy_from_slice(parent);

                let current = hash_pairs::<BATCH_HASH_NUM_BLOCKS, _>(&parent_current);

                let current_half = &mut parent_current[BATCH_HASH_NUM_BLOCKS..];
                current_half.copy_from_slice(&current);
            }

            let current_half = &mut parent_current[BATCH_HASH_NUM_BLOCKS..];

            // Place the freshly computed `BATCH_HASH_NUM_BLOCKS` hashes into the first inactive
            // level
            stack[lowest_active_levels].copy_from_slice(current_half);
        }

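        // All chunks have been consumed, so the top level of the stack now holds
        // `BATCH_HASH_NUM_BLOCKS` intermediate hashes; collapse them pairwise
        // (16 -> 8 -> 4 -> 2 -> 1) into the root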
        let hashes = &mut stack[compute_root_only_large_stack_size(N) - 1];
        let hashes = hash_pairs::<{ BATCH_HASH_NUM_BLOCKS / 2 }, _>(hashes);
        let hashes = hash_pairs::<{ BATCH_HASH_NUM_BLOCKS / 4 }, _>(&hashes);
        let hashes = hash_pairs::<{ BATCH_HASH_NUM_BLOCKS / 8 }, _>(&hashes);
        let [root] = hash_pairs::<{ BATCH_HASH_NUM_BLOCKS / 16 }, _>(&hashes);

        root
    }

    /// Get the root of the Merkle Tree
    #[inline]
    #[cfg_attr(feature = "no-panic", no_panic::no_panic)]
    pub fn root(&self) -> [u8; OUT_LEN] {
        *self
            .tree
            .last()
            .or(self.leaves.last())
            .expect("There is always at least one leaf hash; qed")
    }

    /// Iterator over proofs in the same order as the provided leaf hashes
    #[cfg_attr(feature = "no-panic", no_panic::no_panic)]
    pub fn all_proofs(
        &self,
    ) -> impl ExactSizeIterator<Item = [[u8; OUT_LEN]; N.ilog2() as usize]> + TrustedLen
    where
        [(); N.ilog2() as usize]:,
    {
        let iter = self.leaves.as_chunks().0.iter().enumerate().flat_map(
            |(pair_index, &[left_hash, right_hash])| {
                let mut left_proof = [MaybeUninit::<[u8; OUT_LEN]>::uninit(); _];
                left_proof[0].write(right_hash);

                let left_proof = {
                    let shared_proof = &mut left_proof[1..];

                    let mut tree_hashes = self.tree.as_slice();
                    let mut parent_position = pair_index;
                    let mut parent_level_size = N / 2;

                    for hash in shared_proof {
                        // The line below is a more efficient branchless version of this:
                        // let parent_other_position = if parent_position % 2 == 0 {
                        //     parent_position + 1
                        // } else {
                        //     parent_position - 1
                        // };
                        let parent_other_position = parent_position ^ 1;

                        // SAFETY: Statically guaranteed to be present by constructor
                        let other_hash =
                            unsafe { tree_hashes.get_unchecked(parent_other_position) };
                        hash.write(*other_hash);
                        tree_hashes = &tree_hashes[parent_level_size..];

                        parent_position /= 2;
                        parent_level_size /= 2;
                    }

                    // SAFETY: Just initialized
                    unsafe { left_proof.transpose().assume_init() }
                };

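                // Both proofs in a pair share every hash above the first level; only the sibling
                // hash at index 0 differs between them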
                let mut right_proof = left_proof;
                right_proof[0] = left_hash;

                [left_proof, right_proof]
            },
        );

        ProofsIterator { iter, len: N }
    }

    /// Verify a previously generated proof
    #[inline]
    #[cfg_attr(feature = "no-panic", no_panic::no_panic)]
    pub fn verify(
        root: &[u8; OUT_LEN],
        proof: &[[u8; OUT_LEN]; N.ilog2() as usize],
        leaf_index: usize,
        leaf: [u8; OUT_LEN],
    ) -> bool
    where
        [(); N.ilog2() as usize]:,
    {
        if leaf_index >= N {
            return false;
        }

        let mut computed_root = leaf;

        let mut position = leaf_index;
        for hash in proof {
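            // An even position means the current node is a left child, so the sibling hash goes
            // on the right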
            computed_root = if position.is_multiple_of(2) {
                hash_pair(&computed_root, hash)
            } else {
                hash_pair(hash, &computed_root)
            };

            position /= 2;
        }

        root == &computed_root
    }
}

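/// Helper wrapper that tracks the exact number of remaining items, since the `flat_map`-based
/// iterator in [`BalancedMerkleTree::all_proofs()`] cannot provide [`ExactSizeIterator`] and
/// [`TrustedLen`] on its own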
struct ProofsIterator<Iter> {
    iter: Iter,
    len: usize,
}

impl<Iter> Iterator for ProofsIterator<Iter>
where
    Iter: Iterator,
{
    type Item = Iter::Item;

    #[inline(always)]
    #[cfg_attr(feature = "no-panic", no_panic::no_panic)]
    fn next(&mut self) -> Option<Self::Item> {
        let item = self.iter.next();
        self.len = self.len.saturating_sub(1);
        item
    }

    #[inline(always)]
    fn size_hint(&self) -> (usize, Option<usize>) {
        (self.len, Some(self.len))
    }

    #[inline(always)]
    fn count(self) -> usize
    where
        Self: Sized,
    {
        self.len
    }

    #[inline(always)]
    fn last(self) -> Option<Self::Item>
    where
        Self: Sized,
    {
        self.iter.last()
    }

    #[inline(always)]
    fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
        self.len = self.len.saturating_sub(n);
        self.iter.advance_by(n)
    }

    #[inline(always)]
    fn nth(&mut self, n: usize) -> Option<Self::Item> {
        self.len = self.len.saturating_sub(n.saturating_add(1));
        self.iter.nth(n)
    }
}

impl<Iter> ExactSizeIterator for ProofsIterator<Iter>
where
    Iter: Iterator,
{
    #[inline(always)]
    fn len(&self) -> usize {
        self.len
    }
}

// SAFETY: The length is always exact and correct here
unsafe impl<Iter> TrustedLen for ProofsIterator<Iter> where Iter: Iterator {}