ab_proof_of_space/chiapos/
table.rs

1mod rmap;
2#[cfg(test)]
3mod tests;
4pub(super) mod types;
5
6#[cfg(not(feature = "std"))]
7extern crate alloc;
8
9use crate::chiapos::Seed;
10use crate::chiapos::constants::{PARAM_B, PARAM_BC, PARAM_C, PARAM_EXT, PARAM_M};
11use crate::chiapos::table::rmap::Rmap;
12use crate::chiapos::table::types::{Metadata, Position, X, Y};
13use crate::chiapos::utils::EvaluatableUsize;
14use ab_chacha8::{ChaCha8Block, ChaCha8State};
15#[cfg(not(feature = "std"))]
16use alloc::boxed::Box;
17#[cfg(not(feature = "std"))]
18use alloc::vec;
19#[cfg(not(feature = "std"))]
20use alloc::vec::Vec;
21use chacha20::cipher::{Iv, KeyIvInit, StreamCipher};
22use chacha20::{ChaCha8, Key};
23#[cfg(any(feature = "parallel", test))]
24use core::cell::SyncUnsafeCell;
25use core::mem::MaybeUninit;
26use core::simd::prelude::*;
27#[cfg(any(feature = "parallel", test))]
28use core::sync::atomic::{AtomicUsize, Ordering};
29use core::{array, mem};
30use seq_macro::seq;
31
32pub(super) const COMPUTE_F1_SIMD_FACTOR: usize = 8;
33const COMPUTE_FN_SIMD_FACTOR: usize = 16;
34const MAX_BUCKET_SIZE: usize = 512;
35const BUCKET_SIZE_UPPER_BOUND_SECURITY_BITS: u8 = 128;
36/// Reducing bucket size for better performance.
37///
38/// The number should be sufficient to produce enough proofs for sector encoding with high
39/// probability.
40// TODO: Statistical analysis if possible.
41const REDUCED_BUCKETS_SIZE: usize = 272;
42/// Reducing matches count for better performance.
43///
44/// The number should be sufficient to produce enough proofs for sector encoding with high
45/// probability.
46// TODO: Statistical analysis if possible.
47const REDUCED_MATCHES_COUNT: usize = 288;
48#[cfg(any(feature = "parallel", test))]
49const CACHE_LINE_SIZE: usize = 64;
50
51const _: () = {
52    debug_assert!(REDUCED_BUCKETS_SIZE <= MAX_BUCKET_SIZE);
53    debug_assert!(REDUCED_MATCHES_COUNT <= MAX_BUCKET_SIZE);
54};
55
56/// Compute the size of `y` in bits
57pub(super) const fn y_size_bits(k: u8) -> usize {
58    k as usize + PARAM_EXT as usize
59}
60
61/// Metadata size in bytes
62pub const fn metadata_size_bytes(k: u8, table_number: u8) -> usize {
63    metadata_size_bits(k, table_number).div_ceil(u8::BITS as usize)
64}
65
66/// Metadata size in bits
67pub(super) const fn metadata_size_bits(k: u8, table_number: u8) -> usize {
68    k as usize
69        * match table_number {
70            1 => 1,
71            2 => 2,
72            3 | 4 => 4,
73            5 => 3,
74            6 => 2,
75            7 => 0,
76            _ => unreachable!(),
77        }
78}
79
80/// Number of buckets for a given `k`
81pub const fn num_buckets(k: u8) -> usize {
82    2_usize
83        .pow(y_size_bits(k) as u32)
84        .div_ceil(usize::from(PARAM_BC))
85}
86
87#[cfg(any(feature = "parallel", test))]
88#[inline(always)]
89fn strip_sync_unsafe_cell<const N: usize, T>(value: Box<[SyncUnsafeCell<T>; N]>) -> Box<[T; N]> {
90    // SAFETY: `SyncUnsafeCell` has the same layout as `T`
91    unsafe { Box::from_raw(Box::into_raw(value).cast()) }
92}
93
94/// ChaCha8 [`Vec`] sufficient for the whole first table for [`K`].
95/// Prefer [`partial_y`] if you need partial y just for a single `x`.
96fn partial_ys<const K: u8>(seed: Seed) -> Vec<u8> {
97    let output_len_bits = usize::from(K) * (1 << K);
98    let mut output = vec![0; output_len_bits.div_ceil(u8::BITS as usize)];
99
100    let key = Key::from(seed);
101    let iv = Iv::<ChaCha8>::default();
102
103    let mut cipher = ChaCha8::new(&key, &iv);
104
105    cipher.write_keystream(&mut output);
106
107    output
108}
109
110/// Calculate a probabilistic upper bound on the Chia bucket size for a given `k` and
111/// `security_bits` (security level).
112///
113/// This is based on a Chernoff bound for the Poisson distribution with mean
114/// `lambda = PARAM_BC / 2^PARAM_EXT`, ensuring the probability that any bucket exceeds the bound is
115/// less than `2^{-security_bits}`.
116/// The bound is lambda + ceil(sqrt(3 * lambda * (k + security_bits) * ln(2))).
117const fn bucket_size_upper_bound(k: u8, security_bits: u8) -> usize {
118    // Lambda is the expected number of entries in a bucket, approximated as
119    // `PARAM_BC / 2^PARAM_EXT`. It is independent of `k`.
120    const LAMBDA: u64 = PARAM_BC as u64 / 2u64.pow(PARAM_EXT as u32);
121    // Approximation of ln(2) as a fraction: ln(2) ≈ LN2_NUM / LN2_DEN.
122    // This allows integer-only computation of the square root term involving ln(2).
123    const LN2_NUM: u128 = 693147;
124    const LN2_DEN: u128 = 1000000;
125
126    // `k + security_bits` for the union bound over ~2^k intervals
127    let ks = k as u128 + security_bits as u128;
128    // Compute numerator for the expression under the square root:
129    // `3 * lambda * (k + security_bits) * LN2_NUM`
130    let num = 3u128 * LAMBDA as u128 * ks * LN2_NUM;
131    // Denominator for ln(2): `LN2_DEN`
132    let den = LN2_DEN;
133
134    let ceil_div: u128 = num.div_ceil(den);
135
136    // Binary search to find the smallest `x` such that `x * x * den >= num`,
137    // which computes `ceil(sqrt(num / den))` without floating-point.
138    // We use a custom binary search over `u64` range because binary search in the standard library
139    // operates on sorted slices, not directly on integer ranges for solving inequalities like this.
140    let mut low = 0u64;
141    let mut high = u64::MAX;
142    while low < high {
143        let mid = low + (high - low) / 2;
144        let left = (mid as u128) * (mid as u128);
145        if left >= ceil_div {
146            high = mid;
147        } else {
148            low = mid + 1;
149        }
150    }
151    let add_term = low;
152
153    (LAMBDA + add_term) as usize
154}
155
156fn group_by_buckets<const K: u8>(
157    ys: &[Y],
158) -> Box<[[Position; REDUCED_BUCKETS_SIZE]; num_buckets(K)]>
159where
160    [(); num_buckets(K)]:,
161{
162    let mut bucket_offsets = [0_u16; num_buckets(K)];
163    // SAFETY: Contents is `MaybeUninit`
164    let mut buckets = unsafe {
165        Box::<[[MaybeUninit<Position>; REDUCED_BUCKETS_SIZE]; num_buckets(K)]>::new_uninit()
166            .assume_init()
167    };
168
169    for (&y, position) in ys.iter().zip(Position::ZERO..) {
170        let y = u32::from(y);
171        let bucket_index = (y / u32::from(PARAM_BC)) as usize;
172
173        // SAFETY: Bucket is obtained using division by `PARAM_BC` and fits by definition
174        let bucket_offset = unsafe { bucket_offsets.get_unchecked_mut(bucket_index) };
175        // SAFETY: Bucket is obtained using division by `PARAM_BC` and fits by definition
176        let bucket = unsafe { buckets.get_unchecked_mut(bucket_index) };
177
178        if *bucket_offset < REDUCED_BUCKETS_SIZE as u16 {
179            bucket[*bucket_offset as usize].write(position);
180            *bucket_offset += 1;
181        }
182    }
183
184    for (bucket, initialized) in buckets.iter_mut().zip(bucket_offsets) {
185        bucket[usize::from(initialized)..].write_filled(Position::SENTINEL);
186    }
187
188    // SAFETY: All entries are initialized
189    unsafe { Box::from_raw(Box::into_raw(buckets).cast()) }
190}
191
192/// Similar to [`group_by_buckets()`], but processes buckets instead of a flat list of `y`s.
193///
194/// # Safety
195/// Iterator item is a list of potentially uninitialized `y`s and a number of initialized `y`s. The
196/// number of initialized `y`s must be correct.
197#[cfg(any(feature = "parallel", test))]
198unsafe fn group_by_buckets_from_buckets<'a, const K: u8, I>(
199    iter: I,
200) -> Box<[[Position; REDUCED_BUCKETS_SIZE]; num_buckets(K)]>
201where
202    I: Iterator<Item = (&'a [MaybeUninit<Y>; REDUCED_MATCHES_COUNT], usize)> + 'a,
203    [(); num_buckets(K)]:,
204{
205    let mut bucket_offsets = [0_u16; num_buckets(K)];
206    // SAFETY: Contents is `MaybeUninit`
207    let mut buckets = unsafe {
208        Box::<[[MaybeUninit<Position>; REDUCED_BUCKETS_SIZE]; num_buckets(K)]>::new_uninit()
209            .assume_init()
210    };
211
212    for ((ys, count), batch_start) in iter.zip((Position::ZERO..).step_by(REDUCED_MATCHES_COUNT)) {
213        // SAFETY: Function contract guarantees that `y`s are initialized
214        let ys = unsafe { ys[..count].assume_init_ref() };
215        for (&y, position) in ys.iter().zip(batch_start..) {
216            let y = u32::from(y);
217            let bucket_index = (y / u32::from(PARAM_BC)) as usize;
218
219            // SAFETY: Bucket is obtained using division by `PARAM_BC` and fits by definition
220            let bucket_offset = unsafe { bucket_offsets.get_unchecked_mut(bucket_index) };
221            // SAFETY: Bucket is obtained using division by `PARAM_BC` and fits by definition
222            let bucket = unsafe { buckets.get_unchecked_mut(bucket_index) };
223
224            if *bucket_offset < REDUCED_BUCKETS_SIZE as u16 {
225                bucket[*bucket_offset as usize].write(position);
226                *bucket_offset += 1;
227            }
228        }
229    }
230
231    for (bucket, initialized) in buckets.iter_mut().zip(bucket_offsets) {
232        bucket[usize::from(initialized)..].write_filled(Position::SENTINEL);
233    }
234
235    // SAFETY: All entries are initialized
236    unsafe { Box::from_raw(Box::into_raw(buckets).cast()) }
237}
238
239/// Mapping from `parity` to `r` to `m`
240type LeftTargets = [[Simd<u16, { PARAM_M as usize }>; PARAM_BC as usize]; 2];
241
242fn calculate_left_targets() -> Box<LeftTargets> {
243    let mut left_targets = Box::<LeftTargets>::new_uninit();
244    // SAFETY: Same layout and uninitialized in both cases
245    let left_targets_slice = unsafe {
246        mem::transmute::<
247            &mut MaybeUninit<[[Simd<u16, { PARAM_M as usize }>; PARAM_BC as usize]; 2]>,
248            &mut [[MaybeUninit<Simd<u16, { PARAM_M as usize }>>; PARAM_BC as usize]; 2],
249        >(left_targets.as_mut())
250    };
251
252    for parity in 0..=1 {
253        for r in 0..PARAM_BC {
254            let c = r / PARAM_C;
255
256            let mut arr = array::from_fn(|m| {
257                let m = m as u16;
258                ((c + m) % PARAM_B) * PARAM_C
259                    + (((2 * m + parity) * (2 * m + parity) + r) % PARAM_C)
260            });
261            arr.sort_unstable();
262            left_targets_slice[parity as usize][r as usize].write(Simd::from_array(arr));
263        }
264    }
265
266    // SAFETY: Initialized all entries
267    unsafe { left_targets.assume_init() }
268}
269
270fn calculate_left_target_on_demand(parity: u32, r: u32, m: u32) -> u32 {
271    let param_b = u32::from(PARAM_B);
272    let param_c = u32::from(PARAM_C);
273
274    ((r / param_c + m) % param_b) * param_c + (((2 * m + parity) * (2 * m + parity) + r) % param_c)
275}
276
277/// Caches that can be used to optimize the creation of multiple [`Tables`](super::Tables).
278#[derive(Debug, Clone)]
279pub struct TablesCache<const K: u8> {
280    left_targets: Box<LeftTargets>,
281}
282
283impl<const K: u8> Default for TablesCache<K> {
284    /// Create a new instance
285    fn default() -> Self {
286        Self {
287            left_targets: calculate_left_targets(),
288        }
289    }
290}
291
292#[derive(Debug, Copy, Clone)]
293struct Match {
294    left_position: Position,
295    left_y: Y,
296    right_position: Position,
297}
298
299/// `partial_y_offset` is in bits within `partial_y`
300pub(super) fn compute_f1<const K: u8>(x: X, seed: &Seed) -> Y {
301    let skip_bits = u32::from(K) * u32::from(x);
302    let skip_u32s = skip_bits / u32::BITS;
303    let partial_y_offset = skip_bits % u32::BITS;
304
305    const U32S_PER_BLOCK: usize = size_of::<ChaCha8Block>() / size_of::<u32>();
306
307    let initial_state = ChaCha8State::init(seed, &[0; _]);
308    let first_block_counter = skip_u32s / U32S_PER_BLOCK as u32;
309    let u32_in_first_block = skip_u32s as usize % U32S_PER_BLOCK;
310
311    let first_block = initial_state.compute_block(first_block_counter);
312    let hi = first_block[u32_in_first_block].to_be();
313
314    // TODO: Is SIMD version of `compute_block()` that produces two blocks at once possible?
315    let lo = if u32_in_first_block + 1 == U32S_PER_BLOCK {
316        // Spilled over into the second block
317        let second_block = initial_state.compute_block(first_block_counter + 1);
318        second_block[0].to_be()
319    } else {
320        first_block[u32_in_first_block + 1].to_be()
321    };
322
323    let partial_y = (u64::from(hi) << u32::BITS) | u64::from(lo);
324
325    let pre_y = partial_y >> (u64::BITS - u32::from(K + PARAM_EXT) - partial_y_offset);
326    let pre_y = pre_y as u32;
327    // Mask for clearing the rest of bits of `pre_y`.
328    let pre_y_mask = (u32::MAX << PARAM_EXT) & (u32::MAX >> (u32::BITS - u32::from(K + PARAM_EXT)));
329
330    // Extract `PARAM_EXT` most significant bits from `x` and store in the final offset of
331    // eventual `y` with the rest of bits being zero (`x` is `0..2^K`)
332    let pre_ext = u32::from(x) >> (K - PARAM_EXT);
333
334    // Combine all of the bits together:
335    // [padding zero bits][`K` bits rom `partial_y`][`PARAM_EXT` bits from `x`]
336    Y::from((pre_y & pre_y_mask) | pre_ext)
337}
338
339pub(super) fn compute_f1_simd<const K: u8>(
340    xs: Simd<u32, COMPUTE_F1_SIMD_FACTOR>,
341    partial_ys: &[u8; K as usize * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize],
342) -> [Y; COMPUTE_F1_SIMD_FACTOR] {
343    // Each element contains `K` desired bits of `partial_ys` in the final offset of eventual `ys`
344    // with the rest of bits being in undefined state
345    let pre_ys_bytes = array::from_fn(|i| {
346        let partial_y_offset = i * usize::from(K);
347        let partial_y_length =
348            (partial_y_offset % u8::BITS as usize + usize::from(K)).div_ceil(u8::BITS as usize);
349        let mut pre_y_bytes = 0u64.to_be_bytes();
350        pre_y_bytes[..partial_y_length].copy_from_slice(
351            &partial_ys[partial_y_offset / u8::BITS as usize..][..partial_y_length],
352        );
353
354        u64::from_be_bytes(pre_y_bytes)
355    });
356    let pre_ys_right_offset = array::from_fn(|i| {
357        let partial_y_offset = i as u32 * u32::from(K);
358        u64::from(u64::BITS - u32::from(K + PARAM_EXT) - partial_y_offset % u8::BITS)
359    });
360    let pre_ys = Simd::from_array(pre_ys_bytes) >> Simd::from_array(pre_ys_right_offset);
361
362    // Mask for clearing the rest of bits of `pre_ys`.
363    let pre_ys_mask = Simd::splat(
364        (u32::MAX << usize::from(PARAM_EXT))
365            & (u32::MAX >> (u32::BITS as usize - usize::from(K + PARAM_EXT))),
366    );
367
368    // Extract `PARAM_EXT` most significant bits from `xs` and store in the final offset of
369    // eventual `ys` with the rest of bits being in undefined state.
370    let pre_exts = xs >> Simd::splat(u32::from(K - PARAM_EXT));
371
372    // Combine all of the bits together:
373    // [padding zero bits][`K` bits rom `partial_y`][`PARAM_EXT` bits from `x`]
374    let ys = (pre_ys.cast() & pre_ys_mask) | pre_exts;
375
376    Y::array_from_repr(ys.to_array())
377}
378
379/// For verification use [`has_match`] instead.
380///
381/// # Safety
382/// Left and right bucket positions must correspond to the parent table.
383// TODO: Try to reduce the `matches` size further by processing `left_bucket` in chunks (like halves
384//  for example)
385unsafe fn find_matches_in_buckets<'a, const K: u8, const PARENT_TABLE_NUMBER: u8>(
386    left_bucket_index: u32,
387    left_bucket: &[Position; REDUCED_BUCKETS_SIZE],
388    right_bucket: &[Position; REDUCED_BUCKETS_SIZE],
389    parent_table: &Table<K, PARENT_TABLE_NUMBER>,
390    // `PARAM_M as usize * 2` corresponds to the upper bound number of matches a single `y` in the
391    // left bucket might have here
392    matches: &'a mut [MaybeUninit<Match>; REDUCED_MATCHES_COUNT + PARAM_M as usize * 2],
393    left_targets: &LeftTargets,
394) -> &'a [Match]
395where
396    EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
397    [(); 1 << K]:,
398    [(); num_buckets(K)]:,
399{
400    let left_base = left_bucket_index * u32::from(PARAM_BC);
401    let right_base = left_base + u32::from(PARAM_BC);
402
403    let mut rmap = Rmap::new();
404    for &right_position in right_bucket {
405        if right_position == Position::SENTINEL {
406            break;
407        }
408        // SAFETY: Guaranteed by function contract
409        let y = unsafe { parent_table.y(right_position) };
410        let r = u32::from(y) - right_base;
411        // SAFETY: `r` is within `0..PARAM_BC` range by definition, the right bucket is limited to
412        // `REDUCED_BUCKETS_SIZE`
413        unsafe {
414            rmap.add(r, right_position);
415        }
416    }
417
418    let parity = left_base % 2;
419    let left_targets_parity = &left_targets[parity as usize];
420    let mut next_match_index = 0;
421
422    // TODO: Simd read for left bucket? It might be more efficient in terms of memory access to
423    //  process chunks of the left bucket against one right value for each at a time
424    for &left_position in left_bucket {
425        // `next_match_index >= REDUCED_MATCHES_COUNT` is crucial to make sure
426        if left_position == Position::SENTINEL || next_match_index >= REDUCED_MATCHES_COUNT {
427            // Sentinel values are padded to the end of the bucket
428            break;
429        }
430
431        // SAFETY: Guaranteed by function contract
432        let y = unsafe { parent_table.y(left_position) };
433        let r = u32::from(y) - left_base;
434        // SAFETY: `r` is within a bucket and exists by definition
435        let left_targets_r = unsafe { left_targets_parity.get_unchecked(r as usize) }.as_array();
436
437        for r_target in left_targets_r {
438            // SAFETY: Targets are always limited to `PARAM_BC`
439            let [right_position_a, right_position_b] = unsafe { rmap.get(u32::from(*r_target)) };
440
441            // The right bucket position is never zero
442            if right_position_a != Position::ZERO {
443                // SAFETY: Iteration will stop before `REDUCED_MATCHES_COUNT + PARAM_M * 2`
444                // elements is inserted
445                unsafe { matches.get_unchecked_mut(next_match_index) }.write(Match {
446                    left_position,
447                    left_y: y,
448                    right_position: right_position_a,
449                });
450                next_match_index += 1;
451
452                if right_position_b != Position::ZERO {
453                    // SAFETY: Iteration will stop before
454                    // `REDUCED_MATCHES_COUNT + PARAM_M * 2` elements is inserted
455                    unsafe { matches.get_unchecked_mut(next_match_index) }.write(Match {
456                        left_position,
457                        left_y: y,
458                        right_position: right_position_b,
459                    });
460                    next_match_index += 1;
461                }
462            }
463        }
464    }
465
466    // SAFETY: Initialized this many matches
467    unsafe { matches[..next_match_index].assume_init_ref() }
468}
469
470/// Simplified version of [`find_matches_in_buckets`] for verification purposes.
471pub(super) fn has_match(left_y: Y, right_y: Y) -> bool {
472    let right_r = u32::from(right_y) % u32::from(PARAM_BC);
473    let parity = (u32::from(left_y) / u32::from(PARAM_BC)) % 2;
474    let left_r = u32::from(left_y) % u32::from(PARAM_BC);
475
476    let r_targets = array::from_fn::<_, { PARAM_M as usize }, _>(|i| {
477        calculate_left_target_on_demand(parity, left_r, i as u32)
478    });
479
480    r_targets.contains(&right_r)
481}
482
483#[inline(always)]
484pub(super) fn compute_fn<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
485    y: Y,
486    left_metadata: Metadata<K, PARENT_TABLE_NUMBER>,
487    right_metadata: Metadata<K, PARENT_TABLE_NUMBER>,
488) -> (Y, Metadata<K, TABLE_NUMBER>)
489where
490    EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
491    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
492{
493    let left_metadata = u128::from(left_metadata);
494    let right_metadata = u128::from(right_metadata);
495
496    let parent_metadata_bits = metadata_size_bits(K, PARENT_TABLE_NUMBER);
497
498    // Part of the `right_bits` at the final offset of eventual `input_a`
499    let y_and_left_bits = y_size_bits(K) + parent_metadata_bits;
500    let right_bits_start_offset = u128::BITS as usize - parent_metadata_bits;
501
502    // Take only bytes where bits were set
503    let num_bytes_with_data =
504        (y_size_bits(K) + parent_metadata_bits * 2).div_ceil(u8::BITS as usize);
505
506    // Only supports `K` from 15 to 25 (otherwise math will not be correct when concatenating y,
507    // left metadata and right metadata)
508    let hash = {
509        // Collect `K` most significant bits of `y` at the final offset of eventual `input_a`
510        let y_bits = u128::from(y) << (u128::BITS as usize - y_size_bits(K));
511
512        // Move bits of `left_metadata` at the final offset of eventual `input_a`
513        let left_metadata_bits =
514            left_metadata << (u128::BITS as usize - parent_metadata_bits - y_size_bits(K));
515
516        // If `right_metadata` bits start to the left of the desired position in `input_a` move
517        // bits right, else move left
518        if right_bits_start_offset < y_and_left_bits {
519            let right_bits_pushed_into_input_b = y_and_left_bits - right_bits_start_offset;
520            // Collect bits of `right_metadata` that will fit into `input_a` at the final offset in
521            // eventual `input_a`
522            let right_bits_a = right_metadata >> right_bits_pushed_into_input_b;
523            let input_a = y_bits | left_metadata_bits | right_bits_a;
524            // Collect bits of `right_metadata` that will spill over into `input_b`
525            let input_b = right_metadata << (u128::BITS as usize - right_bits_pushed_into_input_b);
526
527            let input = [input_a.to_be_bytes(), input_b.to_be_bytes()];
528            let input_len =
529                size_of::<u128>() + right_bits_pushed_into_input_b.div_ceil(u8::BITS as usize);
530            ab_blake3::single_block_hash(&input.as_flattened()[..input_len])
531                .expect("Exactly a single block worth of bytes; qed")
532        } else {
533            let right_bits_a = right_metadata << (right_bits_start_offset - y_and_left_bits);
534            let input_a = y_bits | left_metadata_bits | right_bits_a;
535
536            ab_blake3::single_block_hash(&input_a.to_be_bytes()[..num_bytes_with_data])
537                .expect("Less than a single block worth of bytes; qed")
538        }
539    };
540
541    let y_output = Y::from(
542        u32::from_be_bytes([hash[0], hash[1], hash[2], hash[3]])
543            >> (u32::BITS as usize - y_size_bits(K)),
544    );
545
546    let metadata_size_bits = metadata_size_bits(K, TABLE_NUMBER);
547
548    let metadata = if TABLE_NUMBER < 4 {
549        (left_metadata << parent_metadata_bits) | right_metadata
550    } else if metadata_size_bits > 0 {
551        // For K up to 25 it is guaranteed that metadata + bit offset will always fit into u128.
552        // We collect the bytes necessary, potentially with extra bits at the start and end of the
553        // bytes that will be taken care of later.
554        let metadata = u128::from_be_bytes(
555            hash[y_size_bits(K) / u8::BITS as usize..][..size_of::<u128>()]
556                .try_into()
557                .expect("Always enough bits for any K; qed"),
558        );
559        // Remove extra bits at the beginning
560        let metadata = metadata << (y_size_bits(K) % u8::BITS as usize);
561        // Move bits into the correct location
562        metadata >> (u128::BITS as usize - metadata_size_bits)
563    } else {
564        0
565    };
566
567    (y_output, Metadata::from(metadata))
568}
569
570// TODO: This is actually using only pipelining rather than real SIMD (at least explicitly) due to:
571//  * https://github.com/rust-lang/portable-simd/issues/108
572//  * https://github.com/BLAKE3-team/BLAKE3/issues/478#issuecomment-3200106103
573fn compute_fn_simd<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
574    left_ys: [Y; COMPUTE_FN_SIMD_FACTOR],
575    left_metadatas: [Metadata<K, PARENT_TABLE_NUMBER>; COMPUTE_FN_SIMD_FACTOR],
576    right_metadatas: [Metadata<K, PARENT_TABLE_NUMBER>; COMPUTE_FN_SIMD_FACTOR],
577) -> (
578    [Y; COMPUTE_FN_SIMD_FACTOR],
579    [Metadata<K, TABLE_NUMBER>; COMPUTE_FN_SIMD_FACTOR],
580)
581where
582    EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
583    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
584{
585    let parent_metadata_bits = metadata_size_bits(K, PARENT_TABLE_NUMBER);
586    let metadata_size_bits = metadata_size_bits(K, TABLE_NUMBER);
587
588    // TODO: `u128` is not supported as SIMD element yet, see
589    //  https://github.com/rust-lang/portable-simd/issues/108
590    let left_metadatas: [u128; COMPUTE_FN_SIMD_FACTOR] = seq!(N in 0..16 {
591        [
592        #(
593            u128::from(left_metadatas[N]),
594        )*
595        ]
596    });
597    let right_metadatas: [u128; COMPUTE_FN_SIMD_FACTOR] = seq!(N in 0..16 {
598        [
599        #(
600            u128::from(right_metadatas[N]),
601        )*
602        ]
603    });
604
605    // Part of the `right_bits` at the final offset of eventual `input_a`
606    let y_and_left_bits = y_size_bits(K) + parent_metadata_bits;
607    let right_bits_start_offset = u128::BITS as usize - parent_metadata_bits;
608
609    // Take only bytes where bits were set
610    let num_bytes_with_data =
611        (y_size_bits(K) + parent_metadata_bits * 2).div_ceil(u8::BITS as usize);
612
613    // Only supports `K` from 15 to 25 (otherwise math will not be correct when concatenating y,
614    // left metadata and right metadata)
615    // TODO: SIMD hashing once this is possible:
616    //  https://github.com/BLAKE3-team/BLAKE3/issues/478#issuecomment-3200106103
617    let hashes: [_; COMPUTE_FN_SIMD_FACTOR] = seq!(N in 0..16 {
618        [
619        #(
620        {
621            let y = left_ys[N];
622            let left_metadata = left_metadatas[N];
623            let right_metadata = right_metadatas[N];
624
625            // Collect `K` most significant bits of `y` at the final offset of eventual
626            // `input_a`
627            let y_bits = u128::from(y) << (u128::BITS as usize - y_size_bits(K));
628
629            // Move bits of `left_metadata` at the final offset of eventual `input_a`
630            let left_metadata_bits =
631                left_metadata << (u128::BITS as usize - parent_metadata_bits - y_size_bits(K));
632
633            // If `right_metadata` bits start to the left of the desired position in `input_a` move
634            // bits right, else move left
635            if right_bits_start_offset < y_and_left_bits {
636                let right_bits_pushed_into_input_b = y_and_left_bits - right_bits_start_offset;
637                // Collect bits of `right_metadata` that will fit into `input_a` at the final offset
638                // in eventual `input_a`
639                let right_bits_a = right_metadata >> right_bits_pushed_into_input_b;
640                let input_a = y_bits | left_metadata_bits | right_bits_a;
641                // Collect bits of `right_metadata` that will spill over into `input_b`
642                let input_b = right_metadata << (u128::BITS as usize - right_bits_pushed_into_input_b);
643
644                let input = [input_a.to_be_bytes(), input_b.to_be_bytes()];
645                let input_len =
646                    size_of::<u128>() + right_bits_pushed_into_input_b.div_ceil(u8::BITS as usize);
647                ab_blake3::single_block_hash(&input.as_flattened()[..input_len])
648                    .expect("Exactly a single block worth of bytes; qed")
649            } else {
650                let right_bits_a = right_metadata << (right_bits_start_offset - y_and_left_bits);
651                let input_a = y_bits | left_metadata_bits | right_bits_a;
652
653                ab_blake3::single_block_hash(&input_a.to_be_bytes()[..num_bytes_with_data])
654                    .expect("Exactly a single block worth of bytes; qed")
655            }
656        },
657        )*
658        ]
659    });
660
661    let y_outputs = Simd::from_array(
662        hashes.map(|hash| u32::from_be_bytes([hash[0], hash[1], hash[2], hash[3]])),
663    ) >> (u32::BITS - y_size_bits(K) as u32);
664    let y_outputs = Y::array_from_repr(y_outputs.to_array());
665
666    let metadatas = if TABLE_NUMBER < 4 {
667        seq!(N in 0..16 {
668            [
669            #(
670                Metadata::from((left_metadatas[N] << parent_metadata_bits) | right_metadatas[N]),
671            )*
672            ]
673        })
674    } else if metadata_size_bits > 0 {
675        // For K up to 25 it is guaranteed that metadata + bit offset will always fit into u128.
676        // We collect the bytes necessary, potentially with extra bits at the start and end of the
677        // bytes that will be taken care of later.
678        seq!(N in 0..16 {
679            [
680            #(
681            {
682                let metadata = u128::from_be_bytes(
683                    hashes[N][y_size_bits(K) / u8::BITS as usize..][..size_of::<u128>()]
684                        .try_into()
685                        .expect("Always enough bits for any K; qed"),
686                );
687                // Remove extra bits at the beginning
688                let metadata = metadata << (y_size_bits(K) % u8::BITS as usize);
689                // Move bits into the correct location
690                Metadata::from(metadata >> (u128::BITS as usize - metadata_size_bits))
691            },
692            )*
693            ]
694        })
695    } else {
696        [Metadata::default(); _]
697    };
698
699    (y_outputs, metadatas)
700}
701
702/// # Safety
703/// `m` must contain positions that correspond to the parent table
704unsafe fn match_to_result<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
705    parent_table: &Table<K, PARENT_TABLE_NUMBER>,
706    m: &Match,
707) -> (Y, [Position; 2], Metadata<K, TABLE_NUMBER>)
708where
709    Table<K, PARENT_TABLE_NUMBER>: private::NotLastTable,
710    EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
711    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
712    [(); 1 << K]:,
713    [(); num_buckets(K)]:,
714{
715    // SAFETY: Guaranteed by function contract
716    let left_metadata = unsafe { parent_table.metadata(m.left_position) };
717    // SAFETY: Guaranteed by function contract
718    let right_metadata = unsafe { parent_table.metadata(m.right_position) };
719
720    let (y, metadata) =
721        compute_fn::<K, TABLE_NUMBER, PARENT_TABLE_NUMBER>(m.left_y, left_metadata, right_metadata);
722
723    (y, [m.left_position, m.right_position], metadata)
724}
725
726/// # Safety
727/// `matches` must contain positions that correspond to the parent table
728#[inline(always)]
729unsafe fn match_to_result_simd<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
730    parent_table: &Table<K, PARENT_TABLE_NUMBER>,
731    matches: &[Match; COMPUTE_FN_SIMD_FACTOR],
732) -> (
733    [Y; COMPUTE_FN_SIMD_FACTOR],
734    [[Position; 2]; COMPUTE_FN_SIMD_FACTOR],
735    [Metadata<K, TABLE_NUMBER>; COMPUTE_FN_SIMD_FACTOR],
736)
737where
738    Table<K, PARENT_TABLE_NUMBER>: private::NotLastTable,
739    EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
740    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
741    [(); 1 << K]:,
742    [(); num_buckets(K)]:,
743{
744    let left_ys: [_; COMPUTE_FN_SIMD_FACTOR] = seq!(N in 0..16 {
745        [
746        #(
747            matches[N].left_y,
748        )*
749        ]
750    });
751    // SAFETY: Guaranteed by function contract
752    let left_metadatas: [_; COMPUTE_FN_SIMD_FACTOR] = unsafe {
753        seq!(N in 0..16 {
754            [
755            #(
756                parent_table.metadata(matches[N].left_position),
757            )*
758            ]
759        })
760    };
761    // SAFETY: Guaranteed by function contract
762    let right_metadatas: [_; COMPUTE_FN_SIMD_FACTOR] = unsafe {
763        seq!(N in 0..16 {
764            [
765            #(
766                parent_table.metadata(matches[N].right_position),
767            )*
768            ]
769        })
770    };
771
772    let (y_outputs, metadatas) = compute_fn_simd::<K, TABLE_NUMBER, PARENT_TABLE_NUMBER>(
773        left_ys,
774        left_metadatas,
775        right_metadatas,
776    );
777
778    let positions = seq!(N in 0..16 {
779        [
780        #(
781            [
782                matches[N].left_position,
783                matches[N].right_position,
784            ],
785        )*
786        ]
787    });
788
789    (y_outputs, positions, metadatas)
790}
791
792/// # Safety
793/// `matches` must contain positions that correspond to the parent table. `ys`, `position` and
794/// `metadatas` length must be at least the length of `matches`
795#[inline(always)]
796unsafe fn matches_to_results<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
797    parent_table: &Table<K, PARENT_TABLE_NUMBER>,
798    matches: &[Match],
799    ys: &mut [MaybeUninit<Y>],
800    positions: &mut [MaybeUninit<[Position; 2]>],
801    metadatas: &mut [MaybeUninit<Metadata<K, TABLE_NUMBER>>],
802) where
803    Table<K, PARENT_TABLE_NUMBER>: private::NotLastTable,
804    EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
805    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
806    [(); 1 << K]:,
807    [(); num_buckets(K)]:,
808{
809    let (grouped_matches, other_matches) = matches.as_chunks::<COMPUTE_FN_SIMD_FACTOR>();
810    let (grouped_ys, other_ys) = ys.split_at_mut(grouped_matches.as_flattened().len());
811    let grouped_ys = grouped_ys.as_chunks_mut::<COMPUTE_FN_SIMD_FACTOR>().0;
812    let (grouped_positions, other_positions) =
813        positions.split_at_mut(grouped_matches.as_flattened().len());
814    let grouped_positions = grouped_positions
815        .as_chunks_mut::<COMPUTE_FN_SIMD_FACTOR>()
816        .0;
817    let (grouped_metadatas, other_metadatas) =
818        metadatas.split_at_mut(grouped_matches.as_flattened().len());
819    let grouped_metadatas = grouped_metadatas
820        .as_chunks_mut::<COMPUTE_FN_SIMD_FACTOR>()
821        .0;
822
823    for (((grouped_matches, grouped_ys), grouped_positions), grouped_metadatas) in grouped_matches
824        .iter()
825        .zip(grouped_ys)
826        .zip(grouped_positions)
827        .zip(grouped_metadatas)
828    {
829        // SAFETY: Guaranteed by function contract
830        let (ys_group, positions_group, metadatas_group) =
831            unsafe { match_to_result_simd(parent_table, grouped_matches) };
832        grouped_ys.write_copy_of_slice(&ys_group);
833        grouped_positions.write_copy_of_slice(&positions_group);
834
835        // The last table doesn't have metadata
836        if metadata_size_bits(K, TABLE_NUMBER) > 0 {
837            grouped_metadatas.write_copy_of_slice(&metadatas_group);
838        }
839    }
840    for (((other_match, other_y), other_positions), other_metadata) in other_matches
841        .iter()
842        .zip(other_ys)
843        .zip(other_positions)
844        .zip(other_metadatas)
845    {
846        // SAFETY: Guaranteed by function contract
847        let (y, p, metadata) = unsafe { match_to_result(parent_table, other_match) };
848        other_y.write(y);
849        other_positions.write(p);
850        // The last table doesn't have metadata
851        if metadata_size_bits(K, TABLE_NUMBER) > 0 {
852            other_metadata.write(metadata);
853        }
854    }
855}
856
857#[derive(Debug)]
858pub(super) enum Table<const K: u8, const TABLE_NUMBER: u8>
859where
860    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
861    [(); 1 << K]:,
862    [(); num_buckets(K)]:,
863{
864    /// First table with the contents of entries split into separate vectors for more efficient
865    /// access
866    First {
867        /// Derived values computed from `x`
868        ys: Vec<Y>,
869        /// Each bucket contains positions of `Y` values that belong to it.
870        ///
871        /// Buckets are padded with sentinel values to `REDUCED_BUCKETS_SIZE`.
872        buckets: Box<[[Position; REDUCED_BUCKETS_SIZE]; num_buckets(K)]>,
873    },
874    /// Other tables
875    Other {
876        /// Derived values computed from the previous table
877        ys: Vec<Y>,
878        /// Left and right entry positions in a previous table encoded into bits
879        positions: Box<[MaybeUninit<[Position; 2]>; 1 << K]>,
880        /// Metadata corresponding to each entry
881        metadatas: Vec<Metadata<K, TABLE_NUMBER>>,
882        /// Each bucket contains positions of `Y` values that belong to it.
883        ///
884        /// Buckets are padded with sentinel values to `REDUCED_BUCKETS_SIZE`.
885        buckets: Box<[[Position; REDUCED_BUCKETS_SIZE]; num_buckets(K)]>,
886    },
887    /// Other tables
888    #[cfg(any(feature = "parallel", test))]
889    OtherBuckets {
890        /// Derived values computed from the previous table.
891        ///
892        /// Only positions from the `buckets` field are guaranteed to be initialized.
893        ys: Vec<[MaybeUninit<Y>; REDUCED_MATCHES_COUNT]>,
894        /// Left and right entry positions in a previous table encoded into bits.
895        ///
896        /// Only positions from the `buckets` field are guaranteed to be initialized.
897        positions: Box<[[MaybeUninit<[Position; 2]>; REDUCED_MATCHES_COUNT]; num_buckets(K)]>,
898        /// Metadata corresponding to each entry.
899        ///
900        /// Only positions from the `buckets` field are guaranteed to be initialized.
901        metadatas: Vec<[MaybeUninit<Metadata<K, TABLE_NUMBER>>; REDUCED_MATCHES_COUNT]>,
902        /// Each bucket contains positions of `Y` values that belong to it.
903        ///
904        /// Buckets are padded with sentinel values to `REDUCED_BUCKETS_SIZE`.
905        buckets: Box<[[Position; REDUCED_BUCKETS_SIZE]; num_buckets(K)]>,
906    },
907}
908
909impl<const K: u8> Table<K, 1>
910where
911    EvaluatableUsize<{ metadata_size_bytes(K, 1) }>: Sized,
912    [(); 1 << K]:,
913    [(); num_buckets(K)]:,
914{
915    /// Create the table
916    pub(super) fn create(seed: Seed) -> Self
917    where
918        EvaluatableUsize<{ K as usize * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize }>: Sized,
919    {
920        // `MAX_BUCKET_SIZE` is not actively used, but is an upper-bound reference for the other
921        // parameters
922        debug_assert!(
923            MAX_BUCKET_SIZE >= bucket_size_upper_bound(K, BUCKET_SIZE_UPPER_BOUND_SECURITY_BITS),
924            "Max bucket size is not sufficiently large"
925        );
926
927        let partial_ys = partial_ys::<K>(seed);
928
929        // SAFETY: Contents is `MaybeUninit`
930        let mut ys = unsafe { Box::<[MaybeUninit<Y>; 1 << K]>::new_uninit().assume_init() };
931
932        for ((ys, xs_batch_start), partial_ys) in ys
933            .as_chunks_mut::<COMPUTE_F1_SIMD_FACTOR>()
934            .0
935            .iter_mut()
936            .zip((X::ZERO..).step_by(COMPUTE_F1_SIMD_FACTOR))
937            .zip(
938                partial_ys
939                    .as_chunks::<{ K as usize * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize }>()
940                    .0,
941            )
942        {
943            let xs = Simd::splat(u32::from(xs_batch_start))
944                + Simd::from_array(array::from_fn(|i| i as u32));
945            let ys_batch = compute_f1_simd::<K>(xs, partial_ys);
946
947            ys.write_copy_of_slice(&ys_batch);
948        }
949
950        // SAFETY: Converting a boxed array to a vector of the same size, which has the same memory
951        // layout, all elements were initialized
952        let ys = unsafe {
953            let ys_len = ys.len();
954            let ys = Box::into_raw(ys);
955            Vec::from_raw_parts(ys.cast(), ys_len, ys_len)
956        };
957
958        // TODO: Try to group buckets in the process of collecting `y`s
959        let buckets = group_by_buckets::<K>(&ys);
960
961        Self::First { ys, buckets }
962    }
963
964    /// Create the table, leverages available parallelism
965    #[cfg(any(feature = "parallel", test))]
966    pub(super) fn create_parallel(seed: Seed) -> Self
967    where
968        EvaluatableUsize<{ K as usize * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize }>: Sized,
969    {
970        // `MAX_BUCKET_SIZE` is not actively used, but is an upper-bound reference for the other
971        // parameters
972        debug_assert!(
973            MAX_BUCKET_SIZE >= bucket_size_upper_bound(K, BUCKET_SIZE_UPPER_BOUND_SECURITY_BITS),
974            "Max bucket size is not sufficiently large"
975        );
976
977        let partial_ys = partial_ys::<K>(seed);
978
979        // SAFETY: Contents is `MaybeUninit`
980        let mut ys = unsafe { Box::<[MaybeUninit<Y>; 1 << K]>::new_uninit().assume_init() };
981
982        // TODO: Try parallelism here?
983        for ((ys, xs_batch_start), partial_ys) in ys
984            .as_chunks_mut::<COMPUTE_F1_SIMD_FACTOR>()
985            .0
986            .iter_mut()
987            .zip((X::ZERO..).step_by(COMPUTE_F1_SIMD_FACTOR))
988            .zip(
989                partial_ys
990                    .as_chunks::<{ K as usize * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize }>()
991                    .0,
992            )
993        {
994            let xs = Simd::splat(u32::from(xs_batch_start))
995                + Simd::from_array(array::from_fn(|i| i as u32));
996            let ys_batch = compute_f1_simd::<K>(xs, partial_ys);
997
998            ys.write_copy_of_slice(&ys_batch);
999        }
1000
1001        // SAFETY: Converting a boxed array to a vector of the same size, which has the same memory
1002        // layout, all elements were initialized
1003        let ys = unsafe {
1004            let ys_len = ys.len();
1005            let ys = Box::into_raw(ys);
1006            Vec::from_raw_parts(ys.cast(), ys_len, ys_len)
1007        };
1008
1009        // TODO: Try to group buckets in the process of collecting `y`s
1010        let buckets = group_by_buckets::<K>(&ys);
1011
1012        Self::First { ys, buckets }
1013    }
1014}
1015
1016mod private {
1017    pub(in super::super) trait SupportedOtherTables {}
1018    pub(in super::super) trait NotLastTable {}
1019}
1020
1021impl<const K: u8> private::SupportedOtherTables for Table<K, 2>
1022where
1023    EvaluatableUsize<{ metadata_size_bytes(K, 2) }>: Sized,
1024    [(); 1 << K]:,
1025    [(); num_buckets(K)]:,
1026{
1027}
1028impl<const K: u8> private::SupportedOtherTables for Table<K, 3>
1029where
1030    EvaluatableUsize<{ metadata_size_bytes(K, 3) }>: Sized,
1031    [(); 1 << K]:,
1032    [(); num_buckets(K)]:,
1033{
1034}
1035impl<const K: u8> private::SupportedOtherTables for Table<K, 4>
1036where
1037    EvaluatableUsize<{ metadata_size_bytes(K, 4) }>: Sized,
1038    [(); 1 << K]:,
1039    [(); num_buckets(K)]:,
1040{
1041}
1042impl<const K: u8> private::SupportedOtherTables for Table<K, 5>
1043where
1044    EvaluatableUsize<{ metadata_size_bytes(K, 5) }>: Sized,
1045    [(); 1 << K]:,
1046    [(); num_buckets(K)]:,
1047{
1048}
1049impl<const K: u8> private::SupportedOtherTables for Table<K, 6>
1050where
1051    EvaluatableUsize<{ metadata_size_bytes(K, 6) }>: Sized,
1052    [(); 1 << K]:,
1053    [(); num_buckets(K)]:,
1054{
1055}
1056impl<const K: u8> private::SupportedOtherTables for Table<K, 7>
1057where
1058    EvaluatableUsize<{ metadata_size_bytes(K, 7) }>: Sized,
1059    [(); 1 << K]:,
1060    [(); num_buckets(K)]:,
1061{
1062}
1063
1064impl<const K: u8> private::NotLastTable for Table<K, 1>
1065where
1066    EvaluatableUsize<{ metadata_size_bytes(K, 1) }>: Sized,
1067    [(); 1 << K]:,
1068    [(); num_buckets(K)]:,
1069{
1070}
1071impl<const K: u8> private::NotLastTable for Table<K, 2>
1072where
1073    EvaluatableUsize<{ metadata_size_bytes(K, 2) }>: Sized,
1074    [(); 1 << K]:,
1075    [(); num_buckets(K)]:,
1076{
1077}
1078impl<const K: u8> private::NotLastTable for Table<K, 3>
1079where
1080    EvaluatableUsize<{ metadata_size_bytes(K, 3) }>: Sized,
1081    [(); 1 << K]:,
1082    [(); num_buckets(K)]:,
1083{
1084}
1085impl<const K: u8> private::NotLastTable for Table<K, 4>
1086where
1087    EvaluatableUsize<{ metadata_size_bytes(K, 4) }>: Sized,
1088    [(); 1 << K]:,
1089    [(); num_buckets(K)]:,
1090{
1091}
1092impl<const K: u8> private::NotLastTable for Table<K, 5>
1093where
1094    EvaluatableUsize<{ metadata_size_bytes(K, 5) }>: Sized,
1095    [(); 1 << K]:,
1096    [(); num_buckets(K)]:,
1097{
1098}
1099impl<const K: u8> private::NotLastTable for Table<K, 6>
1100where
1101    EvaluatableUsize<{ metadata_size_bytes(K, 6) }>: Sized,
1102    [(); 1 << K]:,
1103    [(); num_buckets(K)]:,
1104{
1105}
1106
1107impl<const K: u8, const TABLE_NUMBER: u8> Table<K, TABLE_NUMBER>
1108where
1109    Self: private::SupportedOtherTables,
1110    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
1111    [(); 1 << K]:,
1112    [(); num_buckets(K)]:,
1113{
1114    /// Creates a new [`TABLE_NUMBER`] table. There also exists [`Self::create_parallel()`] that
1115    /// trades CPU efficiency and memory usage for lower latency and with multiple parallel calls,
1116    /// better overall performance.
1117    pub(super) fn create<const PARENT_TABLE_NUMBER: u8>(
1118        parent_table: &mut Table<K, PARENT_TABLE_NUMBER>,
1119        cache: &mut TablesCache<K>,
1120    ) -> Self
1121    where
1122        Table<K, PARENT_TABLE_NUMBER>: private::NotLastTable,
1123        EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
1124    {
1125        let left_targets = &*cache.left_targets;
1126        let mut initialized_elements = 0_usize;
1127        // SAFETY: Contents is `MaybeUninit`
1128        let mut ys = unsafe { Box::<[MaybeUninit<Y>; 1 << K]>::new_uninit().assume_init() };
1129        // SAFETY: Contents is `MaybeUninit`
1130        let mut positions =
1131            unsafe { Box::<[MaybeUninit<[Position; 2]>; 1 << K]>::new_uninit().assume_init() };
1132        // The last table doesn't have metadata
1133        // SAFETY: Contents is `MaybeUninit`
1134        let mut metadatas = unsafe {
1135            Box::<[MaybeUninit<Metadata<K, TABLE_NUMBER>>; 1 << K]>::new_uninit().assume_init()
1136        };
1137
1138        for ([left_bucket, right_bucket], left_bucket_index) in
1139            parent_table.buckets().array_windows().zip(0..)
1140        {
1141            let mut matches = [MaybeUninit::uninit(); _];
1142            // SAFETY: Positions are taken from `Table::buckets()` and correspond to initialized
1143            // values
1144            let matches = unsafe {
1145                find_matches_in_buckets(
1146                    left_bucket_index,
1147                    left_bucket,
1148                    right_bucket,
1149                    parent_table,
1150                    &mut matches,
1151                    left_targets,
1152                )
1153            };
1154            // Throw away some successful matches that are not that necessary
1155            let matches = &matches[..matches.len().min(REDUCED_MATCHES_COUNT)];
1156            // SAFETY: Already initialized this many elements
1157            let (ys, positions, metadatas) = unsafe {
1158                (
1159                    ys.split_at_mut_unchecked(initialized_elements).1,
1160                    positions.split_at_mut_unchecked(initialized_elements).1,
1161                    metadatas.split_at_mut_unchecked(initialized_elements).1,
1162                )
1163            };
1164
1165            // SAFETY: Preallocated length is an upper bound and is always sufficient
1166            let (ys, positions, metadatas) = unsafe {
1167                (
1168                    ys.split_at_mut_unchecked(matches.len()).0,
1169                    positions.split_at_mut_unchecked(matches.len()).0,
1170                    metadatas.split_at_mut_unchecked(matches.len()).0,
1171                )
1172            };
1173
1174            // SAFETY: Matches come from the parent table and the size of `ys`, `positions`
1175            // and `metadatas` is the same as the number of matches
1176            unsafe {
1177                matches_to_results(parent_table, matches, ys, positions, metadatas);
1178            }
1179
1180            initialized_elements += matches.len();
1181        }
1182
1183        parent_table.clear_ys_and_metadata();
1184
1185        // SAFETY: Converting a boxed array to a vector of the same size, which has the same memory
1186        // layout, the number of elements matches the number of elements that were initialized
1187        let ys = unsafe {
1188            let ys_len = ys.len();
1189            let ys = Box::into_raw(ys);
1190            Vec::from_raw_parts(ys.cast(), initialized_elements, ys_len)
1191        };
1192
1193        // SAFETY: Converting a boxed array to a vector of the same size, which has the same memory
1194        // layout, the number of elements matches the number of elements that were initialized
1195        let metadatas = unsafe {
1196            let metadatas_len = metadatas.len();
1197            let metadatas = Box::into_raw(metadatas);
1198            Vec::from_raw_parts(metadatas.cast(), initialized_elements, metadatas_len)
1199        };
1200
1201        // TODO: Try to group buckets in the process of collecting `y`s
1202        let buckets = group_by_buckets::<K>(&ys);
1203
1204        Self::Other {
1205            ys,
1206            positions,
1207            metadatas,
1208            buckets,
1209        }
1210    }
1211
1212    /// Almost the same as [`Self::create()`], but uses parallelism internally for better
1213    /// performance (though not efficiency of CPU and memory usage), if you create multiple tables
1214    /// in parallel, prefer [`Self::create_parallel()`] for better overall performance.
1215    #[cfg(any(feature = "parallel", test))]
1216    pub(super) fn create_parallel<const PARENT_TABLE_NUMBER: u8>(
1217        parent_table: &mut Table<K, PARENT_TABLE_NUMBER>,
1218        cache: &mut TablesCache<K>,
1219    ) -> Self
1220    where
1221        Table<K, PARENT_TABLE_NUMBER>: private::NotLastTable,
1222        EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
1223    {
1224        // SAFETY: Contents is `MaybeUninit`
1225        let ys = unsafe {
1226            Box::<[SyncUnsafeCell<[MaybeUninit<_>; REDUCED_MATCHES_COUNT]>; num_buckets(K)]>::new_uninit().assume_init()
1227        };
1228        // SAFETY: Contents is `MaybeUninit`
1229        let positions = unsafe {
1230            Box::<[SyncUnsafeCell<[MaybeUninit<_>; REDUCED_MATCHES_COUNT]>; num_buckets(K)]>::new_uninit().assume_init()
1231        };
1232        // SAFETY: Contents is `MaybeUninit`
1233        let metadatas = unsafe {
1234            Box::<[SyncUnsafeCell<[MaybeUninit<_>; REDUCED_MATCHES_COUNT]>; num_buckets(K)]>::new_uninit().assume_init()
1235        };
1236        let global_results_counts =
1237            array::from_fn::<_, { num_buckets(K) }, _>(|_| SyncUnsafeCell::new(0u16));
1238
1239        let left_targets = &*cache.left_targets;
1240
1241        let buckets = parent_table.buckets();
1242        // Iterate over buckets in batches, such that a cache line worth of bytes is taken from
1243        // `global_results_counts` each time to avoid unnecessary false sharing
1244        let bucket_batch_size = CACHE_LINE_SIZE / size_of::<u16>();
1245        let bucket_batch_index = AtomicUsize::new(0);
1246
1247        rayon::broadcast(|_ctx| {
1248            loop {
1249                let bucket_batch_index = bucket_batch_index.fetch_add(1, Ordering::Relaxed);
1250
1251                let buckets_batch = buckets
1252                    .array_windows::<2>()
1253                    .enumerate()
1254                    .skip(bucket_batch_index * bucket_batch_size)
1255                    .take(bucket_batch_size);
1256
1257                if buckets_batch.is_empty() {
1258                    break;
1259                }
1260
1261                for (left_bucket_index, [left_bucket, right_bucket]) in buckets_batch {
1262                    let mut matches = [MaybeUninit::uninit(); _];
1263                    // SAFETY: Positions are taken from `Table::buckets()` and correspond to initialized
1264                    // values
1265                    let matches = unsafe {
1266                        find_matches_in_buckets(
1267                            left_bucket_index as u32,
1268                            left_bucket,
1269                            right_bucket,
1270                            parent_table,
1271                            &mut matches,
1272                            left_targets,
1273                        )
1274                    };
1275                    // Throw away some successful matches that are not that necessary
1276                    let matches = &matches[..matches.len().min(REDUCED_MATCHES_COUNT)];
1277
1278                    // SAFETY: This is the only place where `left_bucket_index`'s entry is accessed at
1279                    // this time, and it is guaranteed to be in range
1280                    let ys = unsafe { &mut *ys.get_unchecked(left_bucket_index).get() };
1281                    // SAFETY: This is the only place where `left_bucket_index`'s entry is accessed at
1282                    // this time, and it is guaranteed to be in range
1283                    let positions =
1284                        unsafe { &mut *positions.get_unchecked(left_bucket_index).get() };
1285                    // SAFETY: This is the only place where `left_bucket_index`'s entry is accessed at
1286                    // this time, and it is guaranteed to be in range
1287                    let metadatas =
1288                        unsafe { &mut *metadatas.get_unchecked(left_bucket_index).get() };
1289                    // SAFETY: This is the only place where `left_bucket_index`'s entry is accessed at
1290                    // this time, and it is guaranteed to be in range
1291                    let count = unsafe {
1292                        &mut *global_results_counts.get_unchecked(left_bucket_index).get()
1293                    };
1294
1295                    // SAFETY: Matches come from the parent table and the size of `ys`, `positions`
1296                    // and `metadatas` is larger or equal to the number of matches
1297                    unsafe {
1298                        matches_to_results::<_, TABLE_NUMBER, _>(
1299                            parent_table,
1300                            matches,
1301                            ys,
1302                            positions,
1303                            metadatas,
1304                        )
1305                    };
1306                    *count = matches.len() as u16;
1307                }
1308            }
1309        });
1310
1311        parent_table.clear_ys_and_metadata();
1312
1313        let ys = strip_sync_unsafe_cell(ys);
1314        let positions = strip_sync_unsafe_cell(positions);
1315        let metadatas = strip_sync_unsafe_cell(metadatas);
1316        // SAFETY: Converting a boxed array to a vector of the same size, which has the same memory
1317        // layout
1318        let ys = unsafe {
1319            let ys_len = ys.len();
1320            let ys = Box::into_raw(ys);
1321            Vec::from_raw_parts(ys, ys_len, ys_len)
1322        };
1323        let ys = ys.into_flattened();
1324        // SAFETY: Converting a boxed array to a vector of the same size, which has the same memory
1325        // layout
1326        let metadatas = unsafe {
1327            let metadatas_len = metadatas.len();
1328            let metadatas = Box::into_raw(metadatas);
1329            Vec::from_raw_parts(metadatas, metadatas_len, metadatas_len)
1330        };
1331        let metadatas = metadatas.into_flattened();
1332
1333        // TODO: Try to group buckets in the process of collecting `y`s
1334        // SAFETY: `global_results_counts` corresponds to the number of initialized `ys`
1335        let buckets = unsafe {
1336            group_by_buckets_from_buckets::<K, _>(
1337                ys.iter().zip(
1338                    global_results_counts
1339                        .into_iter()
1340                        .map(|count| usize::from(count.into_inner())),
1341                ),
1342            )
1343        };
1344
1345        Self::OtherBuckets {
1346            ys,
1347            positions,
1348            metadatas,
1349            buckets,
1350        }
1351    }
1352
1353    /// Get `[left_position, right_position]` of a previous table for a specified position in a
1354    /// current table.
1355    ///
1356    /// # Safety
1357    /// `position` must come from [`Self::buckets()`] or [`Self::position()`] and not be a sentinel
1358    /// value.
1359    #[inline(always)]
1360    pub(super) unsafe fn position(&self, position: Position) -> [Position; 2] {
1361        match self {
1362            Table::First { .. } => {
1363                unreachable!("Not the first table");
1364            }
1365            Table::Other { positions, .. } => {
1366                // SAFETY: All non-sentinel positions returned by [`Self::buckets()`] are valid
1367                unsafe { positions.get_unchecked(usize::from(position)).assume_init() }
1368            }
1369            #[cfg(any(feature = "parallel", test))]
1370            Table::OtherBuckets { positions, .. } => {
1371                // SAFETY: All non-sentinel positions returned by [`Self::buckets()`] are valid
1372                unsafe {
1373                    positions
1374                        .as_flattened()
1375                        .get_unchecked(usize::from(position))
1376                        .assume_init()
1377                }
1378            }
1379        }
1380    }
1381}
1382
1383impl<const K: u8, const TABLE_NUMBER: u8> Table<K, TABLE_NUMBER>
1384where
1385    Self: private::NotLastTable,
1386    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
1387    [(); 1 << K]:,
1388    [(); num_buckets(K)]:,
1389{
1390    /// Returns `None` for an invalid position or for table number 7.
1391    ///
1392    /// # Safety
1393    /// `position` must come from [`Self::buckets()`] and not be a sentinel value.
1394    #[inline(always)]
1395    unsafe fn metadata(&self, position: Position) -> Metadata<K, TABLE_NUMBER> {
1396        match self {
1397            Table::First { .. } => {
1398                // X matches position
1399                Metadata::from(X::from(u32::from(position)))
1400            }
1401            Table::Other { metadatas, .. } => {
1402                // SAFETY: All non-sentinel positions returned by [`Self::buckets()`] are valid
1403                *unsafe { metadatas.get_unchecked(usize::from(position)) }
1404            }
1405            #[cfg(any(feature = "parallel", test))]
1406            Table::OtherBuckets { metadatas, .. } => {
1407                // SAFETY: All non-sentinel positions returned by [`Self::buckets()`] are valid
1408                unsafe {
1409                    metadatas
1410                        .as_flattened()
1411                        .get_unchecked(usize::from(position))
1412                        .assume_init()
1413                }
1414            }
1415        }
1416    }
1417}
1418
1419impl<const K: u8, const TABLE_NUMBER: u8> Table<K, TABLE_NUMBER>
1420where
1421    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
1422    [(); 1 << K]:,
1423    [(); num_buckets(K)]:,
1424{
1425    /// Get `y` at for a specified position.
1426    ///
1427    /// # Safety
1428    /// `position` must come from [`Self::buckets()`] and not be a sentinel value.
1429    #[inline(always)]
1430    pub(super) unsafe fn y(&self, position: Position) -> Y {
1431        match self {
1432            Table::First { ys, .. } => {
1433                // SAFETY: All non-sentinel positions returned by [`Self::buckets()`] are valid
1434                *unsafe { ys.get_unchecked(usize::from(position)) }
1435            }
1436            Table::Other { ys, .. } => {
1437                // SAFETY: All non-sentinel positions returned by [`Self::buckets()`] are valid
1438                *unsafe { ys.get_unchecked(usize::from(position)) }
1439            }
1440            #[cfg(any(feature = "parallel", test))]
1441            Table::OtherBuckets { ys, .. } => {
1442                // SAFETY: All non-sentinel positions returned by [`Self::buckets()`] are valid
1443                unsafe {
1444                    ys.as_flattened()
1445                        .get_unchecked(usize::from(position))
1446                        .assume_init()
1447                }
1448            }
1449        }
1450    }
1451
1452    fn clear_ys_and_metadata(&mut self) {
1453        match self {
1454            Table::First { ys, .. } => {
1455                mem::take(ys);
1456            }
1457            Table::Other { ys, metadatas, .. } => {
1458                mem::take(ys);
1459                mem::take(metadatas);
1460            }
1461            #[cfg(any(feature = "parallel", test))]
1462            Table::OtherBuckets { ys, metadatas, .. } => {
1463                mem::take(ys);
1464                mem::take(metadatas);
1465            }
1466        }
1467    }
1468
1469    /// Positions of `y`s grouped by the bucket they belong to
1470    #[inline(always)]
1471    pub(super) fn buckets(&self) -> &[[Position; REDUCED_BUCKETS_SIZE]; num_buckets(K)] {
1472        match self {
1473            Table::First { buckets, .. } => buckets,
1474            Table::Other { buckets, .. } => buckets,
1475            #[cfg(any(feature = "parallel", test))]
1476            Table::OtherBuckets { buckets, .. } => buckets,
1477        }
1478    }
1479}