ab_proof_of_space/chiapos/
table.rs

1#[cfg(test)]
2mod tests;
3pub(super) mod types;
4
5#[cfg(not(feature = "std"))]
6extern crate alloc;
7
8use crate::chiapos::Seed;
9use crate::chiapos::constants::{PARAM_B, PARAM_BC, PARAM_C, PARAM_EXT, PARAM_M};
10use crate::chiapos::table::types::{Metadata, Position, X, Y};
11use crate::chiapos::utils::EvaluatableUsize;
12use ab_chacha8::{ChaCha8Block, ChaCha8State};
13#[cfg(not(feature = "std"))]
14use alloc::vec;
15#[cfg(not(feature = "std"))]
16use alloc::vec::Vec;
17use chacha20::cipher::{Iv, KeyIvInit, StreamCipher};
18use chacha20::{ChaCha8, Key};
19use core::array;
20use core::simd::Simd;
21use core::simd::num::SimdUint;
22#[cfg(all(feature = "std", any(feature = "parallel", test)))]
23use parking_lot::Mutex;
24#[cfg(any(feature = "parallel", test))]
25use rayon::prelude::*;
26use seq_macro::seq;
27#[cfg(all(not(feature = "std"), any(feature = "parallel", test)))]
28use spin::Mutex;
29
30pub(super) const COMPUTE_F1_SIMD_FACTOR: usize = 8;
31pub(super) const FIND_MATCHES_AND_COMPUTE_UNROLL_FACTOR: usize = 8;
32
33/// Compute the size of `y` in bits
34pub(super) const fn y_size_bits(k: u8) -> usize {
35    k as usize + PARAM_EXT as usize
36}
37
38/// Metadata size in bytes
39pub const fn metadata_size_bytes(k: u8, table_number: u8) -> usize {
40    metadata_size_bits(k, table_number).div_ceil(u8::BITS as usize)
41}
42
43/// Metadata size in bits
44pub(super) const fn metadata_size_bits(k: u8, table_number: u8) -> usize {
45    k as usize
46        * match table_number {
47            1 => 1,
48            2 => 2,
49            3 | 4 => 4,
50            5 => 3,
51            6 => 2,
52            7 => 0,
53            _ => unreachable!(),
54        }
55}
56
57/// ChaCha8 [`Vec`] sufficient for the whole first table for [`K`].
58/// Prefer [`partial_y`] if you need partial y just for a single `x`.
59fn partial_ys<const K: u8>(seed: Seed) -> Vec<u8> {
60    let output_len_bits = usize::from(K) * (1 << K);
61    let mut output = vec![0; output_len_bits.div_ceil(u8::BITS as usize)];
62
63    let key = Key::from(seed);
64    let iv = Iv::<ChaCha8>::default();
65
66    let mut cipher = ChaCha8::new(&key, &iv);
67
68    cipher.write_keystream(&mut output);
69
70    output
71}
72
73#[derive(Debug, Clone)]
74struct LeftTargets {
75    left_targets: Vec<Position>,
76}
77
78fn calculate_left_targets() -> LeftTargets {
79    let mut left_targets = Vec::with_capacity(2 * usize::from(PARAM_BC) * usize::from(PARAM_M));
80
81    let param_b = u32::from(PARAM_B);
82    let param_c = u32::from(PARAM_C);
83
84    for parity in 0..=1u32 {
85        for r in 0..u32::from(PARAM_BC) {
86            let c = r / param_c;
87
88            for m in 0..u32::from(PARAM_M) {
89                let target = ((c + m) % param_b) * param_c
90                    + (((2 * m + parity) * (2 * m + parity) + r) % param_c);
91                left_targets.push(Position::from(target));
92            }
93        }
94    }
95
96    LeftTargets { left_targets }
97}
98
99fn calculate_left_target_on_demand(parity: u32, r: u32, m: u32) -> u32 {
100    let param_b = u32::from(PARAM_B);
101    let param_c = u32::from(PARAM_C);
102
103    let c = r / param_c;
104
105    ((c + m) % param_b) * param_c + (((2 * m + parity) * (2 * m + parity) + r) % param_c)
106}
107
108/// Caches that can be used to optimize creation of multiple [`Tables`](super::Tables).
109#[derive(Debug, Clone)]
110pub struct TablesCache<const K: u8> {
111    buckets: Vec<Bucket>,
112    rmap_scratch: Vec<RmapItem>,
113    left_targets: LeftTargets,
114}
115
116impl<const K: u8> Default for TablesCache<K> {
117    /// Create new instance
118    fn default() -> Self {
119        Self {
120            buckets: Vec::new(),
121            rmap_scratch: Vec::new(),
122            left_targets: calculate_left_targets(),
123        }
124    }
125}
126
127#[derive(Debug)]
128struct Match {
129    left_position: Position,
130    left_y: Y,
131    right_position: Position,
132}
133
134#[derive(Debug, Default, Copy, Clone, Eq, PartialEq)]
135struct Bucket {
136    /// Bucket index
137    bucket_index: u32,
138    /// Start position of this bucket in the table
139    start_position: Position,
140    /// Size of this bucket
141    size: Position,
142}
143
144#[derive(Debug, Default, Copy, Clone)]
145pub(super) struct RmapItem {
146    count: Position,
147    start_position: Position,
148}
149
150/// `partial_y_offset` is in bits within `partial_y`
151pub(super) fn compute_f1<const K: u8>(x: X, seed: &Seed) -> Y {
152    let skip_bits = u32::from(K) * u32::from(x);
153    let skip_u32s = skip_bits / u32::BITS;
154    let partial_y_offset = skip_bits % u32::BITS;
155
156    const U32S_PER_BLOCK: usize = size_of::<ChaCha8Block>() / size_of::<u32>();
157
158    let initial_state = ChaCha8State::init(seed, &[0; _]);
159    let first_block_counter = skip_u32s / U32S_PER_BLOCK as u32;
160    let u32_in_first_block = skip_u32s as usize % U32S_PER_BLOCK;
161
162    let first_block = initial_state.compute_block(first_block_counter);
163    let hi = first_block[u32_in_first_block].to_be();
164
165    // TODO: Is SIMD version of `compute_block()` that produces two blocks at once possible?
166    let lo = if u32_in_first_block + 1 == U32S_PER_BLOCK {
167        // Spilled over into the second block
168        let second_block = initial_state.compute_block(first_block_counter + 1);
169        second_block[0].to_be()
170    } else {
171        first_block[u32_in_first_block + 1].to_be()
172    };
173
174    let partial_y = (u64::from(hi) << u32::BITS) | u64::from(lo);
175
176    let pre_y = partial_y >> (u64::BITS - u32::from(K + PARAM_EXT) - partial_y_offset);
177    let pre_y = pre_y as u32;
178    // Mask for clearing the rest of bits of `pre_y`.
179    let pre_y_mask = (u32::MAX << PARAM_EXT) & (u32::MAX >> (u32::BITS - u32::from(K + PARAM_EXT)));
180
181    // Extract `PARAM_EXT` most significant bits from `x` and store in the final offset of
182    // eventual `y` with the rest of bits being zero (`x` is `0..2^K`)
183    let pre_ext = u32::from(x) >> (K - PARAM_EXT);
184
185    // Combine all of the bits together:
186    // [padding zero bits][`K` bits rom `partial_y`][`PARAM_EXT` bits from `x`]
187    Y::from((pre_y & pre_y_mask) | pre_ext)
188}
189
190pub(super) fn compute_f1_simd<const K: u8>(
191    xs: [u32; COMPUTE_F1_SIMD_FACTOR],
192    partial_ys: &[u8; K as usize * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize],
193) -> [Y; COMPUTE_F1_SIMD_FACTOR] {
194    // Each element contains `K` desired bits of `partial_ys` in the final offset of eventual `ys`
195    // with the rest of bits being in undefined state
196    let pre_ys_bytes = array::from_fn(|i| {
197        let partial_y_offset = i * usize::from(K);
198        let partial_y_length =
199            (partial_y_offset % u8::BITS as usize + usize::from(K)).div_ceil(u8::BITS as usize);
200        let mut pre_y_bytes = 0u64.to_be_bytes();
201        pre_y_bytes[..partial_y_length].copy_from_slice(
202            &partial_ys[partial_y_offset / u8::BITS as usize..][..partial_y_length],
203        );
204
205        u64::from_be_bytes(pre_y_bytes)
206    });
207    let pre_ys_right_offset = array::from_fn(|i| {
208        let partial_y_offset = i as u32 * u32::from(K);
209        u64::from(u64::BITS - u32::from(K + PARAM_EXT) - partial_y_offset % u8::BITS)
210    });
211    let pre_ys = Simd::from_array(pre_ys_bytes) >> Simd::from_array(pre_ys_right_offset);
212
213    // Mask for clearing the rest of bits of `pre_ys`.
214    let pre_ys_mask = Simd::splat(
215        (u32::MAX << usize::from(PARAM_EXT))
216            & (u32::MAX >> (u32::BITS as usize - usize::from(K + PARAM_EXT))),
217    );
218
219    // Extract `PARAM_EXT` most significant bits from `xs` and store in the final offset of
220    // eventual `ys` with the rest of bits being in undefined state.
221    let pre_exts = Simd::from_array(xs) >> Simd::splat(u32::from(K - PARAM_EXT));
222
223    // Combine all of the bits together:
224    // [padding zero bits][`K` bits rom `partial_y`][`PARAM_EXT` bits from `x`]
225    let ys = (pre_ys.cast() & pre_ys_mask) | pre_exts;
226
227    Y::array_from_repr(ys.to_array())
228}
229
230/// `rmap_scratch` is just an optimization to reuse allocations between calls.
231///
232/// For verification purposes use [`has_match`] instead.
233///
234/// Returns `None` if either of buckets is empty.
235#[allow(clippy::too_many_arguments)]
236fn find_matches<T, Map>(
237    left_bucket_ys: &[Y],
238    left_bucket_start_position: Position,
239    right_bucket_ys: &[Y],
240    right_bucket_start_position: Position,
241    rmap_scratch: &mut Vec<RmapItem>,
242    left_targets: &LeftTargets,
243    map: Map,
244    output: &mut Vec<T>,
245) where
246    Map: Fn(Match) -> T,
247{
248    // Clear and set to correct size with zero values
249    rmap_scratch.clear();
250    rmap_scratch.resize_with(usize::from(PARAM_BC), RmapItem::default);
251    let rmap = rmap_scratch;
252
253    // Both left and right buckets can be empty
254    let Some(&first_left_bucket_y) = left_bucket_ys.first() else {
255        return;
256    };
257    let Some(&first_right_bucket_y) = right_bucket_ys.first() else {
258        return;
259    };
260    // Since all entries in a bucket are obtained after division by `PARAM_BC`, we can compute
261    // quotient more efficiently by subtracting base value rather than computing remainder of
262    // division
263    let base = (usize::from(first_right_bucket_y) / usize::from(PARAM_BC)) * usize::from(PARAM_BC);
264    for (&y, right_position) in right_bucket_ys.iter().zip(right_bucket_start_position..) {
265        let r = usize::from(y) - base;
266
267        // Same `y` and as the result `r` can appear in the table multiple times, in which case
268        // they'll all occupy consecutive slots in `right_bucket` and all we need to store is just
269        // the first position and number of elements.
270        if rmap[r].count == Position::ZERO {
271            rmap[r].start_position = right_position;
272        }
273        rmap[r].count += Position::ONE;
274    }
275    let rmap = rmap.as_slice();
276
277    // Same idea as above, but avoids division by leveraging the fact that each bucket is exactly
278    // `PARAM_BC` away from the previous one in terms of divisor by `PARAM_BC`
279    let base = base - usize::from(PARAM_BC);
280    let parity = (usize::from(first_left_bucket_y) / usize::from(PARAM_BC)) % 2;
281    let left_targets_parity = {
282        let (a, b) = left_targets
283            .left_targets
284            .split_at(left_targets.left_targets.len() / 2);
285        if parity == 0 { a } else { b }
286    };
287
288    for (&y, left_position) in left_bucket_ys.iter().zip(left_bucket_start_position..) {
289        let r = usize::from(y) - base;
290        let left_targets_r = left_targets_parity
291            .chunks_exact(left_targets_parity.len() / usize::from(PARAM_BC))
292            .nth(r)
293            .expect("r is valid; qed");
294
295        const _: () = {
296            assert!((PARAM_M as usize).is_multiple_of(FIND_MATCHES_AND_COMPUTE_UNROLL_FACTOR));
297        };
298
299        for r_targets in left_targets_r
300            .array_chunks::<{ FIND_MATCHES_AND_COMPUTE_UNROLL_FACTOR }>()
301            .take(usize::from(PARAM_M) / FIND_MATCHES_AND_COMPUTE_UNROLL_FACTOR)
302        {
303            let _: [(); FIND_MATCHES_AND_COMPUTE_UNROLL_FACTOR] = seq!(N in 0..8 {
304                [
305                #(
306                {
307                    let rmap_item = rmap[usize::from(r_targets[N])];
308
309                    for right_position in
310                        rmap_item.start_position..rmap_item.start_position + rmap_item.count
311                    {
312                        let m = Match {
313                            left_position,
314                            left_y: y,
315                            right_position,
316                        };
317                        output.push(map(m));
318                    }
319                },
320                )*
321                ]
322            });
323        }
324    }
325}
326
327/// Simplified version of [`find_matches`] for verification purposes.
328pub(super) fn has_match(left_y: Y, right_y: Y) -> bool {
329    let right_r = u32::from(right_y) % u32::from(PARAM_BC);
330    let parity = (u32::from(left_y) / u32::from(PARAM_BC)) % 2;
331    let left_r = u32::from(left_y) % u32::from(PARAM_BC);
332
333    let r_targets = array::from_fn::<_, { PARAM_M as usize }, _>(|i| {
334        calculate_left_target_on_demand(parity, left_r, i as u32)
335    });
336
337    r_targets.contains(&right_r)
338}
339
340pub(super) fn compute_fn<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
341    y: Y,
342    left_metadata: Metadata<K, PARENT_TABLE_NUMBER>,
343    right_metadata: Metadata<K, PARENT_TABLE_NUMBER>,
344) -> (Y, Metadata<K, TABLE_NUMBER>)
345where
346    EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
347    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
348{
349    let left_metadata = u128::from(left_metadata);
350    let right_metadata = u128::from(right_metadata);
351
352    let parent_metadata_bits = metadata_size_bits(K, PARENT_TABLE_NUMBER);
353
354    // Only supports `K` from 15 to 25 (otherwise math will not be correct when concatenating y,
355    // left metadata and right metadata)
356    let hash = {
357        // Take only bytes where bits were set
358        let num_bytes_with_data =
359            (y_size_bits(K) + parent_metadata_bits * 2).div_ceil(u8::BITS as usize);
360
361        // Collect `K` most significant bits of `y` at the final offset of eventual `input_a`
362        let y_bits = u128::from(y) << (u128::BITS as usize - y_size_bits(K));
363
364        // Move bits of `left_metadata` at the final offset of eventual `input_a`
365        let left_metadata_bits =
366            left_metadata << (u128::BITS as usize - parent_metadata_bits - y_size_bits(K));
367
368        // Part of the `right_bits` at the final offset of eventual `input_a`
369        let y_and_left_bits = y_size_bits(K) + parent_metadata_bits;
370        let right_bits_start_offset = u128::BITS as usize - parent_metadata_bits;
371
372        // If `right_metadata` bits start to the left of the desired position in `input_a` move
373        // bits right, else move left
374        if right_bits_start_offset < y_and_left_bits {
375            let right_bits_pushed_into_input_b = y_and_left_bits - right_bits_start_offset;
376            // Collect bits of `right_metadata` that will fit into `input_a` at the final offset in
377            // eventual `input_a`
378            let right_bits_a = right_metadata >> right_bits_pushed_into_input_b;
379            let input_a = y_bits | left_metadata_bits | right_bits_a;
380            // Collect bits of `right_metadata` that will spill over into `input_b`
381            let input_b = right_metadata << (u128::BITS as usize - right_bits_pushed_into_input_b);
382
383            let input = [input_a.to_be_bytes(), input_b.to_be_bytes()];
384            let input_len =
385                size_of::<u128>() + right_bits_pushed_into_input_b.div_ceil(u8::BITS as usize);
386            ab_blake3::single_block_hash(&input.as_flattened()[..input_len])
387                .expect("Exactly a single block worth of bytes; qed")
388        } else {
389            let right_bits_a = right_metadata << (right_bits_start_offset - y_and_left_bits);
390            let input_a = y_bits | left_metadata_bits | right_bits_a;
391
392            ab_blake3::single_block_hash(&input_a.to_be_bytes()[..num_bytes_with_data])
393                .expect("Less than a single block worth of bytes; qed")
394        }
395    };
396
397    let y_output = Y::from(
398        u32::from_be_bytes([hash[0], hash[1], hash[2], hash[3]])
399            >> (u32::BITS as usize - y_size_bits(K)),
400    );
401
402    let metadata_size_bits = metadata_size_bits(K, TABLE_NUMBER);
403
404    let metadata = if TABLE_NUMBER < 4 {
405        (left_metadata << parent_metadata_bits) | right_metadata
406    } else if metadata_size_bits > 0 {
407        // For K up to 25 it is guaranteed that metadata + bit offset will always fit into u128.
408        // We collect the bytes necessary, potentially with extra bits at the start and end of the
409        // bytes that will be taken care of later.
410        let metadata = u128::from_be_bytes(
411            hash[y_size_bits(K) / u8::BITS as usize..][..size_of::<u128>()]
412                .try_into()
413                .expect("Always enough bits for any K; qed"),
414        );
415        // Remove extra bits at the beginning
416        let metadata = metadata << (y_size_bits(K) % u8::BITS as usize);
417        // Move bits into the correct location
418        metadata >> (u128::BITS as usize - metadata_size_bits)
419    } else {
420        0
421    };
422
423    (y_output, Metadata::from(metadata))
424}
425
426fn match_to_result<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
427    last_table: &Table<K, PARENT_TABLE_NUMBER>,
428    m: Match,
429) -> (Y, [Position; 2], Metadata<K, TABLE_NUMBER>)
430where
431    EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
432    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
433{
434    let left_metadata = last_table
435        .metadata(m.left_position)
436        .expect("Position resulted from matching is correct; qed");
437    let right_metadata = last_table
438        .metadata(m.right_position)
439        .expect("Position resulted from matching is correct; qed");
440
441    let (y, metadata) =
442        compute_fn::<K, TABLE_NUMBER, PARENT_TABLE_NUMBER>(m.left_y, left_metadata, right_metadata);
443
444    (y, [m.left_position, m.right_position], metadata)
445}
446
447fn match_and_compute_fn<'a, const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
448    last_table: &'a Table<K, PARENT_TABLE_NUMBER>,
449    left_bucket: Bucket,
450    right_bucket: Bucket,
451    rmap_scratch: &'a mut Vec<RmapItem>,
452    left_targets: &'a LeftTargets,
453    results_table: &mut Vec<(Y, [Position; 2], Metadata<K, TABLE_NUMBER>)>,
454) where
455    EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
456    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
457{
458    find_matches(
459        &last_table.ys()[usize::from(left_bucket.start_position)..]
460            [..usize::from(left_bucket.size)],
461        left_bucket.start_position,
462        &last_table.ys()[usize::from(right_bucket.start_position)..]
463            [..usize::from(right_bucket.size)],
464        right_bucket.start_position,
465        rmap_scratch,
466        left_targets,
467        |m| match_to_result(last_table, m),
468        results_table,
469    )
470}
471
472#[derive(Debug)]
473pub(super) enum Table<const K: u8, const TABLE_NUMBER: u8>
474where
475    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
476{
477    /// First table with contents of entries split into separate vectors for more efficient access
478    First {
479        /// Derived values computed from `x`
480        ys: Vec<Y>,
481        /// X values
482        xs: Vec<X>,
483    },
484    /// Other tables
485    Other {
486        /// Derived values computed from previous table
487        ys: Vec<Y>,
488        /// Left and right entry positions in a previous table encoded into bits
489        positions: Vec<[Position; 2]>,
490        /// Metadata corresponding to each entry
491        metadatas: Vec<Metadata<K, TABLE_NUMBER>>,
492    },
493}
494
495impl<const K: u8> Table<K, 1>
496where
497    EvaluatableUsize<{ metadata_size_bytes(K, 1) }>: Sized,
498{
499    /// Create the table
500    pub(super) fn create(seed: Seed) -> Self
501    where
502        EvaluatableUsize<{ K as usize * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize }>: Sized,
503    {
504        let partial_ys = partial_ys::<K>(seed);
505
506        let mut t_1 = Vec::with_capacity(1_usize << K);
507        for (x_batch, partial_ys) in partial_ys
508            .array_chunks::<{ K as usize * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize }>()
509            .copied()
510            .enumerate()
511        {
512            let xs = array::from_fn::<_, COMPUTE_F1_SIMD_FACTOR, _>(|i| {
513                (x_batch * COMPUTE_F1_SIMD_FACTOR + i) as u32
514            });
515            let ys = compute_f1_simd::<K>(xs, &partial_ys);
516            t_1.extend(ys.into_iter().zip(X::array_from_repr(xs)));
517        }
518
519        t_1.sort_unstable();
520
521        let (ys, xs) = t_1.into_iter().unzip();
522
523        Self::First { ys, xs }
524    }
525
526    /// Create the table, leverages available parallelism
527    #[cfg(any(feature = "parallel", test))]
528    pub(super) fn create_parallel(seed: Seed) -> Self
529    where
530        EvaluatableUsize<{ K as usize * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize }>: Sized,
531    {
532        let partial_ys = partial_ys::<K>(seed);
533
534        let mut t_1 = Vec::with_capacity(1_usize << K);
535        for (x_batch, partial_ys) in partial_ys
536            .array_chunks::<{ K as usize * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize }>()
537            .copied()
538            .enumerate()
539        {
540            let xs = array::from_fn::<_, COMPUTE_F1_SIMD_FACTOR, _>(|i| {
541                (x_batch * COMPUTE_F1_SIMD_FACTOR + i) as u32
542            });
543            let ys = compute_f1_simd::<K>(xs, &partial_ys);
544            t_1.extend(ys.into_iter().zip(X::array_from_repr(xs)));
545        }
546
547        t_1.par_sort_unstable();
548
549        let (ys, xs) = t_1.into_iter().unzip();
550
551        Self::First { ys, xs }
552    }
553
554    /// All `x`s as [`BitSlice`], for individual `x`s needs to be slices into [`K`] bits slices
555    pub(super) fn xs(&self) -> &[X] {
556        match self {
557            Table::First { xs, .. } => xs,
558            _ => {
559                unreachable!()
560            }
561        }
562    }
563}
564
565mod private {
566    pub(in super::super) trait SupportedOtherTables {}
567}
568
569impl<const K: u8> private::SupportedOtherTables for Table<K, 2> where
570    EvaluatableUsize<{ metadata_size_bytes(K, 2) }>: Sized
571{
572}
573
574impl<const K: u8> private::SupportedOtherTables for Table<K, 3> where
575    EvaluatableUsize<{ metadata_size_bytes(K, 3) }>: Sized
576{
577}
578
579impl<const K: u8> private::SupportedOtherTables for Table<K, 4> where
580    EvaluatableUsize<{ metadata_size_bytes(K, 4) }>: Sized
581{
582}
583
584impl<const K: u8> private::SupportedOtherTables for Table<K, 5> where
585    EvaluatableUsize<{ metadata_size_bytes(K, 5) }>: Sized
586{
587}
588
589impl<const K: u8> private::SupportedOtherTables for Table<K, 6> where
590    EvaluatableUsize<{ metadata_size_bytes(K, 6) }>: Sized
591{
592}
593
594impl<const K: u8> private::SupportedOtherTables for Table<K, 7> where
595    EvaluatableUsize<{ metadata_size_bytes(K, 7) }>: Sized
596{
597}
598
599impl<const K: u8, const TABLE_NUMBER: u8> Table<K, TABLE_NUMBER>
600where
601    Self: private::SupportedOtherTables,
602    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
603{
604    /// Creates new [`TABLE_NUMBER`] table. There also exists [`Self::create_parallel()`] that
605    /// trades CPU efficiency and memory usage for lower latency and with multiple parallel calls,
606    /// better overall performance.
607    pub(super) fn create<const PARENT_TABLE_NUMBER: u8>(
608        last_table: &Table<K, PARENT_TABLE_NUMBER>,
609        cache: &mut TablesCache<K>,
610    ) -> Self
611    where
612        EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
613    {
614        let buckets = &mut cache.buckets;
615        let rmap_scratch = &mut cache.rmap_scratch;
616        let left_targets = &cache.left_targets;
617
618        let mut bucket = Bucket {
619            bucket_index: 0,
620            start_position: Position::ZERO,
621            size: Position::ZERO,
622        };
623
624        let last_y = *last_table
625            .ys()
626            .last()
627            .expect("List of y values is never empty; qed");
628        buckets.clear();
629        buckets.reserve(1 + usize::from(last_y) / usize::from(PARAM_BC));
630        last_table
631            .ys()
632            .iter()
633            .zip(Position::ZERO..)
634            .for_each(|(&y, position)| {
635                let bucket_index = u32::from(y) / u32::from(PARAM_BC);
636
637                if bucket_index == bucket.bucket_index {
638                    bucket.size += Position::ONE;
639                    return;
640                }
641
642                buckets.push(bucket);
643
644                bucket = Bucket {
645                    bucket_index,
646                    start_position: position,
647                    size: Position::ONE,
648                };
649            });
650        // Iteration stopped, but we did not store the last bucket yet
651        buckets.push(bucket);
652
653        let num_values = 1 << K;
654        let mut t_n = Vec::with_capacity(num_values);
655        buckets
656            .array_windows::<2>()
657            .for_each(|&[left_bucket, right_bucket]| {
658                match_and_compute_fn::<K, TABLE_NUMBER, PARENT_TABLE_NUMBER>(
659                    last_table,
660                    left_bucket,
661                    right_bucket,
662                    rmap_scratch,
663                    left_targets,
664                    &mut t_n,
665                );
666            });
667
668        t_n.sort_unstable();
669
670        let mut ys = Vec::with_capacity(t_n.len());
671        let mut positions = Vec::with_capacity(t_n.len());
672        let mut metadatas = Vec::with_capacity(t_n.len());
673
674        for (y, [left_position, right_position], metadata) in t_n {
675            ys.push(y);
676            positions.push([left_position, right_position]);
677            // Last table doesn't have metadata
678            if metadata_size_bits(K, TABLE_NUMBER) > 0 {
679                metadatas.push(metadata);
680            }
681        }
682
683        Self::Other {
684            ys,
685            positions,
686            metadatas,
687        }
688    }
689
690    /// Almost the same as [`Self::create()`], but uses parallelism internally for better
691    /// performance (though not efficiency of CPU and memory usage), if you create multiple tables
692    /// in parallel, prefer [`Self::create_parallel()`] for better overall performance.
693    #[cfg(any(feature = "parallel", test))]
694    pub(super) fn create_parallel<const PARENT_TABLE_NUMBER: u8>(
695        last_table: &Table<K, PARENT_TABLE_NUMBER>,
696        cache: &mut TablesCache<K>,
697    ) -> Self
698    where
699        EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
700    {
701        let left_targets = &cache.left_targets;
702
703        let mut first_bucket = Bucket {
704            bucket_index: u32::from(last_table.ys()[0]) / u32::from(PARAM_BC),
705            start_position: Position::ZERO,
706            size: Position::ZERO,
707        };
708        for &y in last_table.ys() {
709            let bucket_index = u32::from(y) / u32::from(PARAM_BC);
710
711            if bucket_index == first_bucket.bucket_index {
712                first_bucket.size += Position::ONE;
713            } else {
714                break;
715            }
716        }
717
718        let previous_bucket = Mutex::new(first_bucket);
719
720        let t_n = rayon::broadcast(|_ctx| {
721            let mut entries = Vec::new();
722            let mut rmap_scratch = Vec::new();
723
724            loop {
725                let left_bucket;
726                let right_bucket;
727                {
728                    let mut previous_bucket = previous_bucket.lock();
729
730                    let right_bucket_start_position =
731                        previous_bucket.start_position + previous_bucket.size;
732                    let right_bucket_index = match last_table
733                        .ys()
734                        .get(usize::from(right_bucket_start_position))
735                    {
736                        Some(&y) => u32::from(y) / u32::from(PARAM_BC),
737                        None => {
738                            break;
739                        }
740                    };
741                    let mut right_bucket_size = Position::ZERO;
742
743                    for &y in &last_table.ys()[usize::from(right_bucket_start_position)..] {
744                        let bucket_index = u32::from(y) / u32::from(PARAM_BC);
745
746                        if bucket_index == right_bucket_index {
747                            right_bucket_size += Position::ONE;
748                        } else {
749                            break;
750                        }
751                    }
752
753                    right_bucket = Bucket {
754                        bucket_index: right_bucket_index,
755                        start_position: right_bucket_start_position,
756                        size: right_bucket_size,
757                    };
758
759                    left_bucket = *previous_bucket;
760                    *previous_bucket = right_bucket;
761                }
762
763                match_and_compute_fn::<K, TABLE_NUMBER, PARENT_TABLE_NUMBER>(
764                    last_table,
765                    left_bucket,
766                    right_bucket,
767                    &mut rmap_scratch,
768                    left_targets,
769                    &mut entries,
770                );
771            }
772
773            entries
774        });
775
776        let mut t_n = t_n.into_iter().flatten().collect::<Vec<_>>();
777        t_n.par_sort_unstable();
778
779        let mut ys = Vec::with_capacity(t_n.len());
780        let mut positions = Vec::with_capacity(t_n.len());
781        let mut metadatas = Vec::with_capacity(t_n.len());
782
783        for (y, [left_position, right_position], metadata) in t_n.drain(..) {
784            ys.push(y);
785            positions.push([left_position, right_position]);
786            // Last table doesn't have metadata
787            if metadata_size_bits(K, TABLE_NUMBER) > 0 {
788                metadatas.push(metadata);
789            }
790        }
791
792        // Drop from a background thread, which typically helps with overall concurrency
793        rayon::spawn(move || {
794            drop(t_n);
795        });
796
797        Self::Other {
798            ys,
799            positions,
800            metadatas,
801        }
802    }
803}
804
805impl<const K: u8, const TABLE_NUMBER: u8> Table<K, TABLE_NUMBER>
806where
807    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
808{
809    /// All `y`s as [`BitSlice`], for individual `x`s needs to be slices into [`K`] bits slices
810    pub(super) fn ys(&self) -> &[Y] {
811        let (Table::First { ys, .. } | Table::Other { ys, .. }) = self;
812        ys
813    }
814
815    /// Returns `None` on invalid position or first table, `Some(left_position, right_position)` in
816    /// previous table on success
817    pub(super) fn position(&self, position: Position) -> Option<[Position; 2]> {
818        match self {
819            Table::First { .. } => None,
820            Table::Other { positions, .. } => positions.get(usize::from(position)).copied(),
821        }
822    }
823
824    /// Returns `None` on invalid position or for table number 7
825    pub(super) fn metadata(&self, position: Position) -> Option<Metadata<K, TABLE_NUMBER>> {
826        match self {
827            Table::First { xs, .. } => xs.get(usize::from(position)).map(|&x| Metadata::from(x)),
828            Table::Other { metadatas, .. } => metadatas.get(usize::from(position)).copied(),
829        }
830    }
831}