1#[cfg(any(feature = "alloc", test))]
2mod rmap;
3#[cfg(test)]
4mod tests;
5pub(super) mod types;
6
7use crate::chiapos::Seed;
8use crate::chiapos::constants::{PARAM_B, PARAM_BC, PARAM_C, PARAM_EXT, PARAM_M};
9#[cfg(feature = "alloc")]
10use crate::chiapos::table::rmap::Rmap;
11use crate::chiapos::table::types::{Metadata, X, Y};
12#[cfg(feature = "alloc")]
13use crate::chiapos::table::types::{Position, R};
14use crate::chiapos::utils::EvaluatableUsize;
15use ab_chacha8::{ChaCha8Block, ChaCha8State};
16#[cfg(feature = "alloc")]
17use ab_core_primitives::pieces::Record;
18#[cfg(feature = "alloc")]
19use alloc::boxed::Box;
20#[cfg(feature = "alloc")]
21use alloc::vec;
22#[cfg(feature = "alloc")]
23use alloc::vec::Vec;
24#[cfg(feature = "alloc")]
25use chacha20::cipher::{Iv, KeyIvInit, StreamCipher};
26#[cfg(feature = "alloc")]
27use chacha20::{ChaCha8, Key};
28use core::array;
29#[cfg(feature = "parallel")]
30use core::cell::SyncUnsafeCell;
31#[cfg(feature = "alloc")]
32use core::mem;
33#[cfg(feature = "alloc")]
34use core::mem::MaybeUninit;
35#[cfg(any(feature = "alloc", test))]
36use core::simd::prelude::*;
37#[cfg(feature = "parallel")]
38use core::sync::atomic::{AtomicUsize, Ordering};
39#[cfg(feature = "alloc")]
40use derive_more::Deref;
41#[cfg(feature = "alloc")]
42use rclite::Arc;
43#[cfg(any(feature = "alloc", test))]
44use seq_macro::seq;
45
46pub(super) const COMPUTE_F1_SIMD_FACTOR: usize = 8;
47#[cfg(any(feature = "alloc", test))]
48const COMPUTE_FN_SIMD_FACTOR: usize = 16;
49const MAX_BUCKET_SIZE: usize = 512;
50#[cfg(any(feature = "alloc", test))]
51const BUCKET_SIZE_UPPER_BOUND_SECURITY_BITS: u8 = 128;
52const REDUCED_BUCKET_SIZE: usize = 272;
57const REDUCED_MATCHES_COUNT: usize = 288;
62#[cfg(feature = "parallel")]
63const CACHE_LINE_SIZE: usize = 64;
64
65const {
66 debug_assert!(REDUCED_BUCKET_SIZE <= MAX_BUCKET_SIZE);
67 debug_assert!(REDUCED_MATCHES_COUNT <= MAX_BUCKET_SIZE);
68}
69
70pub(super) const fn y_size_bits(k: u8) -> usize {
72 k as usize + PARAM_EXT as usize
73}
74
75pub const fn metadata_size_bytes(k: u8, table_number: u8) -> usize {
77 metadata_size_bits(k, table_number).div_ceil(u8::BITS as usize)
78}
79
80pub(super) const fn metadata_size_bits(k: u8, table_number: u8) -> usize {
82 k as usize
83 * match table_number {
84 1 => 1,
85 2 => 2,
86 3 | 4 => 4,
87 5 => 3,
88 6 => 2,
89 7 => 0,
90 _ => unreachable!(),
91 }
92}
93
94pub const fn num_buckets(k: u8) -> usize {
96 2_usize
97 .pow(y_size_bits(k) as u32)
98 .div_ceil(usize::from(PARAM_BC))
99}
100
101#[cfg(feature = "parallel")]
102#[inline(always)]
103fn strip_sync_unsafe_cell<const N: usize, T>(value: Box<[SyncUnsafeCell<T>; N]>) -> Box<[T; N]> {
104 unsafe { Box::from_raw(Box::into_raw(value).cast()) }
106}
107
108#[cfg(feature = "alloc")]
111fn partial_ys<const K: u8>(seed: Seed) -> Vec<u8> {
112 let output_len_bits = usize::from(K) * (1 << K);
113 let mut output = vec![0; output_len_bits.div_ceil(u8::BITS as usize)];
114
115 let key = Key::from(seed);
116 let iv = Iv::<ChaCha8>::default();
117
118 let mut cipher = ChaCha8::new(&key, &iv);
119
120 cipher.write_keystream(&mut output);
121
122 output
123}
124
125#[cfg(feature = "alloc")]
133const fn bucket_size_upper_bound(k: u8, security_bits: u8) -> usize {
134 const LAMBDA: u64 = PARAM_BC as u64 / 2u64.pow(PARAM_EXT as u32);
137 const LN2_NUM: u128 = 693_147;
140 const LN2_DEN: u128 = 1_000_000;
141
142 let ks = k as u128 + security_bits as u128;
144 let num = 3u128 * LAMBDA as u128 * ks * LN2_NUM;
147 let den = LN2_DEN;
149
150 let ceil_div = num.div_ceil(den);
151
152 let mut low = 0u64;
157 let mut high = u64::MAX;
158 while low < high {
159 let mid = low + (high - low) / 2;
160 let left = (mid as u128) * (mid as u128);
161 if left >= ceil_div {
162 high = mid;
163 } else {
164 low = mid + 1;
165 }
166 }
167 let add_term = low;
168
169 (LAMBDA + add_term) as usize
170}
171
172#[cfg(feature = "alloc")]
173fn group_by_buckets<const K: u8>(
174 ys: &[Y],
175) -> Box<[[(Position, Y); REDUCED_BUCKET_SIZE]; num_buckets(K)]>
176where
177 [(); num_buckets(K)]:,
178{
179 let mut bucket_offsets = [0_u16; num_buckets(K)];
180 let mut buckets = unsafe {
182 Box::<[[MaybeUninit<(Position, Y)>; REDUCED_BUCKET_SIZE]; num_buckets(K)]>::new_uninit()
183 .assume_init()
184 };
185
186 for (&y, position) in ys.iter().zip(Position::ZERO..) {
187 let bucket_index = (u32::from(y) / u32::from(PARAM_BC)) as usize;
188
189 let bucket_offset = unsafe { bucket_offsets.get_unchecked_mut(bucket_index) };
191 let bucket = unsafe { buckets.get_unchecked_mut(bucket_index) };
193
194 if *bucket_offset < REDUCED_BUCKET_SIZE as u16 {
195 bucket[*bucket_offset as usize].write((position, y));
196 *bucket_offset += 1;
197 }
198 }
199
200 for (bucket, initialized) in buckets.iter_mut().zip(bucket_offsets) {
201 bucket[usize::from(initialized)..].write_filled((Position::SENTINEL, Y::SENTINEL));
202 }
203
204 unsafe { Box::from_raw(Box::into_raw(buckets).cast()) }
206}
207
208#[cfg(feature = "parallel")]
214unsafe fn group_by_buckets_from_buckets<'a, const K: u8, I>(
215 iter: I,
216) -> Box<[[(Position, Y); REDUCED_BUCKET_SIZE]; num_buckets(K)]>
217where
218 I: Iterator<Item = (&'a [MaybeUninit<Y>; REDUCED_MATCHES_COUNT], usize)> + 'a,
219 [(); num_buckets(K)]:,
220{
221 let mut bucket_offsets = [0_u16; num_buckets(K)];
222 let mut buckets = unsafe {
224 Box::<[[MaybeUninit<(Position, Y)>; REDUCED_BUCKET_SIZE]; num_buckets(K)]>::new_uninit()
225 .assume_init()
226 };
227
228 for ((ys, count), batch_start) in iter.zip((Position::ZERO..).step_by(REDUCED_MATCHES_COUNT)) {
229 let ys = unsafe { ys[..count].assume_init_ref() };
231 for (&y, position) in ys.iter().zip(batch_start..) {
232 let bucket_index = (u32::from(y) / u32::from(PARAM_BC)) as usize;
233
234 let bucket_offset = unsafe { bucket_offsets.get_unchecked_mut(bucket_index) };
236 let bucket = unsafe { buckets.get_unchecked_mut(bucket_index) };
238
239 if *bucket_offset < REDUCED_BUCKET_SIZE as u16 {
240 bucket[*bucket_offset as usize].write((position, y));
241 *bucket_offset += 1;
242 }
243 }
244 }
245
246 for (bucket, initialized) in buckets.iter_mut().zip(bucket_offsets) {
247 bucket[usize::from(initialized)..].write_filled((Position::SENTINEL, Y::SENTINEL));
248 }
249
250 unsafe { Box::from_raw(Box::into_raw(buckets).cast()) }
252}
253
254#[cfg(feature = "alloc")]
255#[derive(Debug, Copy, Clone, Deref)]
256#[repr(align(64))]
257struct CacheLineAligned<T>(T);
258
259#[cfg(feature = "alloc")]
261type LeftTargets = [[CacheLineAligned<[R; PARAM_M as usize]>; PARAM_BC as usize]; 2];
262
263#[cfg(feature = "alloc")]
264fn calculate_left_targets() -> Arc<LeftTargets> {
265 let mut left_targets = Arc::<LeftTargets>::new_uninit();
266 let left_targets_slice = unsafe {
268 mem::transmute::<
269 &mut MaybeUninit<[[CacheLineAligned<[R; PARAM_M as usize]>; PARAM_BC as usize]; 2]>,
270 &mut [[MaybeUninit<CacheLineAligned<[R; PARAM_M as usize]>>; PARAM_BC as usize]; 2],
271 >(Arc::get_mut_unchecked(&mut left_targets))
272 };
273
274 for parity in 0..=1 {
275 for r in 0..PARAM_BC {
276 let c = r / PARAM_C;
277
278 let arr = array::from_fn(|m| {
279 let m = m as u16;
280 R::from(
281 ((c + m) % PARAM_B) * PARAM_C
282 + (((2 * m + parity) * (2 * m + parity) + r) % PARAM_C),
283 )
284 });
285 left_targets_slice[parity as usize][r as usize].write(CacheLineAligned(arr));
286 }
287 }
288
289 unsafe { left_targets.assume_init() }
291}
292
293fn calculate_left_target_on_demand(parity: u32, r: u32, m: u32) -> u32 {
294 let param_b = u32::from(PARAM_B);
295 let param_c = u32::from(PARAM_C);
296
297 ((r / param_c + m) % param_b) * param_c + (((2 * m + parity) * (2 * m + parity) + r) % param_c)
298}
299
300#[cfg(feature = "alloc")]
302#[derive(Debug, Clone)]
303pub struct TablesCache {
304 left_targets: Arc<LeftTargets>,
305}
306
307#[cfg(feature = "alloc")]
308impl Default for TablesCache {
309 fn default() -> Self {
311 Self {
312 left_targets: calculate_left_targets(),
313 }
314 }
315}
316
317#[cfg(feature = "alloc")]
318#[derive(Debug, Copy, Clone)]
319struct Match {
320 left_position: Position,
321 left_y: Y,
322 right_position: Position,
323}
324
325pub(super) fn compute_f1<const K: u8>(x: X, seed: &Seed) -> Y {
327 let skip_bits = u32::from(K) * u32::from(x);
328 let skip_u32s = skip_bits / u32::BITS;
329 let partial_y_offset = skip_bits % u32::BITS;
330
331 const U32S_PER_BLOCK: usize = size_of::<ChaCha8Block>() / size_of::<u32>();
332
333 let initial_state = ChaCha8State::init(seed, &[0; _]);
334 let first_block_counter = skip_u32s / U32S_PER_BLOCK as u32;
335 let u32_in_first_block = skip_u32s as usize % U32S_PER_BLOCK;
336
337 let first_block = initial_state.compute_block(first_block_counter);
338 let hi = first_block[u32_in_first_block].to_be();
339
340 let lo = if u32_in_first_block + 1 == U32S_PER_BLOCK {
342 let second_block = initial_state.compute_block(first_block_counter + 1);
344 second_block[0].to_be()
345 } else {
346 first_block[u32_in_first_block + 1].to_be()
347 };
348
349 let partial_y = (u64::from(hi) << u32::BITS) | u64::from(lo);
350
351 let pre_y = partial_y >> (u64::BITS - u32::from(K + PARAM_EXT) - partial_y_offset);
352 let pre_y = pre_y as u32;
353 let pre_y_mask = (u32::MAX << PARAM_EXT) & (u32::MAX >> (u32::BITS - u32::from(K + PARAM_EXT)));
355
356 let pre_ext = u32::from(x) >> (K - PARAM_EXT);
359
360 Y::from((pre_y & pre_y_mask) | pre_ext)
363}
364
365#[cfg(any(feature = "alloc", test))]
366pub(super) fn compute_f1_simd<const K: u8>(
367 xs: Simd<u32, COMPUTE_F1_SIMD_FACTOR>,
368 partial_ys: &[u8; usize::from(K) * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize],
369) -> [Y; COMPUTE_F1_SIMD_FACTOR] {
370 let pre_ys_bytes = array::from_fn(|i| {
373 let partial_y_offset = i * usize::from(K);
374 let partial_y_length =
375 (partial_y_offset % u8::BITS as usize + usize::from(K)).div_ceil(u8::BITS as usize);
376 let mut pre_y_bytes = 0u64.to_be_bytes();
377 pre_y_bytes[..partial_y_length].copy_from_slice(
378 &partial_ys[partial_y_offset / u8::BITS as usize..][..partial_y_length],
379 );
380
381 u64::from_be_bytes(pre_y_bytes)
382 });
383 let pre_ys_right_offset = array::from_fn(|i| {
384 let partial_y_offset = i as u32 * u32::from(K);
385 u64::from(u64::BITS - u32::from(K + PARAM_EXT) - partial_y_offset % u8::BITS)
386 });
387 let pre_ys = Simd::from_array(pre_ys_bytes) >> Simd::from_array(pre_ys_right_offset);
388
389 let pre_ys_mask = Simd::splat(
391 (u32::MAX << usize::from(PARAM_EXT))
392 & (u32::MAX >> (u32::BITS as usize - usize::from(K + PARAM_EXT))),
393 );
394
395 let pre_exts = xs >> Simd::splat(u32::from(K - PARAM_EXT));
398
399 let ys = (pre_ys.cast() & pre_ys_mask) | pre_exts;
402
403 Y::array_from_repr(ys.to_array())
404}
405
406#[cfg(feature = "alloc")]
413unsafe fn find_matches_in_buckets<'a>(
414 left_bucket_index: u32,
415 left_bucket: &[(Position, Y); REDUCED_BUCKET_SIZE],
416 right_bucket: &[(Position, Y); REDUCED_BUCKET_SIZE],
417 matches: &'a mut [MaybeUninit<Match>; REDUCED_MATCHES_COUNT + PARAM_M as usize * 2],
420 left_targets: &LeftTargets,
421) -> &'a [Match] {
422 let left_base = left_bucket_index * u32::from(PARAM_BC);
423 let right_base = left_base + u32::from(PARAM_BC);
424
425 let mut rmap = Rmap::new();
426 for &(right_position, y) in right_bucket {
427 if right_position == Position::SENTINEL {
429 break;
430 }
431 let r = R::from((u32::from(y) - right_base) as u16);
432 unsafe {
435 rmap.add(r, right_position);
436 }
437 }
438
439 let parity = left_base % 2;
440 let left_targets_parity = &left_targets[parity as usize];
441 let mut next_match_index = 0;
442
443 for &(left_position, y) in left_bucket {
446 if left_position == Position::SENTINEL || next_match_index >= REDUCED_MATCHES_COUNT {
449 break;
451 }
452
453 let r = R::from((u32::from(y) - left_base) as u16);
454 let left_targets_r = unsafe { left_targets_parity.get_unchecked(usize::from(r)) };
456
457 for &r_target in left_targets_r.iter() {
458 let [right_position_a, right_position_b] = unsafe { rmap.get(r_target) };
460
461 if right_position_a != Position::SENTINEL {
462 unsafe { matches.get_unchecked_mut(next_match_index) }.write(Match {
465 left_position,
466 left_y: y,
467 right_position: right_position_a,
468 });
469 next_match_index += 1;
470
471 if right_position_b != Position::SENTINEL {
472 unsafe { matches.get_unchecked_mut(next_match_index) }.write(Match {
475 left_position,
476 left_y: y,
477 right_position: right_position_b,
478 });
479 next_match_index += 1;
480 }
481 }
482 }
483 }
484
485 unsafe { matches[..next_match_index].assume_init_ref() }
487}
488
489pub(super) fn has_match(left_y: Y, right_y: Y) -> bool {
491 let right_r = u32::from(right_y) % u32::from(PARAM_BC);
492 let parity = (u32::from(left_y) / u32::from(PARAM_BC)) % 2;
493 let left_r = u32::from(left_y) % u32::from(PARAM_BC);
494
495 let r_targets = array::from_fn::<_, { PARAM_M as usize }, _>(|i| {
496 calculate_left_target_on_demand(parity, left_r, i as u32)
497 });
498
499 r_targets.contains(&right_r)
500}
501
502#[inline(always)]
503pub(super) fn compute_fn<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
504 y: Y,
505 left_metadata: Metadata<K, PARENT_TABLE_NUMBER>,
506 right_metadata: Metadata<K, PARENT_TABLE_NUMBER>,
507) -> (Y, Metadata<K, TABLE_NUMBER>)
508where
509 EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
510 EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
511{
512 let left_metadata = u128::from(left_metadata);
513 let right_metadata = u128::from(right_metadata);
514
515 let parent_metadata_bits = metadata_size_bits(K, PARENT_TABLE_NUMBER);
516
517 let y_and_left_bits = y_size_bits(K) + parent_metadata_bits;
519 let right_bits_start_offset = u128::BITS as usize - parent_metadata_bits;
520
521 let num_bytes_with_data =
523 (y_size_bits(K) + parent_metadata_bits * 2).div_ceil(u8::BITS as usize);
524
525 let hash = {
528 let y_bits = u128::from(y) << (u128::BITS as usize - y_size_bits(K));
530
531 let left_metadata_bits =
533 left_metadata << (u128::BITS as usize - parent_metadata_bits - y_size_bits(K));
534
535 if right_bits_start_offset < y_and_left_bits {
538 let right_bits_pushed_into_input_b = y_and_left_bits - right_bits_start_offset;
539 let right_bits_a = right_metadata >> right_bits_pushed_into_input_b;
542 let input_a = y_bits | left_metadata_bits | right_bits_a;
543 let input_b = right_metadata << (u128::BITS as usize - right_bits_pushed_into_input_b);
545
546 let input = [input_a.to_be_bytes(), input_b.to_be_bytes()];
547 let input_len =
548 size_of::<u128>() + right_bits_pushed_into_input_b.div_ceil(u8::BITS as usize);
549 ab_blake3::single_block_hash(&input.as_flattened()[..input_len])
550 .expect("Exactly a single block worth of bytes; qed")
551 } else {
552 let right_bits_a = right_metadata << (right_bits_start_offset - y_and_left_bits);
553 let input_a = y_bits | left_metadata_bits | right_bits_a;
554
555 ab_blake3::single_block_hash(&input_a.to_be_bytes()[..num_bytes_with_data])
556 .expect("Less than a single block worth of bytes; qed")
557 }
558 };
559
560 let y_output = Y::from(
561 u32::from_be_bytes([hash[0], hash[1], hash[2], hash[3]])
562 >> (u32::BITS as usize - y_size_bits(K)),
563 );
564
565 let metadata_size_bits = metadata_size_bits(K, TABLE_NUMBER);
566
567 let metadata = if TABLE_NUMBER < 4 {
568 (left_metadata << parent_metadata_bits) | right_metadata
569 } else if metadata_size_bits > 0 {
570 let metadata = u128::from_be_bytes(
574 hash[y_size_bits(K) / u8::BITS as usize..][..size_of::<u128>()]
575 .try_into()
576 .expect("Always enough bits for any K; qed"),
577 );
578 let metadata = metadata << (y_size_bits(K) % u8::BITS as usize);
580 metadata >> (u128::BITS as usize - metadata_size_bits)
582 } else {
583 0
584 };
585
586 (y_output, Metadata::from(metadata))
587}
588
589#[cfg(any(feature = "alloc", test))]
593fn compute_fn_simd<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
594 left_ys: [Y; COMPUTE_FN_SIMD_FACTOR],
595 left_metadatas: [Metadata<K, PARENT_TABLE_NUMBER>; COMPUTE_FN_SIMD_FACTOR],
596 right_metadatas: [Metadata<K, PARENT_TABLE_NUMBER>; COMPUTE_FN_SIMD_FACTOR],
597) -> (
598 Simd<u32, COMPUTE_FN_SIMD_FACTOR>,
599 [Metadata<K, TABLE_NUMBER>; COMPUTE_FN_SIMD_FACTOR],
600)
601where
602 EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
603 EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
604{
605 let parent_metadata_bits = metadata_size_bits(K, PARENT_TABLE_NUMBER);
606 let metadata_size_bits = metadata_size_bits(K, TABLE_NUMBER);
607
608 let left_metadatas: [u128; COMPUTE_FN_SIMD_FACTOR] = seq!(N in 0..16 {
611 [
612 #(
613 u128::from(left_metadatas[N]),
614 )*
615 ]
616 });
617 let right_metadatas: [u128; COMPUTE_FN_SIMD_FACTOR] = seq!(N in 0..16 {
618 [
619 #(
620 u128::from(right_metadatas[N]),
621 )*
622 ]
623 });
624
625 let y_and_left_bits = y_size_bits(K) + parent_metadata_bits;
627 let right_bits_start_offset = u128::BITS as usize - parent_metadata_bits;
628
629 let num_bytes_with_data =
631 (y_size_bits(K) + parent_metadata_bits * 2).div_ceil(u8::BITS as usize);
632
633 let hashes: [_; COMPUTE_FN_SIMD_FACTOR] = seq!(N in 0..16 {
638 [
639 #(
640 {
641 let y = left_ys[N];
642 let left_metadata = left_metadatas[N];
643 let right_metadata = right_metadatas[N];
644
645 let y_bits = u128::from(y) << (u128::BITS as usize - y_size_bits(K));
648
649 let left_metadata_bits =
651 left_metadata << (u128::BITS as usize - parent_metadata_bits - y_size_bits(K));
652
653 if right_bits_start_offset < y_and_left_bits {
656 let right_bits_pushed_into_input_b = y_and_left_bits - right_bits_start_offset;
657 let right_bits_a = right_metadata >> right_bits_pushed_into_input_b;
660 let input_a = y_bits | left_metadata_bits | right_bits_a;
661 let input_b = right_metadata << (u128::BITS as usize - right_bits_pushed_into_input_b);
663
664 let input = [input_a.to_be_bytes(), input_b.to_be_bytes()];
665 let input_len =
666 size_of::<u128>() + right_bits_pushed_into_input_b.div_ceil(u8::BITS as usize);
667 ab_blake3::single_block_hash(&input.as_flattened()[..input_len])
668 .expect("Exactly a single block worth of bytes; qed")
669 } else {
670 let right_bits_a = right_metadata << (right_bits_start_offset - y_and_left_bits);
671 let input_a = y_bits | left_metadata_bits | right_bits_a;
672
673 ab_blake3::single_block_hash(&input_a.to_be_bytes()[..num_bytes_with_data])
674 .expect("Exactly a single block worth of bytes; qed")
675 }
676 },
677 )*
678 ]
679 });
680
681 let y_outputs = Simd::from_array(
682 hashes.map(|hash| u32::from_be_bytes([hash[0], hash[1], hash[2], hash[3]])),
683 ) >> (u32::BITS - y_size_bits(K) as u32);
684
685 let metadatas = if TABLE_NUMBER < 4 {
686 seq!(N in 0..16 {
687 [
688 #(
689 Metadata::from((left_metadatas[N] << parent_metadata_bits) | right_metadatas[N]),
690 )*
691 ]
692 })
693 } else if metadata_size_bits > 0 {
694 seq!(N in 0..16 {
698 [
699 #(
700 {
701 let metadata = u128::from_be_bytes(
702 hashes[N][y_size_bits(K) / u8::BITS as usize..][..size_of::<u128>()]
703 .try_into()
704 .expect("Always enough bits for any K; qed"),
705 );
706 let metadata = metadata << (y_size_bits(K) % u8::BITS as usize);
708 Metadata::from(metadata >> (u128::BITS as usize - metadata_size_bits))
710 },
711 )*
712 ]
713 })
714 } else {
715 [Metadata::default(); _]
716 };
717
718 (y_outputs, metadatas)
719}
720
721#[cfg(feature = "alloc")]
724#[inline(always)]
725unsafe fn match_to_result<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
726 parent_table: &Table<K, PARENT_TABLE_NUMBER>,
727 m: &Match,
728) -> (Y, [Position; 2], Metadata<K, TABLE_NUMBER>)
729where
730 Table<K, PARENT_TABLE_NUMBER>: private::NotLastTable,
731 EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
732 EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
733 [(); 1 << K]:,
734 [(); num_buckets(K)]:,
735 [(); num_buckets(K) - 1]:,
736{
737 let left_metadata = unsafe { parent_table.metadata(m.left_position) };
739 let right_metadata = unsafe { parent_table.metadata(m.right_position) };
741
742 let (y, metadata) =
743 compute_fn::<K, TABLE_NUMBER, PARENT_TABLE_NUMBER>(m.left_y, left_metadata, right_metadata);
744
745 (y, [m.left_position, m.right_position], metadata)
746}
747
748#[cfg(feature = "alloc")]
751#[inline(always)]
752unsafe fn match_to_result_simd<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
753 parent_table: &Table<K, PARENT_TABLE_NUMBER>,
754 matches: &[Match; COMPUTE_FN_SIMD_FACTOR],
755) -> (
756 Simd<u32, COMPUTE_FN_SIMD_FACTOR>,
757 [[Position; 2]; COMPUTE_FN_SIMD_FACTOR],
758 [Metadata<K, TABLE_NUMBER>; COMPUTE_FN_SIMD_FACTOR],
759)
760where
761 Table<K, PARENT_TABLE_NUMBER>: private::NotLastTable,
762 EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
763 EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
764 [(); 1 << K]:,
765 [(); num_buckets(K)]:,
766 [(); num_buckets(K) - 1]:,
767{
768 let left_ys: [_; COMPUTE_FN_SIMD_FACTOR] = seq!(N in 0..16 {
769 [
770 #(
771 matches[N].left_y,
772 )*
773 ]
774 });
775 let left_metadatas: [_; COMPUTE_FN_SIMD_FACTOR] = unsafe {
777 seq!(N in 0..16 {
778 [
779 #(
780 parent_table.metadata(matches[N].left_position),
781 )*
782 ]
783 })
784 };
785 let right_metadatas: [_; COMPUTE_FN_SIMD_FACTOR] = unsafe {
787 seq!(N in 0..16 {
788 [
789 #(
790 parent_table.metadata(matches[N].right_position),
791 )*
792 ]
793 })
794 };
795
796 let (y_outputs, metadatas) = compute_fn_simd::<K, TABLE_NUMBER, PARENT_TABLE_NUMBER>(
797 left_ys,
798 left_metadatas,
799 right_metadatas,
800 );
801
802 let positions = seq!(N in 0..16 {
803 [
804 #(
805 [
806 matches[N].left_position,
807 matches[N].right_position,
808 ],
809 )*
810 ]
811 });
812
813 (y_outputs, positions, metadatas)
814}
815
816#[cfg(feature = "alloc")]
820#[inline(always)]
821unsafe fn matches_to_results<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
822 parent_table: &Table<K, PARENT_TABLE_NUMBER>,
823 matches: &[Match],
824 ys: &mut [MaybeUninit<Y>],
825 positions: &mut [MaybeUninit<[Position; 2]>],
826 metadatas: &mut [MaybeUninit<Metadata<K, TABLE_NUMBER>>],
827) where
828 Table<K, PARENT_TABLE_NUMBER>: private::NotLastTable,
829 EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
830 EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
831 [(); 1 << K]:,
832 [(); num_buckets(K)]:,
833 [(); num_buckets(K) - 1]:,
834{
835 let (grouped_matches, other_matches) = matches.as_chunks::<COMPUTE_FN_SIMD_FACTOR>();
836 let (grouped_ys, other_ys) = ys.split_at_mut(grouped_matches.as_flattened().len());
837 let grouped_ys = grouped_ys.as_chunks_mut::<COMPUTE_FN_SIMD_FACTOR>().0;
838 let (grouped_positions, other_positions) =
839 positions.split_at_mut(grouped_matches.as_flattened().len());
840 let grouped_positions = grouped_positions
841 .as_chunks_mut::<COMPUTE_FN_SIMD_FACTOR>()
842 .0;
843 let (grouped_metadatas, other_metadatas) =
844 metadatas.split_at_mut(grouped_matches.as_flattened().len());
845 let grouped_metadatas = grouped_metadatas
846 .as_chunks_mut::<COMPUTE_FN_SIMD_FACTOR>()
847 .0;
848
849 for (((grouped_matches, grouped_ys), grouped_positions), grouped_metadatas) in grouped_matches
850 .iter()
851 .zip(grouped_ys)
852 .zip(grouped_positions)
853 .zip(grouped_metadatas)
854 {
855 let (ys_group, positions_group, metadatas_group) =
857 unsafe { match_to_result_simd(parent_table, grouped_matches) };
858 let ys_group = Y::array_from_repr(ys_group.to_array());
859 grouped_ys.write_copy_of_slice(&ys_group);
860 grouped_positions.write_copy_of_slice(&positions_group);
861
862 if metadata_size_bits(K, TABLE_NUMBER) > 0 {
864 grouped_metadatas.write_copy_of_slice(&metadatas_group);
865 }
866 }
867 for (((other_match, other_y), other_positions), other_metadata) in other_matches
868 .iter()
869 .zip(other_ys)
870 .zip(other_positions)
871 .zip(other_metadatas)
872 {
873 let (y, p, metadata) = unsafe { match_to_result(parent_table, other_match) };
875 other_y.write(y);
876 other_positions.write(p);
877 if metadata_size_bits(K, TABLE_NUMBER) > 0 {
879 other_metadata.write(metadata);
880 }
881 }
882}
883
884#[cfg(feature = "alloc")]
886#[derive(Debug)]
887pub(super) enum PrunedTable<const K: u8, const TABLE_NUMBER: u8>
888where
889 [(); 1 << K]:,
890 [(); num_buckets(K) - 1]:,
891{
892 First,
893 Other {
895 positions: Box<[MaybeUninit<[Position; 2]>; 1 << K]>,
897 },
898 #[cfg(feature = "parallel")]
900 OtherBuckets {
901 positions: Box<[[MaybeUninit<[Position; 2]>; REDUCED_MATCHES_COUNT]; num_buckets(K) - 1]>,
905 },
906}
907
908#[cfg(feature = "alloc")]
909impl<const K: u8, const TABLE_NUMBER: u8> PrunedTable<K, TABLE_NUMBER>
910where
911 [(); 1 << K]:,
912 [(); num_buckets(K) - 1]:,
913{
914 #[inline(always)]
921 pub(super) unsafe fn position(&self, position: Position) -> [Position; 2] {
922 match self {
923 Self::First => {
924 unreachable!("Not the first table");
925 }
926 Self::Other { positions, .. } => {
927 unsafe { positions.get_unchecked(usize::from(position)).assume_init() }
929 }
930 #[cfg(feature = "parallel")]
931 Self::OtherBuckets { positions, .. } => {
932 unsafe {
934 positions
935 .as_flattened()
936 .get_unchecked(usize::from(position))
937 .assume_init()
938 }
939 }
940 }
941 }
942}
943
944#[cfg(feature = "alloc")]
945#[derive(Debug)]
946pub(super) enum Table<const K: u8, const TABLE_NUMBER: u8>
947where
948 EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
949 [(); 1 << K]:,
950 [(); num_buckets(K)]:,
951 [(); num_buckets(K) - 1]:,
952{
953 First {
955 buckets: Box<[[(Position, Y); REDUCED_BUCKET_SIZE]; num_buckets(K)]>,
959 },
960 Other {
962 positions: Box<[MaybeUninit<[Position; 2]>; 1 << K]>,
964 metadatas: Box<[MaybeUninit<Metadata<K, TABLE_NUMBER>>; 1 << K]>,
966 buckets: Box<[[(Position, Y); REDUCED_BUCKET_SIZE]; num_buckets(K)]>,
970 },
971 #[cfg(feature = "parallel")]
973 OtherBuckets {
974 positions: Box<[[MaybeUninit<[Position; 2]>; REDUCED_MATCHES_COUNT]; num_buckets(K) - 1]>,
978 metadatas: Box<
982 [[MaybeUninit<Metadata<K, TABLE_NUMBER>>; REDUCED_MATCHES_COUNT]; num_buckets(K) - 1],
983 >,
984 buckets: Box<[[(Position, Y); REDUCED_BUCKET_SIZE]; num_buckets(K)]>,
988 },
989}
990
991#[cfg(feature = "alloc")]
992impl<const K: u8> Table<K, 1>
993where
994 EvaluatableUsize<{ metadata_size_bytes(K, 1) }>: Sized,
995 [(); 1 << K]:,
996 [(); num_buckets(K)]:,
997 [(); num_buckets(K) - 1]:,
998{
999 pub(super) fn create(seed: Seed) -> Self
1001 where
1002 EvaluatableUsize<{ usize::from(K) * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize }>: Sized,
1003 {
1004 debug_assert!(
1007 MAX_BUCKET_SIZE >= bucket_size_upper_bound(K, BUCKET_SIZE_UPPER_BOUND_SECURITY_BITS),
1008 "Max bucket size is not sufficiently large"
1009 );
1010
1011 let partial_ys = partial_ys::<K>(seed);
1012
1013 let mut ys = unsafe { Box::<[MaybeUninit<Y>; 1 << K]>::new_uninit().assume_init() };
1015
1016 for ((ys, xs_batch_start), partial_ys) in ys
1017 .as_chunks_mut::<COMPUTE_F1_SIMD_FACTOR>()
1018 .0
1019 .iter_mut()
1020 .zip((X::ZERO..).step_by(COMPUTE_F1_SIMD_FACTOR))
1021 .zip(
1022 partial_ys
1023 .as_chunks::<{ usize::from(K) * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize }>()
1024 .0,
1025 )
1026 {
1027 let xs = Simd::splat(u32::from(xs_batch_start))
1028 + Simd::from_array(array::from_fn(|i| i as u32));
1029 let ys_batch = compute_f1_simd::<K>(xs, partial_ys);
1030
1031 ys.write_copy_of_slice(&ys_batch);
1032 }
1033
1034 let ys = unsafe { ys.assume_init_ref() };
1036
1037 let buckets = group_by_buckets::<K>(ys);
1039
1040 Self::First { buckets }
1041 }
1042
1043 #[cfg(feature = "parallel")]
1045 pub(super) fn create_parallel(seed: Seed) -> Self
1046 where
1047 EvaluatableUsize<{ usize::from(K) * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize }>: Sized,
1048 {
1049 debug_assert!(
1052 MAX_BUCKET_SIZE >= bucket_size_upper_bound(K, BUCKET_SIZE_UPPER_BOUND_SECURITY_BITS),
1053 "Max bucket size is not sufficiently large"
1054 );
1055
1056 let partial_ys = partial_ys::<K>(seed);
1057
1058 let mut ys = unsafe { Box::<[MaybeUninit<Y>; 1 << K]>::new_uninit().assume_init() };
1060
1061 for ((ys, xs_batch_start), partial_ys) in ys
1063 .as_chunks_mut::<COMPUTE_F1_SIMD_FACTOR>()
1064 .0
1065 .iter_mut()
1066 .zip((X::ZERO..).step_by(COMPUTE_F1_SIMD_FACTOR))
1067 .zip(
1068 partial_ys
1069 .as_chunks::<{ usize::from(K) * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize }>()
1070 .0,
1071 )
1072 {
1073 let xs = Simd::splat(u32::from(xs_batch_start))
1074 + Simd::from_array(array::from_fn(|i| i as u32));
1075 let ys_batch = compute_f1_simd::<K>(xs, partial_ys);
1076
1077 ys.write_copy_of_slice(&ys_batch);
1078 }
1079
1080 let ys = unsafe { ys.assume_init_ref() };
1082
1083 let buckets = group_by_buckets::<K>(ys);
1085
1086 Self::First { buckets }
1087 }
1088}
1089
1090#[cfg(feature = "alloc")]
1091mod private {
1092 pub(in super::super) trait SupportedOtherTables {}
1093 pub(in super::super) trait NotLastTable {}
1094}
1095
1096#[cfg(feature = "alloc")]
1097impl<const K: u8> private::SupportedOtherTables for Table<K, 2>
1098where
1099 EvaluatableUsize<{ metadata_size_bytes(K, 2) }>: Sized,
1100 [(); 1 << K]:,
1101 [(); num_buckets(K)]:,
1102 [(); num_buckets(K) - 1]:,
1103{
1104}
1105#[cfg(feature = "alloc")]
1106impl<const K: u8> private::SupportedOtherTables for Table<K, 3>
1107where
1108 EvaluatableUsize<{ metadata_size_bytes(K, 3) }>: Sized,
1109 [(); 1 << K]:,
1110 [(); num_buckets(K)]:,
1111 [(); num_buckets(K) - 1]:,
1112{
1113}
1114#[cfg(feature = "alloc")]
1115impl<const K: u8> private::SupportedOtherTables for Table<K, 4>
1116where
1117 EvaluatableUsize<{ metadata_size_bytes(K, 4) }>: Sized,
1118 [(); 1 << K]:,
1119 [(); num_buckets(K)]:,
1120 [(); num_buckets(K) - 1]:,
1121{
1122}
1123#[cfg(feature = "alloc")]
1124impl<const K: u8> private::SupportedOtherTables for Table<K, 5>
1125where
1126 EvaluatableUsize<{ metadata_size_bytes(K, 5) }>: Sized,
1127 [(); 1 << K]:,
1128 [(); num_buckets(K)]:,
1129 [(); num_buckets(K) - 1]:,
1130{
1131}
1132#[cfg(feature = "alloc")]
1133impl<const K: u8> private::SupportedOtherTables for Table<K, 6>
1134where
1135 EvaluatableUsize<{ metadata_size_bytes(K, 6) }>: Sized,
1136 [(); 1 << K]:,
1137 [(); num_buckets(K)]:,
1138 [(); num_buckets(K) - 1]:,
1139{
1140}
1141#[cfg(feature = "alloc")]
1142impl<const K: u8> private::SupportedOtherTables for Table<K, 7>
1143where
1144 EvaluatableUsize<{ metadata_size_bytes(K, 7) }>: Sized,
1145 [(); 1 << K]:,
1146 [(); num_buckets(K)]:,
1147 [(); num_buckets(K) - 1]:,
1148{
1149}
1150
1151#[cfg(feature = "alloc")]
1152impl<const K: u8> private::NotLastTable for Table<K, 1>
1153where
1154 EvaluatableUsize<{ metadata_size_bytes(K, 1) }>: Sized,
1155 [(); 1 << K]:,
1156 [(); num_buckets(K)]:,
1157 [(); num_buckets(K) - 1]:,
1158{
1159}
1160#[cfg(feature = "alloc")]
1161impl<const K: u8> private::NotLastTable for Table<K, 2>
1162where
1163 EvaluatableUsize<{ metadata_size_bytes(K, 2) }>: Sized,
1164 [(); 1 << K]:,
1165 [(); num_buckets(K)]:,
1166 [(); num_buckets(K) - 1]:,
1167{
1168}
1169#[cfg(feature = "alloc")]
1170impl<const K: u8> private::NotLastTable for Table<K, 3>
1171where
1172 EvaluatableUsize<{ metadata_size_bytes(K, 3) }>: Sized,
1173 [(); 1 << K]:,
1174 [(); num_buckets(K)]:,
1175 [(); num_buckets(K) - 1]:,
1176{
1177}
1178#[cfg(feature = "alloc")]
1179impl<const K: u8> private::NotLastTable for Table<K, 4>
1180where
1181 EvaluatableUsize<{ metadata_size_bytes(K, 4) }>: Sized,
1182 [(); 1 << K]:,
1183 [(); num_buckets(K)]:,
1184 [(); num_buckets(K) - 1]:,
1185{
1186}
1187#[cfg(feature = "alloc")]
1188impl<const K: u8> private::NotLastTable for Table<K, 5>
1189where
1190 EvaluatableUsize<{ metadata_size_bytes(K, 5) }>: Sized,
1191 [(); 1 << K]:,
1192 [(); num_buckets(K)]:,
1193 [(); num_buckets(K) - 1]:,
1194{
1195}
1196#[cfg(feature = "alloc")]
1197impl<const K: u8> private::NotLastTable for Table<K, 6>
1198where
1199 EvaluatableUsize<{ metadata_size_bytes(K, 6) }>: Sized,
1200 [(); 1 << K]:,
1201 [(); num_buckets(K)]:,
1202 [(); num_buckets(K) - 1]:,
1203{
1204}
1205
1206#[cfg(feature = "alloc")]
1207impl<const K: u8, const TABLE_NUMBER: u8> Table<K, TABLE_NUMBER>
1208where
1209 Self: private::SupportedOtherTables,
1210 EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
1211 [(); 1 << K]:,
1212 [(); num_buckets(K)]:,
1213 [(); num_buckets(K) - 1]:,
1214{
1215 pub(super) fn create<const PARENT_TABLE_NUMBER: u8>(
1219 parent_table: Table<K, PARENT_TABLE_NUMBER>,
1220 cache: &TablesCache,
1221 ) -> (Self, PrunedTable<K, PARENT_TABLE_NUMBER>)
1222 where
1223 Table<K, PARENT_TABLE_NUMBER>: private::NotLastTable,
1224 EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
1225 {
1226 let left_targets = &*cache.left_targets;
1227 let mut initialized_elements = 0_usize;
1228 let mut ys = unsafe { Box::<[MaybeUninit<Y>; 1 << K]>::new_uninit().assume_init() };
1230 let mut positions =
1232 unsafe { Box::<[MaybeUninit<[Position; 2]>; 1 << K]>::new_uninit().assume_init() };
1233 let mut metadatas = unsafe {
1235 Box::<[MaybeUninit<Metadata<K, TABLE_NUMBER>>; 1 << K]>::new_uninit().assume_init()
1236 };
1237
1238 for ([left_bucket, right_bucket], left_bucket_index) in
1239 parent_table.buckets().array_windows().zip(0..)
1240 {
1241 let mut matches = [MaybeUninit::uninit(); _];
1242 let matches = unsafe {
1245 find_matches_in_buckets(
1246 left_bucket_index,
1247 left_bucket,
1248 right_bucket,
1249 &mut matches,
1250 left_targets,
1251 )
1252 };
1253 let matches = &matches[..matches.len().min(REDUCED_MATCHES_COUNT)];
1255 let (ys, positions, metadatas) = unsafe {
1257 (
1258 ys.get_unchecked_mut(initialized_elements..),
1259 positions.get_unchecked_mut(initialized_elements..),
1260 metadatas.get_unchecked_mut(initialized_elements..),
1261 )
1262 };
1263
1264 let (ys, positions, metadatas) = unsafe {
1266 (
1267 ys.get_unchecked_mut(..matches.len()),
1268 positions.get_unchecked_mut(..matches.len()),
1269 metadatas.get_unchecked_mut(..matches.len()),
1270 )
1271 };
1272
1273 unsafe {
1276 matches_to_results(&parent_table, matches, ys, positions, metadatas);
1277 }
1278
1279 initialized_elements += matches.len();
1280 }
1281
1282 let parent_table = parent_table.prune();
1283
1284 let ys = unsafe {
1287 let ys_len = ys.len();
1288 let ys = Box::into_raw(ys);
1289 Vec::from_raw_parts(ys.cast(), initialized_elements, ys_len)
1290 };
1291
1292 let buckets = group_by_buckets::<K>(&ys);
1294
1295 let table = Self::Other {
1296 positions,
1297 metadatas,
1298 buckets,
1299 };
1300
1301 (table, parent_table)
1302 }
1303
1304 #[cfg(feature = "parallel")]
1308 pub(super) fn create_parallel<const PARENT_TABLE_NUMBER: u8>(
1309 parent_table: Table<K, PARENT_TABLE_NUMBER>,
1310 cache: &TablesCache,
1311 ) -> (Self, PrunedTable<K, PARENT_TABLE_NUMBER>)
1312 where
1313 Table<K, PARENT_TABLE_NUMBER>: private::NotLastTable,
1314 EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
1315 {
1316 let ys = unsafe {
1318 Box::<[SyncUnsafeCell<[MaybeUninit<_>; REDUCED_MATCHES_COUNT]>; num_buckets(K) - 1]>::new_uninit().assume_init()
1319 };
1320 let positions = unsafe {
1322 Box::<[SyncUnsafeCell<[MaybeUninit<_>; REDUCED_MATCHES_COUNT]>; num_buckets(K) - 1]>::new_uninit().assume_init()
1323 };
1324 let metadatas = unsafe {
1326 Box::<[SyncUnsafeCell<[MaybeUninit<_>; REDUCED_MATCHES_COUNT]>; num_buckets(K) - 1]>::new_uninit().assume_init()
1327 };
1328 let global_results_counts =
1329 array::from_fn::<_, { num_buckets(K) - 1 }, _>(|_| SyncUnsafeCell::new(0u16));
1330
1331 let left_targets = &*cache.left_targets;
1332
1333 let buckets = parent_table.buckets();
1334 let bucket_batch_size = CACHE_LINE_SIZE / size_of::<u16>();
1337 let bucket_batch_index = AtomicUsize::new(0);
1338
1339 rayon::broadcast(|_ctx| {
1340 loop {
1341 let bucket_batch_index = bucket_batch_index.fetch_add(1, Ordering::Relaxed);
1342
1343 let buckets_batch = buckets
1344 .array_windows()
1345 .enumerate()
1346 .skip(bucket_batch_index * bucket_batch_size)
1347 .take(bucket_batch_size);
1348
1349 if buckets_batch.is_empty() {
1350 break;
1351 }
1352
1353 for (left_bucket_index, [left_bucket, right_bucket]) in buckets_batch {
1354 let mut matches = [MaybeUninit::uninit(); _];
1355 let matches = unsafe {
1358 find_matches_in_buckets(
1359 left_bucket_index as u32,
1360 left_bucket,
1361 right_bucket,
1362 &mut matches,
1363 left_targets,
1364 )
1365 };
1366 let matches = &matches[..matches.len().min(REDUCED_MATCHES_COUNT)];
1368
1369 let ys = unsafe { &mut *ys.get_unchecked(left_bucket_index).get() };
1372 let positions =
1375 unsafe { &mut *positions.get_unchecked(left_bucket_index).get() };
1376 let metadatas =
1379 unsafe { &mut *metadatas.get_unchecked(left_bucket_index).get() };
1380 let count = unsafe {
1383 &mut *global_results_counts.get_unchecked(left_bucket_index).get()
1384 };
1385
1386 unsafe {
1389 matches_to_results::<_, TABLE_NUMBER, _>(
1390 &parent_table,
1391 matches,
1392 ys,
1393 positions,
1394 metadatas,
1395 );
1396 }
1397 *count = matches.len() as u16;
1398 }
1399 }
1400 });
1401
1402 let parent_table = parent_table.prune();
1403
1404 let ys = strip_sync_unsafe_cell(ys);
1405 let positions = strip_sync_unsafe_cell(positions);
1406 let metadatas = strip_sync_unsafe_cell(metadatas);
1407
1408 let buckets = unsafe {
1411 group_by_buckets_from_buckets::<K, _>(
1412 ys.iter().zip(
1413 global_results_counts
1414 .into_iter()
1415 .map(|count| usize::from(count.into_inner())),
1416 ),
1417 )
1418 };
1419
1420 let table = Self::OtherBuckets {
1421 positions,
1422 metadatas,
1423 buckets,
1424 };
1425
1426 (table, parent_table)
1427 }
1428
1429 #[inline(always)]
1436 pub(super) unsafe fn position(&self, position: Position) -> [Position; 2] {
1437 match self {
1438 Self::First { .. } => {
1439 unreachable!("Not the first table");
1440 }
1441 Self::Other { positions, .. } => {
1442 unsafe { positions.get_unchecked(usize::from(position)).assume_init() }
1444 }
1445 #[cfg(feature = "parallel")]
1446 Self::OtherBuckets { positions, .. } => {
1447 unsafe {
1449 positions
1450 .as_flattened()
1451 .get_unchecked(usize::from(position))
1452 .assume_init()
1453 }
1454 }
1455 }
1456 }
1457}
1458
1459#[cfg(feature = "alloc")]
1460impl<const K: u8> Table<K, 7>
1461where
1462 Self: private::SupportedOtherTables,
1463 EvaluatableUsize<{ metadata_size_bytes(K, 7) }>: Sized,
1464 [(); 1 << K]:,
1465 [(); num_buckets(K)]:,
1466 [(); num_buckets(K) - 1]:,
1467{
1468 pub(super) fn create_proof_targets(
1471 parent_table: Table<K, 6>,
1472 cache: &TablesCache,
1473 ) -> (
1474 Box<[[Position; 2]; Record::NUM_S_BUCKETS]>,
1475 PrunedTable<K, 6>,
1476 )
1477 where
1478 Table<K, 6>: private::NotLastTable,
1479 EvaluatableUsize<{ metadata_size_bytes(K, 6) }>: Sized,
1480 {
1481 let left_targets = &*cache.left_targets;
1482 let mut table_6_proof_targets =
1484 unsafe { Box::<[[Position; 2]; Record::NUM_S_BUCKETS]>::new_zeroed().assume_init() };
1485
1486 for ([left_bucket, right_bucket], left_bucket_index) in
1487 parent_table.buckets().array_windows().zip(0..)
1488 {
1489 let mut matches = [MaybeUninit::uninit(); _];
1490 let matches = unsafe {
1493 find_matches_in_buckets(
1494 left_bucket_index,
1495 left_bucket,
1496 right_bucket,
1497 &mut matches,
1498 left_targets,
1499 )
1500 };
1501 let matches = &matches[..matches.len().min(REDUCED_MATCHES_COUNT)];
1503
1504 let (grouped_matches, other_matches) = matches.as_chunks::<COMPUTE_FN_SIMD_FACTOR>();
1505
1506 for grouped_matches in grouped_matches {
1507 let (ys_group, positions_group, _) =
1509 unsafe { match_to_result_simd::<_, 7, _>(&parent_table, grouped_matches) };
1510
1511 let s_buckets = ys_group >> Simd::splat(u32::from(PARAM_EXT));
1512
1513 for (s_bucket, p) in s_buckets.to_array().into_iter().zip(positions_group) {
1514 const {
1515 assert!(Record::NUM_S_BUCKETS == (u16::MAX as usize) + 1);
1516 }
1517 let Ok(s_bucket) = u16::try_from(s_bucket) else {
1518 continue;
1519 };
1520 let positions = &mut table_6_proof_targets[usize::from(s_bucket)];
1521 if positions == &[Position::ZERO; 2] {
1522 *positions = p;
1523 }
1524 }
1525 }
1526 for other_match in other_matches {
1527 let (y, p, _) = unsafe { match_to_result::<_, 7, _>(&parent_table, other_match) };
1529
1530 let s_bucket = y.first_k_bits();
1531
1532 const {
1533 assert!(Record::NUM_S_BUCKETS == (u16::MAX as usize) + 1);
1534 }
1535 let Ok(s_bucket) = u16::try_from(s_bucket) else {
1536 continue;
1537 };
1538
1539 let positions = &mut table_6_proof_targets[usize::from(s_bucket)];
1540 if positions == &[Position::ZERO; 2] {
1541 *positions = p;
1542 }
1543 }
1544 }
1545
1546 let parent_table = parent_table.prune();
1547
1548 (table_6_proof_targets, parent_table)
1549 }
1550
1551 #[cfg(feature = "parallel")]
1555 pub(super) fn create_proof_targets_parallel(
1556 parent_table: Table<K, 6>,
1557 cache: &TablesCache,
1558 ) -> (
1559 Box<[[Position; 2]; Record::NUM_S_BUCKETS]>,
1560 PrunedTable<K, 6>,
1561 )
1562 where
1563 Table<K, 6>: private::NotLastTable,
1564 EvaluatableUsize<{ metadata_size_bytes(K, 6) }>: Sized,
1565 {
1566 let buckets_positions = unsafe {
1568 Box::<[SyncUnsafeCell<[MaybeUninit<_>; REDUCED_MATCHES_COUNT]>; num_buckets(K) - 1]>::new_uninit().assume_init()
1569 };
1570 let global_results_counts =
1571 array::from_fn::<_, { num_buckets(K) - 1 }, _>(|_| SyncUnsafeCell::new(0u16));
1572
1573 let left_targets = &*cache.left_targets;
1574
1575 let buckets = parent_table.buckets();
1576 let bucket_batch_size = CACHE_LINE_SIZE / size_of::<u16>();
1579 let bucket_batch_index = AtomicUsize::new(0);
1580
1581 rayon::broadcast(|_ctx| {
1582 loop {
1583 let bucket_batch_index = bucket_batch_index.fetch_add(1, Ordering::Relaxed);
1584
1585 let buckets_batch = buckets
1586 .array_windows()
1587 .enumerate()
1588 .skip(bucket_batch_index * bucket_batch_size)
1589 .take(bucket_batch_size);
1590
1591 if buckets_batch.is_empty() {
1592 break;
1593 }
1594
1595 for (left_bucket_index, [left_bucket, right_bucket]) in buckets_batch {
1596 let mut matches = [MaybeUninit::uninit(); _];
1597 let matches = unsafe {
1600 find_matches_in_buckets(
1601 left_bucket_index as u32,
1602 left_bucket,
1603 right_bucket,
1604 &mut matches,
1605 left_targets,
1606 )
1607 };
1608 let matches = &matches[..matches.len().min(REDUCED_MATCHES_COUNT)];
1610
1611 let buckets_positions =
1614 unsafe { &mut *buckets_positions.get_unchecked(left_bucket_index).get() };
1615 let count = unsafe {
1618 &mut *global_results_counts.get_unchecked(left_bucket_index).get()
1619 };
1620
1621 let (grouped_matches, other_matches) =
1622 matches.as_chunks::<COMPUTE_FN_SIMD_FACTOR>();
1623
1624 let mut reduced_count = 0_usize;
1625 for grouped_matches in grouped_matches {
1626 let (ys_group, positions_group, _) = unsafe {
1628 match_to_result_simd::<_, 7, _>(&parent_table, grouped_matches)
1629 };
1630
1631 let s_buckets = ys_group >> Simd::splat(u32::from(PARAM_EXT));
1632 let s_buckets = s_buckets.to_array();
1633
1634 for (s_bucket, p) in s_buckets.into_iter().zip(positions_group) {
1635 const {
1636 assert!(Record::NUM_S_BUCKETS == (u16::MAX as usize) + 1);
1637 }
1638 let Ok(s_bucket) = u16::try_from(s_bucket) else {
1639 continue;
1640 };
1641
1642 buckets_positions[reduced_count].write((s_bucket, p));
1643 reduced_count += 1;
1644 }
1645 }
1646 for other_match in other_matches {
1647 let (y, p, _) =
1649 unsafe { match_to_result::<_, 7, _>(&parent_table, other_match) };
1650
1651 let s_bucket = y.first_k_bits();
1652
1653 const {
1654 assert!(Record::NUM_S_BUCKETS == (u16::MAX as usize) + 1);
1655 }
1656 let Ok(s_bucket) = u16::try_from(s_bucket) else {
1657 continue;
1658 };
1659
1660 buckets_positions[reduced_count].write((s_bucket, p));
1661 reduced_count += 1;
1662 }
1663
1664 *count = reduced_count as u16;
1665 }
1666 }
1667 });
1668
1669 let parent_table = parent_table.prune();
1670
1671 let buckets_positions = strip_sync_unsafe_cell(buckets_positions);
1672
1673 let mut table_6_proof_targets =
1675 unsafe { Box::<[[Position; 2]; Record::NUM_S_BUCKETS]>::new_zeroed().assume_init() };
1676
1677 for (bucket, results_count) in buckets_positions.iter().zip(
1678 global_results_counts
1679 .into_iter()
1680 .map(|count| usize::from(count.into_inner())),
1681 ) {
1682 for &(s_bucket, p) in unsafe { bucket[..results_count].assume_init_ref() } {
1684 let positions = &mut table_6_proof_targets[usize::from(s_bucket)];
1685 if positions == &[Position::ZERO; 2] {
1686 *positions = p;
1687 }
1688 }
1689 }
1690
1691 (table_6_proof_targets, parent_table)
1692 }
1693}
1694
1695#[cfg(feature = "alloc")]
1696impl<const K: u8, const TABLE_NUMBER: u8> Table<K, TABLE_NUMBER>
1697where
1698 Self: private::NotLastTable,
1699 EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
1700 [(); 1 << K]:,
1701 [(); num_buckets(K)]:,
1702 [(); num_buckets(K) - 1]:,
1703{
1704 #[inline(always)]
1709 unsafe fn metadata(&self, position: Position) -> Metadata<K, TABLE_NUMBER> {
1710 match self {
1711 Self::First { .. } => {
1712 Metadata::from(X::from(u32::from(position)))
1714 }
1715 Self::Other { metadatas, .. } => {
1716 unsafe { metadatas.get_unchecked(usize::from(position)).assume_init() }
1718 }
1719 #[cfg(feature = "parallel")]
1720 Self::OtherBuckets { metadatas, .. } => {
1721 unsafe {
1723 metadatas
1724 .as_flattened()
1725 .get_unchecked(usize::from(position))
1726 .assume_init()
1727 }
1728 }
1729 }
1730 }
1731}
1732
1733#[cfg(feature = "alloc")]
1734impl<const K: u8, const TABLE_NUMBER: u8> Table<K, TABLE_NUMBER>
1735where
1736 EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
1737 [(); 1 << K]:,
1738 [(); num_buckets(K)]:,
1739 [(); num_buckets(K) - 1]:,
1740{
1741 #[inline(always)]
1742 fn prune(self) -> PrunedTable<K, TABLE_NUMBER> {
1743 match self {
1744 Self::First { .. } => PrunedTable::First,
1745 Self::Other { positions, .. } => PrunedTable::Other { positions },
1746 #[cfg(feature = "parallel")]
1747 Self::OtherBuckets { positions, .. } => PrunedTable::OtherBuckets { positions },
1748 }
1749 }
1750
1751 #[inline(always)]
1753 pub(super) fn buckets(&self) -> &[[(Position, Y); REDUCED_BUCKET_SIZE]; num_buckets(K)] {
1754 match self {
1755 Self::First { buckets, .. } => buckets,
1756 Self::Other { buckets, .. } => buckets,
1757 #[cfg(feature = "parallel")]
1758 Self::OtherBuckets { buckets, .. } => buckets,
1759 }
1760 }
1761}