#[cfg(any(feature = "alloc", test))]
mod rmap;
#[cfg(test)]
mod tests;
pub(super) mod types;

use crate::chiapos::Seed;
use crate::chiapos::constants::{PARAM_B, PARAM_BC, PARAM_C, PARAM_EXT, PARAM_M};
#[cfg(feature = "alloc")]
use crate::chiapos::table::rmap::Rmap;
use crate::chiapos::table::types::{Metadata, X, Y};
#[cfg(feature = "alloc")]
use crate::chiapos::table::types::{Position, R};
use crate::chiapos::utils::EvaluatableUsize;
use ab_chacha8::{ChaCha8Block, ChaCha8State};
#[cfg(feature = "alloc")]
use ab_core_primitives::pieces::Record;
#[cfg(feature = "alloc")]
use alloc::boxed::Box;
#[cfg(feature = "alloc")]
use alloc::vec;
#[cfg(feature = "alloc")]
use alloc::vec::Vec;
#[cfg(feature = "alloc")]
use chacha20::cipher::{Iv, KeyIvInit, StreamCipher};
#[cfg(feature = "alloc")]
use chacha20::{ChaCha8, Key};
use core::array;
#[cfg(feature = "parallel")]
use core::cell::SyncUnsafeCell;
#[cfg(feature = "alloc")]
use core::mem;
#[cfg(feature = "alloc")]
use core::mem::MaybeUninit;
#[cfg(any(feature = "alloc", test))]
use core::simd::prelude::*;
#[cfg(feature = "parallel")]
use core::sync::atomic::{AtomicUsize, Ordering};
#[cfg(feature = "alloc")]
use derive_more::Deref;
#[cfg(feature = "alloc")]
use rclite::Arc;
#[cfg(any(feature = "alloc", test))]
use seq_macro::seq;

pub(super) const COMPUTE_F1_SIMD_FACTOR: usize = 8;
#[cfg(any(feature = "alloc", test))]
const COMPUTE_FN_SIMD_FACTOR: usize = 16;
const MAX_BUCKET_SIZE: usize = 512;
#[cfg(any(feature = "alloc", test))]
const BUCKET_SIZE_UPPER_BOUND_SECURITY_BITS: u8 = 128;
// Buckets with more than this many entries have the excess dropped (see `group_by_buckets`)
const REDUCED_BUCKET_SIZE: usize = 272;
// At most this many matches are retained per pair of adjacent buckets
const REDUCED_MATCHES_COUNT: usize = 288;
#[cfg(feature = "parallel")]
const CACHE_LINE_SIZE: usize = 64;

const _: () = {
    debug_assert!(REDUCED_BUCKET_SIZE <= MAX_BUCKET_SIZE);
    debug_assert!(REDUCED_MATCHES_COUNT <= MAX_BUCKET_SIZE);
};

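/// Size of `y` in bits: `k` bits from the source plus `PARAM_EXT` extension bits (for example,
/// with `PARAM_EXT == 6`, `y_size_bits(20) == 26`)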
pub(super) const fn y_size_bits(k: u8) -> usize {
    k as usize + PARAM_EXT as usize
}

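/// Metadata size in bytes, rounded up from `metadata_size_bits` below; for example, table 2
/// metadata is `2 * k` bits, so `metadata_size_bytes(20, 2) == 5`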
pub const fn metadata_size_bytes(k: u8, table_number: u8) -> usize {
    metadata_size_bits(k, table_number).div_ceil(u8::BITS as usize)
}

pub(super) const fn metadata_size_bits(k: u8, table_number: u8) -> usize {
    k as usize
        * match table_number {
            1 => 1,
            2 => 2,
            3 | 4 => 4,
            5 => 3,
            6 => 2,
            7 => 0,
            _ => unreachable!(),
        }
}

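/// Number of buckets of width `PARAM_BC` needed to cover the whole `y` domain of
/// `2^(k + PARAM_EXT)` values (for example, assuming `PARAM_BC == 15113` and `PARAM_EXT == 6`,
/// `num_buckets(20) == 4441`)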
pub const fn num_buckets(k: u8) -> usize {
    2_usize
        .pow(y_size_bits(k) as u32)
        .div_ceil(usize::from(PARAM_BC))
}

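/// Removes the `SyncUnsafeCell` wrapper from a boxed array once exclusive access is
/// re-established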
#[cfg(feature = "parallel")]
#[inline(always)]
fn strip_sync_unsafe_cell<const N: usize, T>(value: Box<[SyncUnsafeCell<T>; N]>) -> Box<[T; N]> {
    // SAFETY: `SyncUnsafeCell<T>` is `#[repr(transparent)]` over `T`, so the layouts match
    unsafe { Box::from_raw(Box::into_raw(value).cast()) }
}

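/// ChaCha8 keystream for the given seed, containing exactly the bits needed to derive `y` for
/// every `x` in `0..2^K`, meaning `K * 2^K` bits rounded up to whole bytes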
#[cfg(feature = "alloc")]
fn partial_ys<const K: u8>(seed: Seed) -> Vec<u8> {
    let output_len_bits = usize::from(K) * (1 << K);
    let mut output = vec![0; output_len_bits.div_ceil(u8::BITS as usize)];

    let key = Key::from(seed);
    let iv = Iv::<ChaCha8>::default();

    let mut cipher = ChaCha8::new(&key, &iv);

    cipher.write_keystream(&mut output);

    output
}

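/// Upper bound for the number of elements in a single bucket.
///
/// Sketch of the estimate: a bucket holds `λ = PARAM_BC / 2^PARAM_EXT` entries on average, and a
/// Chernoff-style tail bound `Pr[X ≥ λ + δ] ≤ exp(-δ² / (3λ))` with
/// `δ = √(3λ · (k + security_bits) · ln 2)` keeps the overflow probability below
/// `2^-(k + security_bits)`. The loop below computes `⌈δ⌉` using integer arithmetic only (a
/// binary search for the integer square root).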
#[cfg(feature = "alloc")]
const fn bucket_size_upper_bound(k: u8, security_bits: u8) -> usize {
    const LAMBDA: u64 = PARAM_BC as u64 / 2u64.pow(PARAM_EXT as u32);
    // `ln(2)` approximated as the ratio `693147 / 1000000`
    const LN2_NUM: u128 = 693147;
    const LN2_DEN: u128 = 1000000;

    let ks = k as u128 + security_bits as u128;
    let num = 3u128 * LAMBDA as u128 * ks * LN2_NUM;
    let den = LN2_DEN;

    let ceil_div = num.div_ceil(den);

    // Binary search for the smallest `m` such that `m^2 >= ceil_div`, i.e. the integer square
    // root rounded up
    let mut low = 0u64;
    let mut high = u64::MAX;
    while low < high {
        let mid = low + (high - low) / 2;
        let left = (mid as u128) * (mid as u128);
        if left >= ceil_div {
            high = mid;
        } else {
            low = mid + 1;
        }
    }
    let add_term = low;

    (LAMBDA + add_term) as usize
}

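/// Groups `(position, y)` pairs into buckets by `y / PARAM_BC`, where `position` is the index of
/// `y` in the input.
///
/// Each bucket keeps at most `REDUCED_BUCKET_SIZE` entries (any excess is dropped) and is padded
/// at the end with `(Position::SENTINEL, Y::SENTINEL)` pairs, as in this usage sketch of the
/// private API:
///
/// ```ignore
/// let buckets = group_by_buckets::<K>(&ys);
/// for &(position, y) in &buckets[0] {
///     if position == Position::SENTINEL {
///         // Padding reached, no more entries in this bucket
///         break;
///     }
///     // `y` belongs to the first bucket: `u32::from(y) < u32::from(PARAM_BC)`
/// }
/// ```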
#[cfg(feature = "alloc")]
fn group_by_buckets<const K: u8>(
    ys: &[Y],
) -> Box<[[(Position, Y); REDUCED_BUCKET_SIZE]; num_buckets(K)]>
where
    [(); num_buckets(K)]:,
{
    let mut bucket_offsets = [0_u16; num_buckets(K)];
    // SAFETY: An array of `MaybeUninit` is always initialized
    let mut buckets = unsafe {
        Box::<[[MaybeUninit<(Position, Y)>; REDUCED_BUCKET_SIZE]; num_buckets(K)]>::new_uninit()
            .assume_init()
    };

    for (&y, position) in ys.iter().zip(Position::ZERO..) {
        let bucket_index = (u32::from(y) / u32::from(PARAM_BC)) as usize;

        // SAFETY: `bucket_index` is always within range for valid `y` values
        let bucket_offset = unsafe { bucket_offsets.get_unchecked_mut(bucket_index) };
        let bucket = unsafe { buckets.get_unchecked_mut(bucket_index) };

        if *bucket_offset < REDUCED_BUCKET_SIZE as u16 {
            bucket[*bucket_offset as usize].write((position, y));
            *bucket_offset += 1;
        }
    }

    for (bucket, initialized) in buckets.iter_mut().zip(bucket_offsets) {
        bucket[usize::from(initialized)..].write_filled((Position::SENTINEL, Y::SENTINEL));
    }

    // SAFETY: All elements are initialized at this point
    unsafe { Box::from_raw(Box::into_raw(buckets).cast()) }
}

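/// Like `group_by_buckets`, but the `y`s arrive as per-bucket-pair batches of up to
/// `REDUCED_MATCHES_COUNT` entries each, as produced by parallel table creation.
///
/// # Safety
/// The first `count` elements of every batch must be initialized.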
#[cfg(feature = "parallel")]
unsafe fn group_by_buckets_from_buckets<'a, const K: u8, I>(
    iter: I,
) -> Box<[[(Position, Y); REDUCED_BUCKET_SIZE]; num_buckets(K)]>
where
    I: Iterator<Item = (&'a [MaybeUninit<Y>; REDUCED_MATCHES_COUNT], usize)> + 'a,
    [(); num_buckets(K)]:,
{
    let mut bucket_offsets = [0_u16; num_buckets(K)];
    // SAFETY: An array of `MaybeUninit` is always initialized
    let mut buckets = unsafe {
        Box::<[[MaybeUninit<(Position, Y)>; REDUCED_BUCKET_SIZE]; num_buckets(K)]>::new_uninit()
            .assume_init()
    };

    for ((ys, count), batch_start) in iter.zip((Position::ZERO..).step_by(REDUCED_MATCHES_COUNT)) {
        // SAFETY: The first `count` elements are initialized, guaranteed by the function contract
        let ys = unsafe { ys[..count].assume_init_ref() };
        for (&y, position) in ys.iter().zip(batch_start..) {
            let bucket_index = (u32::from(y) / u32::from(PARAM_BC)) as usize;

            // SAFETY: `bucket_index` is always within range for valid `y` values
            let bucket_offset = unsafe { bucket_offsets.get_unchecked_mut(bucket_index) };
            let bucket = unsafe { buckets.get_unchecked_mut(bucket_index) };

            if *bucket_offset < REDUCED_BUCKET_SIZE as u16 {
                bucket[*bucket_offset as usize].write((position, y));
                *bucket_offset += 1;
            }
        }
    }

    for (bucket, initialized) in buckets.iter_mut().zip(bucket_offsets) {
        bucket[usize::from(initialized)..].write_filled((Position::SENTINEL, Y::SENTINEL));
    }

    // SAFETY: All elements are initialized at this point
    unsafe { Box::from_raw(Box::into_raw(buckets).cast()) }
}

/// Wrapper that aligns its contents to a cache line, avoiding false sharing
#[cfg(feature = "alloc")]
#[derive(Debug, Copy, Clone, Deref)]
#[repr(align(64))]
struct CacheLineAligned<T>(T);

/// Precomputed matching targets, indexed by parity and then by left `r`
#[cfg(feature = "alloc")]
type LeftTargets = [[CacheLineAligned<[R; PARAM_M as usize]>; PARAM_BC as usize]; 2];

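/// Precomputes the matching targets for both parities: for every left `r`, the `PARAM_M`
/// right-side values that would constitute a match (see `calculate_left_target_on_demand` below
/// for the closed form)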
#[cfg(feature = "alloc")]
fn calculate_left_targets() -> Arc<LeftTargets> {
    let mut left_targets = Arc::<LeftTargets>::new_uninit();
    // SAFETY: `MaybeUninit` of the outer array and an array of `MaybeUninit` elements have the
    // same layout
    let left_targets_slice = unsafe {
        mem::transmute::<
            &mut MaybeUninit<[[CacheLineAligned<[R; PARAM_M as usize]>; PARAM_BC as usize]; 2]>,
            &mut [[MaybeUninit<CacheLineAligned<[R; PARAM_M as usize]>>; PARAM_BC as usize]; 2],
        >(Arc::get_mut_unchecked(&mut left_targets))
    };

    for parity in 0..=1 {
        for r in 0..PARAM_BC {
            let c = r / PARAM_C;

            let arr = array::from_fn(|m| {
                let m = m as u16;
                R::from(
                    ((c + m) % PARAM_B) * PARAM_C
                        + (((2 * m + parity) * (2 * m + parity) + r) % PARAM_C),
                )
            });
            left_targets_slice[parity as usize][r as usize].write(CacheLineAligned(arr));
        }
    }

    // SAFETY: All elements are initialized at this point
    unsafe { left_targets.assume_init() }
}

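/// Computes a single matching target without the precomputed table:
/// `((r / PARAM_C + m) % PARAM_B) * PARAM_C + ((2m + parity)² + r) % PARAM_C`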
fn calculate_left_target_on_demand(parity: u32, r: u32, m: u32) -> u32 {
    let param_b = u32::from(PARAM_B);
    let param_c = u32::from(PARAM_C);

    ((r / param_c + m) % param_b) * param_c + (((2 * m + parity) * (2 * m + parity) + r) % param_c)
}

/// Cache of data reused across creation of multiple tables
#[cfg(feature = "alloc")]
#[derive(Debug, Clone)]
pub struct TablesCache {
    left_targets: Arc<LeftTargets>,
}

#[cfg(feature = "alloc")]
impl Default for TablesCache {
    fn default() -> Self {
        Self {
            left_targets: calculate_left_targets(),
        }
    }
}

#[cfg(feature = "alloc")]
#[derive(Debug, Copy, Clone)]
struct Match {
    left_position: Position,
    left_y: Y,
    right_position: Position,
}

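/// `f1` as defined by Chia's Proof of Space: the high `K` bits of `y` are taken from the ChaCha8
/// keystream at bit offset `K * x`, and the low `PARAM_EXT` bits are the most significant bits of
/// `x` itself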
pub(super) fn compute_f1<const K: u8>(x: X, seed: &Seed) -> Y {
    let skip_bits = u32::from(K) * u32::from(x);
    let skip_u32s = skip_bits / u32::BITS;
    let partial_y_offset = skip_bits % u32::BITS;

    const U32S_PER_BLOCK: usize = size_of::<ChaCha8Block>() / size_of::<u32>();

    let initial_state = ChaCha8State::init(seed, &[0; _]);
    let first_block_counter = skip_u32s / U32S_PER_BLOCK as u32;
    let u32_in_first_block = skip_u32s as usize % U32S_PER_BLOCK;

    let first_block = initial_state.compute_block(first_block_counter);
    let hi = first_block[u32_in_first_block].to_be();

    let lo = if u32_in_first_block + 1 == U32S_PER_BLOCK {
        // The next `u32` lands in the second block
        let second_block = initial_state.compute_block(first_block_counter + 1);
        second_block[0].to_be()
    } else {
        first_block[u32_in_first_block + 1].to_be()
    };

    let partial_y = (u64::from(hi) << u32::BITS) | u64::from(lo);

    let pre_y = partial_y >> (u64::BITS - u32::from(K + PARAM_EXT) - partial_y_offset);
    let pre_y = pre_y as u32;
    // Mask that keeps `K + PARAM_EXT` bits with the low `PARAM_EXT` bits zeroed out
    let pre_y_mask = (u32::MAX << PARAM_EXT) & (u32::MAX >> (u32::BITS - u32::from(K + PARAM_EXT)));

    // The low `PARAM_EXT` bits of `y` come from the most significant bits of `x`
    let pre_ext = u32::from(x) >> (K - PARAM_EXT);

    // Combine the keystream bits with the extension bits from `x`
    Y::from((pre_y & pre_y_mask) | pre_ext)
}

#[cfg(any(feature = "alloc", test))]
pub(super) fn compute_f1_simd<const K: u8>(
    xs: Simd<u32, COMPUTE_F1_SIMD_FACTOR>,
    partial_ys: &[u8; usize::from(K) * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize],
) -> [Y; COMPUTE_F1_SIMD_FACTOR] {
    // Each `x` takes `K` bits of the keystream; gather those bits into one `u64` per lane
    let pre_ys_bytes = array::from_fn(|i| {
        let partial_y_offset = i * usize::from(K);
        let partial_y_length =
            (partial_y_offset % u8::BITS as usize + usize::from(K)).div_ceil(u8::BITS as usize);
        let mut pre_y_bytes = 0u64.to_be_bytes();
        pre_y_bytes[..partial_y_length].copy_from_slice(
            &partial_ys[partial_y_offset / u8::BITS as usize..][..partial_y_length],
        );

        u64::from_be_bytes(pre_y_bytes)
    });
    let pre_ys_right_offset = array::from_fn(|i| {
        let partial_y_offset = i as u32 * u32::from(K);
        u64::from(u64::BITS - u32::from(K + PARAM_EXT) - partial_y_offset % u8::BITS)
    });
    let pre_ys = Simd::from_array(pre_ys_bytes) >> Simd::from_array(pre_ys_right_offset);

    // Mask that keeps `K + PARAM_EXT` bits with the low `PARAM_EXT` bits zeroed out
    let pre_ys_mask = Simd::splat(
        (u32::MAX << usize::from(PARAM_EXT))
            & (u32::MAX >> (u32::BITS as usize - usize::from(K + PARAM_EXT))),
    );

    // The low `PARAM_EXT` bits of `y` come from the most significant bits of `x`
    let pre_exts = xs >> Simd::splat(u32::from(K - PARAM_EXT));

    // Combine the keystream bits with the extension bits from `x`
    let ys = (pre_ys.cast() & pre_ys_mask) | pre_exts;

    Y::array_from_repr(ys.to_array())
}

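/// Finds matches between `left_bucket` and the adjacent `right_bucket`, writing them into the
/// `matches` scratch buffer and returning the initialized prefix.
///
/// The buffer is `PARAM_M * 2` entries larger than `REDUCED_MATCHES_COUNT` because the limit is
/// only checked once per left entry, and a single left entry can contribute up to `PARAM_M * 2`
/// matches.
///
/// # Safety
/// Bucket entries must either belong to the corresponding buckets (so that `y - base` fits into
/// `0..PARAM_BC`) or be sentinel padding at the end, as produced by `group_by_buckets`.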
#[cfg(feature = "alloc")]
unsafe fn find_matches_in_buckets<'a>(
    left_bucket_index: u32,
    left_bucket: &[(Position, Y); REDUCED_BUCKET_SIZE],
    right_bucket: &[(Position, Y); REDUCED_BUCKET_SIZE],
    matches: &'a mut [MaybeUninit<Match>; REDUCED_MATCHES_COUNT + PARAM_M as usize * 2],
    left_targets: &LeftTargets,
) -> &'a [Match] {
    let left_base = left_bucket_index * u32::from(PARAM_BC);
    let right_base = left_base + u32::from(PARAM_BC);

    let mut rmap = Rmap::new();
    for &(right_position, y) in right_bucket {
        // A sentinel indicates the end of the initialized part of the bucket
        if right_position == Position::SENTINEL {
            break;
        }
        let r = R::from((u32::from(y) - right_base) as u16);
        // SAFETY: `r` is within `0..PARAM_BC`, guaranteed by the function contract
        unsafe {
            rmap.add(r, right_position);
        }
    }

    let parity = left_base % 2;
    let left_targets_parity = &left_targets[parity as usize];
    let mut next_match_index = 0;

    for &(left_position, y) in left_bucket {
        // A sentinel indicates the end of the initialized part of the bucket
        if left_position == Position::SENTINEL || next_match_index >= REDUCED_MATCHES_COUNT {
            break;
        }

        let r = R::from((u32::from(y) - left_base) as u16);
        // SAFETY: `r` is within `0..PARAM_BC`, guaranteed by the function contract
        let left_targets_r = unsafe { left_targets_parity.get_unchecked(usize::from(r)) };

        for &r_target in left_targets_r.iter() {
            let [right_position_a, right_position_b] = unsafe { rmap.get(r_target) };

            if right_position_a != Position::SENTINEL {
                // SAFETY: `matches` is large enough to fit all matches of a single left entry
                // past the `REDUCED_MATCHES_COUNT` check above
                unsafe { matches.get_unchecked_mut(next_match_index) }.write(Match {
                    left_position,
                    left_y: y,
                    right_position: right_position_a,
                });
                next_match_index += 1;

                if right_position_b != Position::SENTINEL {
                    unsafe { matches.get_unchecked_mut(next_match_index) }.write(Match {
                        left_position,
                        left_y: y,
                        right_position: right_position_b,
                    });
                    next_match_index += 1;
                }
            }
        }
    }

    // SAFETY: The first `next_match_index` matches were initialized above
    unsafe { matches[..next_match_index].assume_init_ref() }
}

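/// Checks whether `left_y` and `right_y` from two adjacent buckets form a match, i.e. whether
/// `right_y % PARAM_BC` is one of the `PARAM_M` targets derived from `left_y`.
///
/// ```ignore
/// // Usage sketch (private API), as during proof verification
/// if has_match(left_y, right_y) {
///     let (y, metadata) =
///         compute_fn::<K, TABLE_NUMBER, PARENT_TABLE_NUMBER>(left_y, left_metadata, right_metadata);
/// }
/// ```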
pub(super) fn has_match(left_y: Y, right_y: Y) -> bool {
    let right_r = u32::from(right_y) % u32::from(PARAM_BC);
    let parity = (u32::from(left_y) / u32::from(PARAM_BC)) % 2;
    let left_r = u32::from(left_y) % u32::from(PARAM_BC);

    let r_targets = array::from_fn::<_, { PARAM_M as usize }, _>(|i| {
        calculate_left_target_on_demand(parity, left_r, i as u32)
    });

    r_targets.contains(&right_r)
}

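/// Computes `fn(y, left_metadata, right_metadata)` for tables 2..=7.
///
/// The inputs are bit-packed big-endian as `y | left_metadata | right_metadata` into at most two
/// `u128` words and hashed with a single-block BLAKE3 hash. The output `y` is the first
/// `k + PARAM_EXT` bits of the hash; the new metadata is the concatenation of the parents'
/// metadata for tables 2 and 3, the hash bits immediately following `y` for tables 4..=6, and
/// empty for table 7.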
#[inline(always)]
pub(super) fn compute_fn<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
    y: Y,
    left_metadata: Metadata<K, PARENT_TABLE_NUMBER>,
    right_metadata: Metadata<K, PARENT_TABLE_NUMBER>,
) -> (Y, Metadata<K, TABLE_NUMBER>)
where
    EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
{
    let left_metadata = u128::from(left_metadata);
    let right_metadata = u128::from(right_metadata);

    let parent_metadata_bits = metadata_size_bits(K, PARENT_TABLE_NUMBER);

    let y_and_left_bits = y_size_bits(K) + parent_metadata_bits;
    let right_bits_start_offset = u128::BITS as usize - parent_metadata_bits;

    // Number of bytes that contain `y` and both metadata values
    let num_bytes_with_data =
        (y_size_bits(K) + parent_metadata_bits * 2).div_ceil(u8::BITS as usize);

    let hash = {
        // `y` goes into the most significant bits of `input_a`
        let y_bits = u128::from(y) << (u128::BITS as usize - y_size_bits(K));

        let left_metadata_bits =
            left_metadata << (u128::BITS as usize - parent_metadata_bits - y_size_bits(K));

        // If not all bits of `right_metadata` fit into `input_a`, the rest spill over into
        // `input_b`
        if right_bits_start_offset < y_and_left_bits {
            let right_bits_pushed_into_input_b = y_and_left_bits - right_bits_start_offset;
            let right_bits_a = right_metadata >> right_bits_pushed_into_input_b;
            let input_a = y_bits | left_metadata_bits | right_bits_a;
            let input_b = right_metadata << (u128::BITS as usize - right_bits_pushed_into_input_b);

            let input = [input_a.to_be_bytes(), input_b.to_be_bytes()];
            let input_len =
                size_of::<u128>() + right_bits_pushed_into_input_b.div_ceil(u8::BITS as usize);
            ab_blake3::single_block_hash(&input.as_flattened()[..input_len])
                .expect("Exactly a single block worth of bytes; qed")
        } else {
            let right_bits_a = right_metadata << (right_bits_start_offset - y_and_left_bits);
            let input_a = y_bits | left_metadata_bits | right_bits_a;

            ab_blake3::single_block_hash(&input_a.to_be_bytes()[..num_bytes_with_data])
                .expect("Less than a single block worth of bytes; qed")
        }
    };

    let y_output = Y::from(
        u32::from_be_bytes([hash[0], hash[1], hash[2], hash[3]])
            >> (u32::BITS as usize - y_size_bits(K)),
    );

    let metadata_size_bits = metadata_size_bits(K, TABLE_NUMBER);

    let metadata = if TABLE_NUMBER < 4 {
        (left_metadata << parent_metadata_bits) | right_metadata
    } else if metadata_size_bits > 0 {
        let metadata = u128::from_be_bytes(
            hash[y_size_bits(K) / u8::BITS as usize..][..size_of::<u128>()]
                .try_into()
                .expect("Always enough bits for any K; qed"),
        );
        // Shift out the remaining bits of `y` that share the most significant byte
        let metadata = metadata << (y_size_bits(K) % u8::BITS as usize);
        // Keep only the `metadata_size_bits` most significant bits
        metadata >> (u128::BITS as usize - metadata_size_bits)
    } else {
        0
    };

    (y_output, Metadata::from(metadata))
}

#[cfg(any(feature = "alloc", test))]
/// SIMD-friendly version of `compute_fn` that processes `COMPUTE_FN_SIMD_FACTOR` inputs at once
/// (the single-block hashing itself is scalar, unrolled with `seq!`)
fn compute_fn_simd<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
    left_ys: [Y; COMPUTE_FN_SIMD_FACTOR],
    left_metadatas: [Metadata<K, PARENT_TABLE_NUMBER>; COMPUTE_FN_SIMD_FACTOR],
    right_metadatas: [Metadata<K, PARENT_TABLE_NUMBER>; COMPUTE_FN_SIMD_FACTOR],
) -> (
    Simd<u32, COMPUTE_FN_SIMD_FACTOR>,
    [Metadata<K, TABLE_NUMBER>; COMPUTE_FN_SIMD_FACTOR],
)
where
    EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
{
    let parent_metadata_bits = metadata_size_bits(K, PARENT_TABLE_NUMBER);
    let metadata_size_bits = metadata_size_bits(K, TABLE_NUMBER);

    // Unpack metadata into `u128`s for uniform processing
    let left_metadatas: [u128; COMPUTE_FN_SIMD_FACTOR] = seq!(N in 0..16 {
        [
            #(
                u128::from(left_metadatas[N]),
            )*
        ]
    });
    let right_metadatas: [u128; COMPUTE_FN_SIMD_FACTOR] = seq!(N in 0..16 {
        [
            #(
                u128::from(right_metadatas[N]),
            )*
        ]
    });

    let y_and_left_bits = y_size_bits(K) + parent_metadata_bits;
    let right_bits_start_offset = u128::BITS as usize - parent_metadata_bits;

    // Number of bytes that contain `y` and both metadata values
    let num_bytes_with_data =
        (y_size_bits(K) + parent_metadata_bits * 2).div_ceil(u8::BITS as usize);

    let hashes: [_; COMPUTE_FN_SIMD_FACTOR] = seq!(N in 0..16 {
        [
            #(
                {
                    let y = left_ys[N];
                    let left_metadata = left_metadatas[N];
                    let right_metadata = right_metadatas[N];

                    // `y` goes into the most significant bits of `input_a`
                    let y_bits = u128::from(y) << (u128::BITS as usize - y_size_bits(K));

                    let left_metadata_bits =
                        left_metadata << (u128::BITS as usize - parent_metadata_bits - y_size_bits(K));

                    // If not all bits of `right_metadata` fit into `input_a`, the rest spill
                    // over into `input_b`
                    if right_bits_start_offset < y_and_left_bits {
                        let right_bits_pushed_into_input_b = y_and_left_bits - right_bits_start_offset;
                        let right_bits_a = right_metadata >> right_bits_pushed_into_input_b;
                        let input_a = y_bits | left_metadata_bits | right_bits_a;
                        let input_b = right_metadata << (u128::BITS as usize - right_bits_pushed_into_input_b);

                        let input = [input_a.to_be_bytes(), input_b.to_be_bytes()];
                        let input_len =
                            size_of::<u128>() + right_bits_pushed_into_input_b.div_ceil(u8::BITS as usize);
                        ab_blake3::single_block_hash(&input.as_flattened()[..input_len])
                            .expect("Exactly a single block worth of bytes; qed")
                    } else {
                        let right_bits_a = right_metadata << (right_bits_start_offset - y_and_left_bits);
                        let input_a = y_bits | left_metadata_bits | right_bits_a;

                        ab_blake3::single_block_hash(&input_a.to_be_bytes()[..num_bytes_with_data])
                            .expect("Less than a single block worth of bytes; qed")
                    }
                },
            )*
        ]
    });

    let y_outputs = Simd::from_array(
        hashes.map(|hash| u32::from_be_bytes([hash[0], hash[1], hash[2], hash[3]])),
    ) >> (u32::BITS - y_size_bits(K) as u32);

    let metadatas = if TABLE_NUMBER < 4 {
        seq!(N in 0..16 {
            [
                #(
                    Metadata::from((left_metadatas[N] << parent_metadata_bits) | right_metadatas[N]),
                )*
            ]
        })
    } else if metadata_size_bits > 0 {
        seq!(N in 0..16 {
            [
                #(
                    {
                        let metadata = u128::from_be_bytes(
                            hashes[N][y_size_bits(K) / u8::BITS as usize..][..size_of::<u128>()]
                                .try_into()
                                .expect("Always enough bits for any K; qed"),
                        );
                        // Shift out the remaining bits of `y` that share the most significant byte
                        let metadata = metadata << (y_size_bits(K) % u8::BITS as usize);
                        // Keep only the `metadata_size_bits` most significant bits
                        Metadata::from(metadata >> (u128::BITS as usize - metadata_size_bits))
                    },
                )*
            ]
        })
    } else {
        [Metadata::default(); _]
    };

    (y_outputs, metadatas)
}

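/// Resolves a single `Match` into `(y, [left, right] positions, metadata)` by looking up both
/// parents' metadata and running `compute_fn`.
///
/// # Safety
/// `m` must contain positions that are valid in `parent_table`.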
#[cfg(feature = "alloc")]
#[inline(always)]
unsafe fn match_to_result<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
    parent_table: &Table<K, PARENT_TABLE_NUMBER>,
    m: &Match,
) -> (Y, [Position; 2], Metadata<K, TABLE_NUMBER>)
where
    Table<K, PARENT_TABLE_NUMBER>: private::NotLastTable,
    EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
    // SAFETY: Positions in a `Match` are valid in the parent table
    let left_metadata = unsafe { parent_table.metadata(m.left_position) };
    let right_metadata = unsafe { parent_table.metadata(m.right_position) };

    let (y, metadata) =
        compute_fn::<K, TABLE_NUMBER, PARENT_TABLE_NUMBER>(m.left_y, left_metadata, right_metadata);

    (y, [m.left_position, m.right_position], metadata)
}

#[cfg(feature = "alloc")]
/// SIMD counterpart of `match_to_result`, resolving `COMPUTE_FN_SIMD_FACTOR` matches at once.
///
/// # Safety
/// All matches must contain positions that are valid in `parent_table`.
#[inline(always)]
unsafe fn match_to_result_simd<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
    parent_table: &Table<K, PARENT_TABLE_NUMBER>,
    matches: &[Match; COMPUTE_FN_SIMD_FACTOR],
) -> (
    Simd<u32, COMPUTE_FN_SIMD_FACTOR>,
    [[Position; 2]; COMPUTE_FN_SIMD_FACTOR],
    [Metadata<K, TABLE_NUMBER>; COMPUTE_FN_SIMD_FACTOR],
)
where
    Table<K, PARENT_TABLE_NUMBER>: private::NotLastTable,
    EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
    let left_ys: [_; COMPUTE_FN_SIMD_FACTOR] = seq!(N in 0..16 {
        [
            #(
                matches[N].left_y,
            )*
        ]
    });
    // SAFETY: Positions in a `Match` are valid in the parent table
    let left_metadatas: [_; COMPUTE_FN_SIMD_FACTOR] = unsafe {
        seq!(N in 0..16 {
            [
                #(
                    parent_table.metadata(matches[N].left_position),
                )*
            ]
        })
    };
    // SAFETY: Positions in a `Match` are valid in the parent table
    let right_metadatas: [_; COMPUTE_FN_SIMD_FACTOR] = unsafe {
        seq!(N in 0..16 {
            [
                #(
                    parent_table.metadata(matches[N].right_position),
                )*
            ]
        })
    };

    let (y_outputs, metadatas) = compute_fn_simd::<K, TABLE_NUMBER, PARENT_TABLE_NUMBER>(
        left_ys,
        left_metadatas,
        right_metadatas,
    );

    let positions = seq!(N in 0..16 {
        [
            #(
                [
                    matches[N].left_position,
                    matches[N].right_position,
                ],
            )*
        ]
    });

    (y_outputs, positions, metadatas)
}

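/// Processes matches in groups of `COMPUTE_FN_SIMD_FACTOR` via `match_to_result_simd`, with a
/// scalar `match_to_result` fallback for the remainder, writing results into the provided
/// uninitialized slices.
///
/// # Safety
/// Matches must contain positions that are valid in `parent_table`, and the output slices must be
/// at least `matches.len()` long.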
#[cfg(feature = "alloc")]
#[inline(always)]
unsafe fn matches_to_results<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
    parent_table: &Table<K, PARENT_TABLE_NUMBER>,
    matches: &[Match],
    ys: &mut [MaybeUninit<Y>],
    positions: &mut [MaybeUninit<[Position; 2]>],
    metadatas: &mut [MaybeUninit<Metadata<K, TABLE_NUMBER>>],
) where
    Table<K, PARENT_TABLE_NUMBER>: private::NotLastTable,
    EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
    let (grouped_matches, other_matches) = matches.as_chunks::<COMPUTE_FN_SIMD_FACTOR>();
    let (grouped_ys, other_ys) = ys.split_at_mut(grouped_matches.as_flattened().len());
    let grouped_ys = grouped_ys.as_chunks_mut::<COMPUTE_FN_SIMD_FACTOR>().0;
    let (grouped_positions, other_positions) =
        positions.split_at_mut(grouped_matches.as_flattened().len());
    let grouped_positions = grouped_positions
        .as_chunks_mut::<COMPUTE_FN_SIMD_FACTOR>()
        .0;
    let (grouped_metadatas, other_metadatas) =
        metadatas.split_at_mut(grouped_matches.as_flattened().len());
    let grouped_metadatas = grouped_metadatas
        .as_chunks_mut::<COMPUTE_FN_SIMD_FACTOR>()
        .0;

    for (((grouped_matches, grouped_ys), grouped_positions), grouped_metadatas) in grouped_matches
        .iter()
        .zip(grouped_ys)
        .zip(grouped_positions)
        .zip(grouped_metadatas)
    {
        // SAFETY: Guaranteed by the function contract
        let (ys_group, positions_group, metadatas_group) =
            unsafe { match_to_result_simd(parent_table, grouped_matches) };
        let ys_group = Y::array_from_repr(ys_group.to_array());
        grouped_ys.write_copy_of_slice(&ys_group);
        grouped_positions.write_copy_of_slice(&positions_group);

        // Skip the write if the table has no metadata (table 7)
        if metadata_size_bits(K, TABLE_NUMBER) > 0 {
            grouped_metadatas.write_copy_of_slice(&metadatas_group);
        }
    }
    for (((other_match, other_y), other_positions), other_metadata) in other_matches
        .iter()
        .zip(other_ys)
        .zip(other_positions)
        .zip(other_metadatas)
    {
        // SAFETY: Guaranteed by the function contract
        let (y, p, metadata) = unsafe { match_to_result(parent_table, other_match) };
        other_y.write(y);
        other_positions.write(p);
        // Skip the write if the table has no metadata (table 7)
        if metadata_size_bits(K, TABLE_NUMBER) > 0 {
            other_metadata.write(metadata);
        }
    }
}

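/// A table pruned after the next table was created: only the back-pointers (pairs of positions in
/// the parent table) are retained, which is all that proof creation still needs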
#[cfg(feature = "alloc")]
#[derive(Debug)]
pub(super) enum PrunedTable<const K: u8, const TABLE_NUMBER: u8>
where
    [(); 1 << K]:,
    [(); num_buckets(K) - 1]:,
{
    /// The first table doesn't store back-pointers
    First,
    /// Back-pointers into the parent table, indexed by `Position`
    Other {
        positions: Box<[MaybeUninit<[Position; 2]>; 1 << K]>,
    },
    /// Like [`Self::Other`], but stored per bucket pair, as produced by parallel table creation
    #[cfg(feature = "parallel")]
    OtherBuckets {
        positions: Box<[[MaybeUninit<[Position; 2]>; REDUCED_MATCHES_COUNT]; num_buckets(K) - 1]>,
    },
}

#[cfg(feature = "alloc")]
impl<const K: u8, const TABLE_NUMBER: u8> PrunedTable<K, TABLE_NUMBER>
where
    [(); 1 << K]:,
    [(); num_buckets(K) - 1]:,
{
    #[inline(always)]
    /// Returns the pair of parent positions for `position` in this table.
    ///
    /// # Safety
    /// `position` must be a valid position in this table.
    pub(super) unsafe fn position(&self, position: Position) -> [Position; 2] {
        match self {
            Self::First => {
                unreachable!("Not the first table");
            }
            Self::Other { positions, .. } => {
                // SAFETY: Guaranteed by the method contract
                unsafe { positions.get_unchecked(usize::from(position)).assume_init() }
            }
            #[cfg(feature = "parallel")]
            Self::OtherBuckets { positions, .. } => {
                // SAFETY: Guaranteed by the method contract
                unsafe {
                    positions
                        .as_flattened()
                        .get_unchecked(usize::from(position))
                        .assume_init()
                }
            }
        }
    }
}

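/// A single table with `y`s grouped into buckets and, for non-first tables, back-pointers and
/// metadata addressable by `Position`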
#[cfg(feature = "alloc")]
#[derive(Debug)]
pub(super) enum Table<const K: u8, const TABLE_NUMBER: u8>
where
    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
    /// The first table: buckets of `(position, y)` pairs, where `x` is implied by the position
    First {
        buckets: Box<[[(Position, Y); REDUCED_BUCKET_SIZE]; num_buckets(K)]>,
    },
    /// Other tables: back-pointers and metadata indexed by `Position`, plus buckets of
    /// `(position, y)` pairs
    Other {
        positions: Box<[MaybeUninit<[Position; 2]>; 1 << K]>,
        metadatas: Box<[MaybeUninit<Metadata<K, TABLE_NUMBER>>; 1 << K]>,
        buckets: Box<[[(Position, Y); REDUCED_BUCKET_SIZE]; num_buckets(K)]>,
    },
    /// Like [`Self::Other`], but with results stored per bucket pair, as produced by parallel
    /// table creation
    #[cfg(feature = "parallel")]
    OtherBuckets {
        positions: Box<[[MaybeUninit<[Position; 2]>; REDUCED_MATCHES_COUNT]; num_buckets(K) - 1]>,
        metadatas: Box<
            [[MaybeUninit<Metadata<K, TABLE_NUMBER>>; REDUCED_MATCHES_COUNT]; num_buckets(K) - 1],
        >,
        buckets: Box<[[(Position, Y); REDUCED_BUCKET_SIZE]; num_buckets(K)]>,
    },
}

#[cfg(feature = "alloc")]
impl<const K: u8> Table<K, 1>
where
    EvaluatableUsize<{ metadata_size_bytes(K, 1) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
    /// Creates the first table from the given seed
    pub(super) fn create(seed: Seed) -> Self
    where
        EvaluatableUsize<{ usize::from(K) * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize }>: Sized,
    {
        debug_assert!(
            MAX_BUCKET_SIZE >= bucket_size_upper_bound(K, BUCKET_SIZE_UPPER_BOUND_SECURITY_BITS),
            "Max bucket size is not sufficiently large"
        );

        let partial_ys = partial_ys::<K>(seed);

        // SAFETY: An array of `MaybeUninit` is always initialized
        let mut ys = unsafe { Box::<[MaybeUninit<Y>; 1 << K]>::new_uninit().assume_init() };

        for ((ys, xs_batch_start), partial_ys) in ys
            .as_chunks_mut::<COMPUTE_F1_SIMD_FACTOR>()
            .0
            .iter_mut()
            .zip((X::ZERO..).step_by(COMPUTE_F1_SIMD_FACTOR))
            .zip(
                partial_ys
                    .as_chunks::<{ usize::from(K) * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize }>()
                    .0,
            )
        {
            let xs = Simd::splat(u32::from(xs_batch_start))
                + Simd::from_array(array::from_fn(|i| i as u32));
            let ys_batch = compute_f1_simd::<K>(xs, partial_ys);

            ys.write_copy_of_slice(&ys_batch);
        }

        // SAFETY: `MaybeUninit<Y>` has the same layout as `Y`, all elements were initialized
        // above, and the `Box` allocation is reused for the `Vec`
        let ys = unsafe {
            let ys_len = ys.len();
            let ys = Box::into_raw(ys);
            Vec::from_raw_parts(ys.cast(), ys_len, ys_len)
        };

        let buckets = group_by_buckets::<K>(&ys);

        Self::First { buckets }
    }

    #[cfg(feature = "parallel")]
    /// Like [`Self::create`], used when tables are built in parallel (the first table itself is
    /// generated the same way)
    pub(super) fn create_parallel(seed: Seed) -> Self
    where
        EvaluatableUsize<{ usize::from(K) * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize }>: Sized,
    {
        debug_assert!(
            MAX_BUCKET_SIZE >= bucket_size_upper_bound(K, BUCKET_SIZE_UPPER_BOUND_SECURITY_BITS),
            "Max bucket size is not sufficiently large"
        );

        let partial_ys = partial_ys::<K>(seed);

        // SAFETY: An array of `MaybeUninit` is always initialized
        let mut ys = unsafe { Box::<[MaybeUninit<Y>; 1 << K]>::new_uninit().assume_init() };

        for ((ys, xs_batch_start), partial_ys) in ys
            .as_chunks_mut::<COMPUTE_F1_SIMD_FACTOR>()
            .0
            .iter_mut()
            .zip((X::ZERO..).step_by(COMPUTE_F1_SIMD_FACTOR))
            .zip(
                partial_ys
                    .as_chunks::<{ usize::from(K) * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize }>()
                    .0,
            )
        {
            let xs = Simd::splat(u32::from(xs_batch_start))
                + Simd::from_array(array::from_fn(|i| i as u32));
            let ys_batch = compute_f1_simd::<K>(xs, partial_ys);

            ys.write_copy_of_slice(&ys_batch);
        }

        // SAFETY: `MaybeUninit<Y>` has the same layout as `Y`, all elements were initialized
        // above, and the `Box` allocation is reused for the `Vec`
        let ys = unsafe {
            let ys_len = ys.len();
            let ys = Box::into_raw(ys);
            Vec::from_raw_parts(ys.cast(), ys_len, ys_len)
        };

        let buckets = group_by_buckets::<K>(&ys);

        Self::First { buckets }
    }
}

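/// Sealed marker traits: `SupportedOtherTables` is implemented for tables 2..=7, which are
/// created from a parent table, and `NotLastTable` for tables 1..=6, whose metadata is consumed
/// by the next table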
#[cfg(feature = "alloc")]
mod private {
    pub(in super::super) trait SupportedOtherTables {}
    pub(in super::super) trait NotLastTable {}
}

#[cfg(feature = "alloc")]
impl<const K: u8> private::SupportedOtherTables for Table<K, 2>
where
    EvaluatableUsize<{ metadata_size_bytes(K, 2) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
}
#[cfg(feature = "alloc")]
impl<const K: u8> private::SupportedOtherTables for Table<K, 3>
where
    EvaluatableUsize<{ metadata_size_bytes(K, 3) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
}
#[cfg(feature = "alloc")]
impl<const K: u8> private::SupportedOtherTables for Table<K, 4>
where
    EvaluatableUsize<{ metadata_size_bytes(K, 4) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
}
#[cfg(feature = "alloc")]
impl<const K: u8> private::SupportedOtherTables for Table<K, 5>
where
    EvaluatableUsize<{ metadata_size_bytes(K, 5) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
}
#[cfg(feature = "alloc")]
impl<const K: u8> private::SupportedOtherTables for Table<K, 6>
where
    EvaluatableUsize<{ metadata_size_bytes(K, 6) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
}
#[cfg(feature = "alloc")]
impl<const K: u8> private::SupportedOtherTables for Table<K, 7>
where
    EvaluatableUsize<{ metadata_size_bytes(K, 7) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
}

#[cfg(feature = "alloc")]
impl<const K: u8> private::NotLastTable for Table<K, 1>
where
    EvaluatableUsize<{ metadata_size_bytes(K, 1) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
}
#[cfg(feature = "alloc")]
impl<const K: u8> private::NotLastTable for Table<K, 2>
where
    EvaluatableUsize<{ metadata_size_bytes(K, 2) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
}
#[cfg(feature = "alloc")]
impl<const K: u8> private::NotLastTable for Table<K, 3>
where
    EvaluatableUsize<{ metadata_size_bytes(K, 3) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
}
#[cfg(feature = "alloc")]
impl<const K: u8> private::NotLastTable for Table<K, 4>
where
    EvaluatableUsize<{ metadata_size_bytes(K, 4) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
}
#[cfg(feature = "alloc")]
impl<const K: u8> private::NotLastTable for Table<K, 5>
where
    EvaluatableUsize<{ metadata_size_bytes(K, 5) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
}
#[cfg(feature = "alloc")]
impl<const K: u8> private::NotLastTable for Table<K, 6>
where
    EvaluatableUsize<{ metadata_size_bytes(K, 6) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
}

#[cfg(feature = "alloc")]
impl<const K: u8, const TABLE_NUMBER: u8> Table<K, TABLE_NUMBER>
where
    Self: private::SupportedOtherTables,
    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
    /// Creates the next table from the parent table, returning the new table and the pruned
    /// parent
    pub(super) fn create<const PARENT_TABLE_NUMBER: u8>(
        parent_table: Table<K, PARENT_TABLE_NUMBER>,
        cache: &TablesCache,
    ) -> (Self, PrunedTable<K, PARENT_TABLE_NUMBER>)
    where
        Table<K, PARENT_TABLE_NUMBER>: private::NotLastTable,
        EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
    {
        let left_targets = &*cache.left_targets;
        let mut initialized_elements = 0_usize;
        // SAFETY: An array of `MaybeUninit` is always initialized
        let mut ys = unsafe { Box::<[MaybeUninit<Y>; 1 << K]>::new_uninit().assume_init() };
        let mut positions =
            unsafe { Box::<[MaybeUninit<[Position; 2]>; 1 << K]>::new_uninit().assume_init() };
        let mut metadatas = unsafe {
            Box::<[MaybeUninit<Metadata<K, TABLE_NUMBER>>; 1 << K]>::new_uninit().assume_init()
        };

        for ([left_bucket, right_bucket], left_bucket_index) in
            parent_table.buckets().array_windows().zip(0..)
        {
            let mut matches = [MaybeUninit::uninit(); _];
            // SAFETY: Buckets come from `group_by_buckets` and are sentinel-padded
            let matches = unsafe {
                find_matches_in_buckets(
                    left_bucket_index,
                    left_bucket,
                    right_bucket,
                    &mut matches,
                    left_targets,
                )
            };
            // Drop any matches beyond the reduced limit
            let matches = &matches[..matches.len().min(REDUCED_MATCHES_COUNT)];
            // SAFETY: `initialized_elements` never exceeds the length of the arrays
            let (ys, positions, metadatas) = unsafe {
                (
                    ys.split_at_mut_unchecked(initialized_elements).1,
                    positions.split_at_mut_unchecked(initialized_elements).1,
                    metadatas.split_at_mut_unchecked(initialized_elements).1,
                )
            };

            // SAFETY: The remaining capacity is sufficient for all collected matches
            let (ys, positions, metadatas) = unsafe {
                (
                    ys.split_at_mut_unchecked(matches.len()).0,
                    positions.split_at_mut_unchecked(matches.len()).0,
                    metadatas.split_at_mut_unchecked(matches.len()).0,
                )
            };

            // SAFETY: Matches come from the parent table, and the output slices are exactly
            // `matches.len()` long
            unsafe {
                matches_to_results(&parent_table, matches, ys, positions, metadatas);
            }

            initialized_elements += matches.len();
        }

        let parent_table = parent_table.prune();

        // SAFETY: `MaybeUninit<Y>` has the same layout as `Y`, and the first
        // `initialized_elements` elements were initialized above
        let ys = unsafe {
            let ys_len = ys.len();
            let ys = Box::into_raw(ys);
            Vec::from_raw_parts(ys.cast(), initialized_elements, ys_len)
        };

        let buckets = group_by_buckets::<K>(&ys);

        let table = Self::Other {
            positions,
            metadatas,
            buckets,
        };

        (table, parent_table)
    }

    #[cfg(feature = "parallel")]
    /// Parallel version of [`Self::create`]
    pub(super) fn create_parallel<const PARENT_TABLE_NUMBER: u8>(
        parent_table: Table<K, PARENT_TABLE_NUMBER>,
        cache: &TablesCache,
    ) -> (Self, PrunedTable<K, PARENT_TABLE_NUMBER>)
    where
        Table<K, PARENT_TABLE_NUMBER>: private::NotLastTable,
        EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
    {
        // SAFETY: `SyncUnsafeCell` of an array of `MaybeUninit` is always initialized
        let ys = unsafe {
            Box::<[SyncUnsafeCell<[MaybeUninit<_>; REDUCED_MATCHES_COUNT]>; num_buckets(K) - 1]>::new_uninit().assume_init()
        };
        let positions = unsafe {
            Box::<[SyncUnsafeCell<[MaybeUninit<_>; REDUCED_MATCHES_COUNT]>; num_buckets(K) - 1]>::new_uninit().assume_init()
        };
        let metadatas = unsafe {
            Box::<[SyncUnsafeCell<[MaybeUninit<_>; REDUCED_MATCHES_COUNT]>; num_buckets(K) - 1]>::new_uninit().assume_init()
        };
        let global_results_counts =
            array::from_fn::<_, { num_buckets(K) - 1 }, _>(|_| SyncUnsafeCell::new(0u16));

        let left_targets = &*cache.left_targets;

        let buckets = parent_table.buckets();
        // One cache line of `u16` result counts per batch, so threads don't share cache lines
        let bucket_batch_size = CACHE_LINE_SIZE / size_of::<u16>();
        let bucket_batch_index = AtomicUsize::new(0);

        rayon::broadcast(|_ctx| {
            loop {
                let bucket_batch_index = bucket_batch_index.fetch_add(1, Ordering::Relaxed);

                let buckets_batch = buckets
                    .array_windows()
                    .enumerate()
                    .skip(bucket_batch_index * bucket_batch_size)
                    .take(bucket_batch_size);

                if buckets_batch.is_empty() {
                    break;
                }

                for (left_bucket_index, [left_bucket, right_bucket]) in buckets_batch {
                    let mut matches = [MaybeUninit::uninit(); _];
                    // SAFETY: Buckets come from `group_by_buckets` and are sentinel-padded
                    let matches = unsafe {
                        find_matches_in_buckets(
                            left_bucket_index as u32,
                            left_bucket,
                            right_bucket,
                            &mut matches,
                            left_targets,
                        )
                    };
                    // Drop any matches beyond the reduced limit
                    let matches = &matches[..matches.len().min(REDUCED_MATCHES_COUNT)];

                    // SAFETY: Each `left_bucket_index` is claimed by exactly one thread, so
                    // access to these cells is exclusive
                    let ys = unsafe { &mut *ys.get_unchecked(left_bucket_index).get() };
                    let positions =
                        unsafe { &mut *positions.get_unchecked(left_bucket_index).get() };
                    let metadatas =
                        unsafe { &mut *metadatas.get_unchecked(left_bucket_index).get() };
                    let count = unsafe {
                        &mut *global_results_counts.get_unchecked(left_bucket_index).get()
                    };

                    // SAFETY: Matches come from the parent table, and the output slices fit
                    // `REDUCED_MATCHES_COUNT` elements
                    unsafe {
                        matches_to_results::<_, TABLE_NUMBER, _>(
                            &parent_table,
                            matches,
                            ys,
                            positions,
                            metadatas,
                        )
                    };
                    *count = matches.len() as u16;
                }
            }
        });

        let parent_table = parent_table.prune();

        let ys = strip_sync_unsafe_cell(ys);
        let positions = strip_sync_unsafe_cell(positions);
        let metadatas = strip_sync_unsafe_cell(metadatas);

        // SAFETY: The first `count` elements of every batch were initialized above
        let buckets = unsafe {
            group_by_buckets_from_buckets::<K, _>(
                ys.iter().zip(
                    global_results_counts
                        .into_iter()
                        .map(|count| usize::from(count.into_inner())),
                ),
            )
        };

        let table = Self::OtherBuckets {
            positions,
            metadatas,
            buckets,
        };

        (table, parent_table)
    }

    #[inline(always)]
    /// Returns the pair of parent positions for `position` in this table.
    ///
    /// # Safety
    /// `position` must be a valid position in this table.
    pub(super) unsafe fn position(&self, position: Position) -> [Position; 2] {
        match self {
            Self::First { .. } => {
                unreachable!("Not the first table");
            }
            Self::Other { positions, .. } => {
                // SAFETY: Guaranteed by the method contract
                unsafe { positions.get_unchecked(usize::from(position)).assume_init() }
            }
            #[cfg(feature = "parallel")]
            Self::OtherBuckets { positions, .. } => {
                // SAFETY: Guaranteed by the method contract
                unsafe {
                    positions
                        .as_flattened()
                        .get_unchecked(usize::from(position))
                        .assume_init()
                }
            }
        }
    }
}

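// The last table is consumed differently: rather than materializing a full `Table<K, 7>`, proof
// targets are extracted directly, indexed by s-bucket (the first `k` bits of `y`)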
#[cfg(feature = "alloc")]
impl<const K: u8> Table<K, 7>
where
    Self: private::SupportedOtherTables,
    EvaluatableUsize<{ metadata_size_bytes(K, 7) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
    /// Creates proof targets from the last table: for every s-bucket, the parent positions of
    /// the first match whose `y` falls into it, with unmatched s-buckets left as
    /// `[Position::ZERO; 2]`
    pub(super) fn create_proof_targets(
        parent_table: Table<K, 6>,
        cache: &TablesCache,
    ) -> (
        Box<[[Position; 2]; Record::NUM_S_BUCKETS]>,
        PrunedTable<K, 6>,
    )
    where
        Table<K, 6>: private::NotLastTable,
        EvaluatableUsize<{ metadata_size_bytes(K, 6) }>: Sized,
    {
        let left_targets = &*cache.left_targets;
        // SAFETY: Zeroed bytes are a valid value for positions
        let mut table_6_proof_targets =
            unsafe { Box::<[[Position; 2]; Record::NUM_S_BUCKETS]>::new_zeroed().assume_init() };

        for ([left_bucket, right_bucket], left_bucket_index) in
            parent_table.buckets().array_windows().zip(0..)
        {
            let mut matches = [MaybeUninit::uninit(); _];
            // SAFETY: Buckets come from `group_by_buckets` and are sentinel-padded
            let matches = unsafe {
                find_matches_in_buckets(
                    left_bucket_index,
                    left_bucket,
                    right_bucket,
                    &mut matches,
                    left_targets,
                )
            };
            // Drop any matches beyond the reduced limit
            let matches = &matches[..matches.len().min(REDUCED_MATCHES_COUNT)];

            let (grouped_matches, other_matches) = matches.as_chunks::<COMPUTE_FN_SIMD_FACTOR>();

            for grouped_matches in grouped_matches {
                // SAFETY: Matches come from the parent table
                let (ys_group, positions_group, _) =
                    unsafe { match_to_result_simd::<_, 7, _>(&parent_table, grouped_matches) };

                let s_buckets = ys_group >> Simd::splat(u32::from(PARAM_EXT));

                for (s_bucket, p) in s_buckets.to_array().into_iter().zip(positions_group) {
                    const {
                        assert!(Record::NUM_S_BUCKETS == (u16::MAX as usize) + 1);
                    }
                    let Ok(s_bucket) = u16::try_from(s_bucket) else {
                        continue;
                    };
                    let positions = &mut table_6_proof_targets[usize::from(s_bucket)];
                    // Keep the first pair of positions for every s-bucket
                    if positions == &[Position::ZERO; 2] {
                        *positions = p;
                    }
                }
            }
            for other_match in other_matches {
                // SAFETY: Matches come from the parent table
                let (y, p, _) = unsafe { match_to_result::<_, 7, _>(&parent_table, other_match) };

                let s_bucket = y.first_k_bits();

                const {
                    assert!(Record::NUM_S_BUCKETS == (u16::MAX as usize) + 1);
                }
                let Ok(s_bucket) = u16::try_from(s_bucket) else {
                    continue;
                };

                let positions = &mut table_6_proof_targets[usize::from(s_bucket)];
                // Keep the first pair of positions for every s-bucket
                if positions == &[Position::ZERO; 2] {
                    *positions = p;
                }
            }
        }

        let parent_table = parent_table.prune();

        (table_6_proof_targets, parent_table)
    }

    #[cfg(feature = "parallel")]
    /// Parallel version of [`Self::create_proof_targets`]
    pub(super) fn create_proof_targets_parallel(
        parent_table: Table<K, 6>,
        cache: &TablesCache,
    ) -> (
        Box<[[Position; 2]; Record::NUM_S_BUCKETS]>,
        PrunedTable<K, 6>,
    )
    where
        Table<K, 6>: private::NotLastTable,
        EvaluatableUsize<{ metadata_size_bytes(K, 6) }>: Sized,
    {
        // SAFETY: `SyncUnsafeCell` of an array of `MaybeUninit` is always initialized
        let buckets_positions = unsafe {
            Box::<[SyncUnsafeCell<[MaybeUninit<_>; REDUCED_MATCHES_COUNT]>; num_buckets(K) - 1]>::new_uninit().assume_init()
        };
        let global_results_counts =
            array::from_fn::<_, { num_buckets(K) - 1 }, _>(|_| SyncUnsafeCell::new(0u16));

        let left_targets = &*cache.left_targets;

        let buckets = parent_table.buckets();
        // One cache line of `u16` result counts per batch, so threads don't share cache lines
        let bucket_batch_size = CACHE_LINE_SIZE / size_of::<u16>();
        let bucket_batch_index = AtomicUsize::new(0);

        rayon::broadcast(|_ctx| {
            loop {
                let bucket_batch_index = bucket_batch_index.fetch_add(1, Ordering::Relaxed);

                let buckets_batch = buckets
                    .array_windows()
                    .enumerate()
                    .skip(bucket_batch_index * bucket_batch_size)
                    .take(bucket_batch_size);

                if buckets_batch.is_empty() {
                    break;
                }

                for (left_bucket_index, [left_bucket, right_bucket]) in buckets_batch {
                    let mut matches = [MaybeUninit::uninit(); _];
                    // SAFETY: Buckets come from `group_by_buckets` and are sentinel-padded
                    let matches = unsafe {
                        find_matches_in_buckets(
                            left_bucket_index as u32,
                            left_bucket,
                            right_bucket,
                            &mut matches,
                            left_targets,
                        )
                    };
                    // Drop any matches beyond the reduced limit
                    let matches = &matches[..matches.len().min(REDUCED_MATCHES_COUNT)];

                    // SAFETY: Each `left_bucket_index` is claimed by exactly one thread, so
                    // access to these cells is exclusive
                    let buckets_positions =
                        unsafe { &mut *buckets_positions.get_unchecked(left_bucket_index).get() };
                    let count = unsafe {
                        &mut *global_results_counts.get_unchecked(left_bucket_index).get()
                    };

                    let (grouped_matches, other_matches) =
                        matches.as_chunks::<COMPUTE_FN_SIMD_FACTOR>();

                    let mut reduced_count = 0_usize;
                    for grouped_matches in grouped_matches {
                        // SAFETY: Matches come from the parent table
                        let (ys_group, positions_group, _) = unsafe {
                            match_to_result_simd::<_, 7, _>(&parent_table, grouped_matches)
                        };

                        let s_buckets = ys_group >> Simd::splat(u32::from(PARAM_EXT));
                        let s_buckets = s_buckets.to_array();

                        for (s_bucket, p) in s_buckets.into_iter().zip(positions_group) {
                            const {
                                assert!(Record::NUM_S_BUCKETS == (u16::MAX as usize) + 1);
                            }
                            let Ok(s_bucket) = u16::try_from(s_bucket) else {
                                continue;
                            };

                            buckets_positions[reduced_count].write((s_bucket, p));
                            reduced_count += 1;
                        }
                    }
                    for other_match in other_matches {
                        // SAFETY: Matches come from the parent table
                        let (y, p, _) =
                            unsafe { match_to_result::<_, 7, _>(&parent_table, other_match) };

                        let s_bucket = y.first_k_bits();

                        const {
                            assert!(Record::NUM_S_BUCKETS == (u16::MAX as usize) + 1);
                        }
                        let Ok(s_bucket) = u16::try_from(s_bucket) else {
                            continue;
                        };

                        buckets_positions[reduced_count].write((s_bucket, p));
                        reduced_count += 1;
                    }

                    *count = reduced_count as u16;
                }
            }
        });

        let parent_table = parent_table.prune();

        let buckets_positions = strip_sync_unsafe_cell(buckets_positions);

        // SAFETY: Zeroed bytes are a valid value for positions
        let mut table_6_proof_targets =
            unsafe { Box::<[[Position; 2]; Record::NUM_S_BUCKETS]>::new_zeroed().assume_init() };

        for (bucket, results_count) in buckets_positions.iter().zip(
            global_results_counts
                .into_iter()
                .map(|count| usize::from(count.into_inner())),
        ) {
            // SAFETY: The first `results_count` elements were initialized above
            for &(s_bucket, p) in unsafe { bucket[..results_count].assume_init_ref() } {
                let positions = &mut table_6_proof_targets[usize::from(s_bucket)];
                // Keep the first pair of positions for every s-bucket
                if positions == &[Position::ZERO; 2] {
                    *positions = p;
                }
            }
        }

        (table_6_proof_targets, parent_table)
    }
}

#[cfg(feature = "alloc")]
impl<const K: u8, const TABLE_NUMBER: u8> Table<K, TABLE_NUMBER>
where
    Self: private::NotLastTable,
    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
    #[inline(always)]
    /// Returns the metadata at `position`.
    ///
    /// # Safety
    /// `position` must be a valid position in this table.
    unsafe fn metadata(&self, position: Position) -> Metadata<K, TABLE_NUMBER> {
        match self {
            Self::First { .. } => {
                // The metadata of the first table is `x`, which is the same as the position
                Metadata::from(X::from(u32::from(position)))
            }
            Self::Other { metadatas, .. } => {
                // SAFETY: Guaranteed by the method contract
                unsafe { metadatas.get_unchecked(usize::from(position)).assume_init() }
            }
            #[cfg(feature = "parallel")]
            Self::OtherBuckets { metadatas, .. } => {
                // SAFETY: Guaranteed by the method contract
                unsafe {
                    metadatas
                        .as_flattened()
                        .get_unchecked(usize::from(position))
                        .assume_init()
                }
            }
        }
    }
}

#[cfg(feature = "alloc")]
impl<const K: u8, const TABLE_NUMBER: u8> Table<K, TABLE_NUMBER>
where
    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
    /// Drops everything except the back-pointers needed to walk back to the first table
    #[inline(always)]
    fn prune(self) -> PrunedTable<K, TABLE_NUMBER> {
        match self {
            Self::First { .. } => PrunedTable::First,
            Self::Other { positions, .. } => PrunedTable::Other { positions },
            #[cfg(feature = "parallel")]
            Self::OtherBuckets { positions, .. } => PrunedTable::OtherBuckets { positions },
        }
    }

    #[inline(always)]
    /// Returns buckets of `(position, y)` pairs, sentinel-padded at the end
    pub(super) fn buckets(&self) -> &[[(Position, Y); REDUCED_BUCKET_SIZE]; num_buckets(K)] {
        match self {
            Self::First { buckets, .. } => buckets,
            Self::Other { buckets, .. } => buckets,
            #[cfg(feature = "parallel")]
            Self::OtherBuckets { buckets, .. } => buckets,
        }
    }
}