#[cfg(any(feature = "alloc", test))]
mod rmap;
#[cfg(test)]
mod tests;
pub(super) mod types;

use crate::chiapos::Seed;
use crate::chiapos::constants::{PARAM_B, PARAM_BC, PARAM_C, PARAM_EXT, PARAM_M};
#[cfg(feature = "alloc")]
use crate::chiapos::table::rmap::Rmap;
use crate::chiapos::table::types::{Metadata, X, Y};
#[cfg(feature = "alloc")]
use crate::chiapos::table::types::{Position, R};
use crate::chiapos::utils::EvaluatableUsize;
use ab_chacha8::{ChaCha8Block, ChaCha8State};
#[cfg(feature = "alloc")]
use ab_core_primitives::pieces::Record;
#[cfg(feature = "alloc")]
use alloc::boxed::Box;
#[cfg(feature = "alloc")]
use alloc::vec;
#[cfg(feature = "alloc")]
use alloc::vec::Vec;
#[cfg(feature = "alloc")]
use chacha20::cipher::{Iv, KeyIvInit, StreamCipher};
#[cfg(feature = "alloc")]
use chacha20::{ChaCha8, Key};
use core::array;
#[cfg(feature = "parallel")]
use core::cell::SyncUnsafeCell;
#[cfg(feature = "alloc")]
use core::mem;
#[cfg(feature = "alloc")]
use core::mem::MaybeUninit;
#[cfg(any(feature = "alloc", test))]
use core::simd::prelude::*;
#[cfg(feature = "parallel")]
use core::sync::atomic::{AtomicUsize, Ordering};
#[cfg(feature = "alloc")]
use derive_more::Deref;
#[cfg(feature = "alloc")]
use rclite::Arc;
#[cfg(any(feature = "alloc", test))]
use seq_macro::seq;
pub(super) const COMPUTE_F1_SIMD_FACTOR: usize = 8;
#[cfg(any(feature = "alloc", test))]
const COMPUTE_FN_SIMD_FACTOR: usize = 16;
/// Largest bucket size the code is prepared to handle; `Table::create()`
/// asserts that it covers `bucket_size_upper_bound()`
const MAX_BUCKET_SIZE: usize = 512;
#[cfg(any(feature = "alloc", test))]
const BUCKET_SIZE_UPPER_BOUND_SECURITY_BITS: u8 = 128;
/// How many entries of a bucket are actually retained; anything beyond this is
/// dropped when `y`s are grouped into buckets
const REDUCED_BUCKET_SIZE: usize = 272;
/// Cap on the number of matches retained per pair of adjacent buckets
const REDUCED_MATCHES_COUNT: usize = 288;
#[cfg(feature = "parallel")]
const CACHE_LINE_SIZE: usize = 64;

const _: () = {
    // Compile-time invariants; plain `assert!` (rather than `debug_assert!`) so
    // that they are checked in release builds too
    assert!(REDUCED_BUCKET_SIZE <= MAX_BUCKET_SIZE);
    assert!(REDUCED_MATCHES_COUNT <= MAX_BUCKET_SIZE);
};

/// Size of the `y` values in bits: `K + PARAM_EXT`
pub(super) const fn y_size_bits(k: u8) -> usize {
    k as usize + PARAM_EXT as usize
}

/// Metadata size of a table entry in bytes (bits rounded up to whole bytes)
pub const fn metadata_size_bytes(k: u8, table_number: u8) -> usize {
    metadata_size_bits(k, table_number).div_ceil(u8::BITS as usize)
}

/// Metadata size of a table entry in bits, a table-dependent multiple of `k`
pub(super) const fn metadata_size_bits(k: u8, table_number: u8) -> usize {
    k as usize
        * match table_number {
            1 => 1,
            2 => 2,
            3 | 4 => 4,
            5 => 3,
            6 => 2,
            7 => 0,
            _ => unreachable!(),
        }
}
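
// Illustrative sizes for K = 20, derived from the multipliers above: table 1
// stores `x` itself (20 bits), table 2 a pair of `x`s (40 bits), tables 3 and 4
// four `x`s (80 bits), after which the metadata shrinks to 60 and then 40 bits,
// and table 7 carries none at all.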

/// Number of buckets the range of `y` values is split into; each bucket covers
/// `PARAM_BC` consecutive `y` values
pub const fn num_buckets(k: u8) -> usize {
    2_usize
        .pow(y_size_bits(k) as u32)
        .div_ceil(usize::from(PARAM_BC))
}
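
// Worked example (illustrative, assuming the canonical Chia parameters
// PARAM_BC = 119 * 127 = 15113 and PARAM_EXT = 6): for K = 20 there are
// 2^26 = 67_108_864 possible `y` values, hence 67_108_864.div_ceil(15113) =
// 4441 buckets.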

#[cfg(feature = "parallel")]
#[inline(always)]
fn strip_sync_unsafe_cell<const N: usize, T>(value: Box<[SyncUnsafeCell<T>; N]>) -> Box<[T; N]> {
    // SAFETY: `SyncUnsafeCell<T>` is `repr(transparent)` over `T`, so the
    // layouts match
    unsafe { Box::from_raw(Box::into_raw(value).cast()) }
}

#[cfg(feature = "alloc")]
/// Generates `K * 2^K` bits of ChaCha8 keystream for `seed`; these are the
/// "partial `y`" bits later consumed by `compute_f1_simd()`
fn partial_ys<const K: u8>(seed: Seed) -> Vec<u8> {
    let output_len_bits = usize::from(K) * (1 << K);
    let mut output = vec![0; output_len_bits.div_ceil(u8::BITS as usize)];

    let key = Key::from(seed);
    let iv = Iv::<ChaCha8>::default();

    let mut cipher = ChaCha8::new(&key, &iv);

    cipher.write_keystream(&mut output);

    output
}
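
// Size check (illustrative): for K = 20 this is 20 * 2^20 bits of keystream,
// i.e. 2_621_440 bytes (2.5 MiB), generated in one pass and then consumed in
// `COMPUTE_F1_SIMD_FACTOR`-sized chunks during first-table creation.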

#[cfg(feature = "alloc")]
/// Upper bound on the number of `y`s that can land in a single bucket: the
/// expected number is `LAMBDA` (`2^k` entries over `2^(k + PARAM_EXT) /
/// PARAM_BC` buckets), and the additive term keeps the chance of a bucket
/// exceeding the bound negligible (roughly `2^-(k + security_bits)` per bucket,
/// via a Chernoff-style tail estimate)
const fn bucket_size_upper_bound(k: u8, security_bits: u8) -> usize {
    // Expected number of entries per bucket
    const LAMBDA: u64 = PARAM_BC as u64 / 2u64.pow(PARAM_EXT as u32);
    // `ln(2)` approximated as the ratio `LN2_NUM / LN2_DEN`
    const LN2_NUM: u128 = 693147;
    const LN2_DEN: u128 = 1000000;

    let ks = k as u128 + security_bits as u128;
    // `3 * LAMBDA * (k + security_bits) * ln(2)` as a fraction `num / den`
    let num = 3u128 * LAMBDA as u128 * ks * LN2_NUM;
    let den = LN2_DEN;

    let ceil_div = num.div_ceil(den);

    // Ceiling integer square root of `ceil_div` via binary search
    let mut low = 0u64;
    let mut high = u64::MAX;
    while low < high {
        let mid = low + (high - low) / 2;
        let left = (mid as u128) * (mid as u128);
        if left >= ceil_div {
            high = mid;
        } else {
            low = mid + 1;
        }
    }
    let add_term = low;

    (LAMBDA + add_term) as usize
}
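
// In closed form this is `LAMBDA + ceil(sqrt(3 * LAMBDA * (k + s) * ln 2))`.
// Worked example (illustrative, assuming PARAM_BC = 15113 and PARAM_EXT = 6,
// so LAMBDA = 236): for k = 20 and s = 128 the additive term is
// ceil(sqrt(3 * 236 * 148 * 0.693147)) = 270, for an upper bound of 506, which
// is why `MAX_BUCKET_SIZE = 512` satisfies the assertion in `Table::create()`.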

#[cfg(feature = "alloc")]
/// Groups `y`s into buckets by `y / PARAM_BC`, remembering each entry's
/// position in the input; every bucket keeps at most `REDUCED_BUCKET_SIZE`
/// entries and unused slots are filled with sentinels
fn group_by_buckets<const K: u8>(
    ys: &[Y],
) -> Box<[[(Position, Y); REDUCED_BUCKET_SIZE]; num_buckets(K)]>
where
    [(); num_buckets(K)]:,
{
    let mut bucket_offsets = [0_u16; num_buckets(K)];
    // SAFETY: `MaybeUninit` contents do not need initialization
    let mut buckets = unsafe {
        Box::<[[MaybeUninit<(Position, Y)>; REDUCED_BUCKET_SIZE]; num_buckets(K)]>::new_uninit()
            .assume_init()
    };

    for (&y, position) in ys.iter().zip(Position::ZERO..) {
        let bucket_index = (u32::from(y) / u32::from(PARAM_BC)) as usize;

        // SAFETY: `bucket_index` is in range because the whole range of `y`s is
        // covered by `num_buckets(K)` buckets
        let bucket_offset = unsafe { bucket_offsets.get_unchecked_mut(bucket_index) };
        let bucket = unsafe { buckets.get_unchecked_mut(bucket_index) };

        if *bucket_offset < REDUCED_BUCKET_SIZE as u16 {
            bucket[*bucket_offset as usize].write((position, y));
            *bucket_offset += 1;
        }
    }

    for (bucket, initialized) in buckets.iter_mut().zip(bucket_offsets) {
        bucket[usize::from(initialized)..].write_filled((Position::SENTINEL, Y::SENTINEL));
    }

    // SAFETY: the whole array was initialized above
    unsafe { Box::from_raw(Box::into_raw(buckets).cast()) }
}
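
// Note: overfull buckets are truncated rather than grown; downstream match
// finding treats the first sentinel as the end of a bucket, so a dropped entry
// only costs a handful of potential matches.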

#[cfg(feature = "parallel")]
/// Like `group_by_buckets()`, but the input `y`s already come in per-bucket
/// batches (up to `REDUCED_MATCHES_COUNT` each) produced by parallel table
/// creation
///
/// # Safety
/// The first `count` elements of every yielded array must be initialized
unsafe fn group_by_buckets_from_buckets<'a, const K: u8, I>(
    iter: I,
) -> Box<[[(Position, Y); REDUCED_BUCKET_SIZE]; num_buckets(K)]>
where
    I: Iterator<Item = (&'a [MaybeUninit<Y>; REDUCED_MATCHES_COUNT], usize)> + 'a,
    [(); num_buckets(K)]:,
{
    let mut bucket_offsets = [0_u16; num_buckets(K)];
    // SAFETY: `MaybeUninit` contents do not need initialization
    let mut buckets = unsafe {
        Box::<[[MaybeUninit<(Position, Y)>; REDUCED_BUCKET_SIZE]; num_buckets(K)]>::new_uninit()
            .assume_init()
    };

    for ((ys, count), batch_start) in iter.zip((Position::ZERO..).step_by(REDUCED_MATCHES_COUNT)) {
        // SAFETY: guaranteed by the function contract
        let ys = unsafe { ys[..count].assume_init_ref() };
        for (&y, position) in ys.iter().zip(batch_start..) {
            let bucket_index = (u32::from(y) / u32::from(PARAM_BC)) as usize;

            // SAFETY: `bucket_index` is in range because the whole range of
            // `y`s is covered by `num_buckets(K)` buckets
            let bucket_offset = unsafe { bucket_offsets.get_unchecked_mut(bucket_index) };
            let bucket = unsafe { buckets.get_unchecked_mut(bucket_index) };

            if *bucket_offset < REDUCED_BUCKET_SIZE as u16 {
                bucket[*bucket_offset as usize].write((position, y));
                *bucket_offset += 1;
            }
        }
    }

    for (bucket, initialized) in buckets.iter_mut().zip(bucket_offsets) {
        bucket[usize::from(initialized)..].write_filled((Position::SENTINEL, Y::SENTINEL));
    }

    // SAFETY: the whole array was initialized above
    unsafe { Box::from_raw(Box::into_raw(buckets).cast()) }
}

#[cfg(feature = "alloc")]
#[derive(Debug, Copy, Clone, Deref)]
#[repr(align(64))]
struct CacheLineAligned<T>(T);

/// Precomputed match targets for both parities, indexed as `[parity][r][m]`
#[cfg(feature = "alloc")]
type LeftTargets = [[CacheLineAligned<[R; PARAM_M as usize]>; PARAM_BC as usize]; 2];

#[cfg(feature = "alloc")]
fn calculate_left_targets() -> Arc<LeftTargets> {
    let mut left_targets = Arc::<LeftTargets>::new_uninit();
    // SAFETY: the source and target types have the same layout, with
    // `MaybeUninit` moved one level down
    let left_targets_slice = unsafe {
        mem::transmute::<
            &mut MaybeUninit<[[CacheLineAligned<[R; PARAM_M as usize]>; PARAM_BC as usize]; 2]>,
            &mut [[MaybeUninit<CacheLineAligned<[R; PARAM_M as usize]>>; PARAM_BC as usize]; 2],
        >(Arc::get_mut_unchecked(&mut left_targets))
    };

    for parity in 0..=1 {
        for r in 0..PARAM_BC {
            let c = r / PARAM_C;

            let arr = array::from_fn(|m| {
                let m = m as u16;
                R::from(
                    ((c + m) % PARAM_B) * PARAM_C
                        + (((2 * m + parity) * (2 * m + parity) + r) % PARAM_C),
                )
            });
            left_targets_slice[parity as usize][r as usize].write(CacheLineAligned(arr));
        }
    }

    // SAFETY: everything was initialized above
    unsafe { Arc::assume_init(left_targets) }
}

/// Computes a single left target without precomputation; agrees with the table
/// built by `calculate_left_targets()`
fn calculate_left_target_on_demand(parity: u32, r: u32, m: u32) -> u32 {
    let param_b = u32::from(PARAM_B);
    let param_c = u32::from(PARAM_C);

    ((r / param_c + m) % param_b) * param_c + (((2 * m + parity) * (2 * m + parity) + r) % param_c)
}
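
// In the notation of the Chia proof-of-space construction this is the matching
// target function
//     target(parity, r, m) = ((r / C + m) mod B) * C + (((2m + parity)^2 + r) mod C)
// with B = PARAM_B and C = PARAM_C: a left entry with residue `r` can only
// match right entries whose residue is one of `target(parity, r, 0..PARAM_M)`.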

#[cfg(feature = "alloc")]
/// Caches that can be reused when creating multiple tables; cloning is cheap
/// and shares the underlying data
#[derive(Debug, Clone)]
pub struct TablesCache {
    left_targets: Arc<LeftTargets>,
}

#[cfg(feature = "alloc")]
impl Default for TablesCache {
    /// Creates a new instance
    fn default() -> Self {
        Self {
            left_targets: calculate_left_targets(),
        }
    }
}

/// A single match between an entry of a left bucket and an entry of the
/// adjacent right bucket
#[cfg(feature = "alloc")]
#[derive(Debug, Copy, Clone)]
struct Match {
    left_position: Position,
    left_y: Y,
    right_position: Position,
}

/// Derives the first-table `y` for a single `x`: the `K` keystream bits that
/// start at bit offset `x * K` of the ChaCha8 output, extended with the
/// `PARAM_EXT` most significant bits of `x`
pub(super) fn compute_f1<const K: u8>(x: X, seed: &Seed) -> Y {
    let skip_bits = u32::from(K) * u32::from(x);
    let skip_u32s = skip_bits / u32::BITS;
    let partial_y_offset = skip_bits % u32::BITS;

    const U32S_PER_BLOCK: usize = size_of::<ChaCha8Block>() / size_of::<u32>();

    let initial_state = ChaCha8State::init(seed, &[0; _]);
    let first_block_counter = skip_u32s / U32S_PER_BLOCK as u32;
    let u32_in_first_block = skip_u32s as usize % U32S_PER_BLOCK;

    let first_block = initial_state.compute_block(first_block_counter);
    let hi = first_block[u32_in_first_block].to_be();

    // The second `u32` either comes from the same block or is the first one of
    // the next block
    let lo = if u32_in_first_block + 1 == U32S_PER_BLOCK {
        let second_block = initial_state.compute_block(first_block_counter + 1);
        second_block[0].to_be()
    } else {
        first_block[u32_in_first_block + 1].to_be()
    };

    let partial_y = (u64::from(hi) << u32::BITS) | u64::from(lo);

    let pre_y = partial_y >> (u64::BITS - u32::from(K + PARAM_EXT) - partial_y_offset);
    let pre_y = pre_y as u32;
    // Mask for clearing the last `PARAM_EXT` bits of `pre_y`
    let pre_y_mask = (u32::MAX << PARAM_EXT) & (u32::MAX >> (u32::BITS - u32::from(K + PARAM_EXT)));

    // Take the `PARAM_EXT` most significant bits of `x` and store them in the
    // last `PARAM_EXT` bits of `y`
    let pre_ext = u32::from(x) >> (K - PARAM_EXT);

    Y::from((pre_y & pre_y_mask) | pre_ext)
}
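
// Bit layout (illustrative, for K = 20 and PARAM_EXT = 6): `y` is 26 bits wide;
// its top 20 bits come from the keystream starting at bit `20 * x` and its
// bottom 6 bits are `x >> 14`, i.e. the most significant bits of `x`. This
// mirrors the f1 function of the Chia proof-of-space construction.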

#[cfg(any(feature = "alloc", test))]
/// Vectorized variant of `compute_f1()`: derives `COMPUTE_F1_SIMD_FACTOR` `y`s
/// at once from pre-generated keystream bytes (see `partial_ys()`)
pub(super) fn compute_f1_simd<const K: u8>(
    xs: Simd<u32, COMPUTE_F1_SIMD_FACTOR>,
    partial_ys: &[u8; usize::from(K) * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize],
) -> [Y; COMPUTE_F1_SIMD_FACTOR] {
    // Load the bytes containing the `K` bits of each lane into the top of a
    // `u64`, to be shifted into place below
    let pre_ys_bytes = array::from_fn(|i| {
        let partial_y_offset = i * usize::from(K);
        let partial_y_length =
            (partial_y_offset % u8::BITS as usize + usize::from(K)).div_ceil(u8::BITS as usize);
        let mut pre_y_bytes = 0u64.to_be_bytes();
        pre_y_bytes[..partial_y_length].copy_from_slice(
            &partial_ys[partial_y_offset / u8::BITS as usize..][..partial_y_length],
        );

        u64::from_be_bytes(pre_y_bytes)
    });
    let pre_ys_right_offset = array::from_fn(|i| {
        let partial_y_offset = i as u32 * u32::from(K);
        u64::from(u64::BITS - u32::from(K + PARAM_EXT) - partial_y_offset % u8::BITS)
    });
    let pre_ys = Simd::from_array(pre_ys_bytes) >> Simd::from_array(pre_ys_right_offset);

    // Mask for clearing the last `PARAM_EXT` bits of `pre_ys`
    let pre_ys_mask = Simd::splat(
        (u32::MAX << usize::from(PARAM_EXT))
            & (u32::MAX >> (u32::BITS as usize - usize::from(K + PARAM_EXT))),
    );

    // Take the `PARAM_EXT` most significant bits of each `x` and store them in
    // the last `PARAM_EXT` bits of the corresponding `y`
    let pre_exts = xs >> Simd::splat(u32::from(K - PARAM_EXT));

    // Combine all of the bits together
    let ys = (pre_ys.cast() & pre_ys_mask) | pre_exts;

    Y::array_from_repr(ys.to_array())
}

#[cfg(feature = "alloc")]
/// Finds matches between entries of two adjacent buckets: residues of the right
/// bucket go into an `Rmap`, then every left entry probes its `PARAM_M`
/// precomputed targets and emits up to two matches per target
///
/// # Safety
/// `left_bucket` must be the bucket with index `left_bucket_index` and
/// `right_bucket` the one immediately after it, with `y`s that belong to the
/// corresponding bucket ranges
unsafe fn find_matches_in_buckets<'a>(
    left_bucket_index: u32,
    left_bucket: &[(Position, Y); REDUCED_BUCKET_SIZE],
    right_bucket: &[(Position, Y); REDUCED_BUCKET_SIZE],
    matches: &'a mut [MaybeUninit<Match>; REDUCED_MATCHES_COUNT + PARAM_M as usize * 2],
    left_targets: &LeftTargets,
) -> &'a [Match] {
    let left_base = left_bucket_index * u32::from(PARAM_BC);
    let right_base = left_base + u32::from(PARAM_BC);

    let mut rmap = Rmap::new();
    for &(right_position, y) in right_bucket {
        // A sentinel marks the end of the initialized part of the bucket
        if right_position == Position::SENTINEL {
            break;
        }
        let r = R::from((u32::from(y) - right_base) as u16);
        // SAFETY: `r` is within the bucket and thus smaller than `PARAM_BC`
        unsafe {
            rmap.add(r, right_position);
        }
    }

    let parity = left_base % 2;
    let left_targets_parity = &left_targets[parity as usize];
    let mut next_match_index = 0;

    for &(left_position, y) in left_bucket {
        // The buffer has `PARAM_M * 2` elements of slack beyond
        // `REDUCED_MATCHES_COUNT`, so the capacity check is only needed once per
        // left entry
        if left_position == Position::SENTINEL || next_match_index >= REDUCED_MATCHES_COUNT {
            break;
        }

        let r = R::from((u32::from(y) - left_base) as u16);
        // SAFETY: `r` is within the bucket and thus smaller than `PARAM_BC`
        let left_targets_r = unsafe { left_targets_parity.get_unchecked(usize::from(r)) };

        for &r_target in left_targets_r.iter() {
            // SAFETY: targets are always smaller than `PARAM_BC`
            let [right_position_a, right_position_b] = unsafe { rmap.get(r_target) };

            if right_position_a != Position::ZERO {
                // SAFETY: in bounds thanks to the check above plus the buffer's
                // slack
                unsafe { matches.get_unchecked_mut(next_match_index) }.write(Match {
                    left_position,
                    left_y: y,
                    right_position: right_position_a,
                });
                next_match_index += 1;

                if right_position_b != Position::ZERO {
                    // SAFETY: same as above
                    unsafe { matches.get_unchecked_mut(next_match_index) }.write(Match {
                        left_position,
                        left_y: y,
                        right_position: right_position_b,
                    });
                    next_match_index += 1;
                }
            }
        }
    }

    // SAFETY: exactly `next_match_index` matches were initialized above
    unsafe { matches[..next_match_index].assume_init_ref() }
}
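
// Rough sizing (illustrative, assuming PARAM_BC = 15113 and PARAM_EXT = 6):
// each left entry probes PARAM_M = 64 targets, and a bucket pair yields about
// PARAM_BC / 2^PARAM_EXT ≈ 236 matches on average, comfortably below
// REDUCED_MATCHES_COUNT = 288.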

/// Checks whether `left_y` and `right_y` can form a match: `right_y`'s residue
/// modulo `PARAM_BC` must be among the `PARAM_M` targets derived from `left_y`;
/// callers are responsible for the two `y`s coming from adjacent buckets
pub(super) fn has_match(left_y: Y, right_y: Y) -> bool {
    let right_r = u32::from(right_y) % u32::from(PARAM_BC);
    let parity = (u32::from(left_y) / u32::from(PARAM_BC)) % 2;
    let left_r = u32::from(left_y) % u32::from(PARAM_BC);

    let r_targets = array::from_fn::<_, { PARAM_M as usize }, _>(|i| {
        calculate_left_target_on_demand(parity, left_r, i as u32)
    });

    r_targets.contains(&right_r)
}
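
// Worked sketch (illustrative, assuming PARAM_BC = 15113): for left_y = 100 the
// bucket index is 0, so parity = 0 and left_r = 100; the 64 candidate residues
// are target(0, 100, m) for m in 0..64, and `has_match()` returns true iff
// right_y % 15113 equals one of them. Using the on-demand target computation
// keeps this usable without the precomputed `LeftTargets` table.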

#[inline(always)]
/// Computes the f-function for tables 2..=7: hashes `y` together with the left
/// and right parent metadata and derives the next table's `y` and metadata from
/// the hash
pub(super) fn compute_fn<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
    y: Y,
    left_metadata: Metadata<K, PARENT_TABLE_NUMBER>,
    right_metadata: Metadata<K, PARENT_TABLE_NUMBER>,
) -> (Y, Metadata<K, TABLE_NUMBER>)
where
    EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
{
    let left_metadata = u128::from(left_metadata);
    let right_metadata = u128::from(right_metadata);

    let parent_metadata_bits = metadata_size_bits(K, PARENT_TABLE_NUMBER);

    let y_and_left_bits = y_size_bits(K) + parent_metadata_bits;
    let right_bits_start_offset = u128::BITS as usize - parent_metadata_bits;

    // Take only the bytes where bits were set
    let num_bytes_with_data =
        (y_size_bits(K) + parent_metadata_bits * 2).div_ceil(u8::BITS as usize);

    let hash = {
        // Collect `K + PARAM_EXT` bits of `y` at the final offset of eventual
        // `input_a`
        let y_bits = u128::from(y) << (u128::BITS as usize - y_size_bits(K));

        // Move bits of `left_metadata` to the final offset of eventual `input_a`
        let left_metadata_bits =
            left_metadata << (u128::BITS as usize - parent_metadata_bits - y_size_bits(K));

        // If `right_metadata` bits do not fit into the remainder of `input_a`,
        // the leftovers spill over into `input_b`
        if right_bits_start_offset < y_and_left_bits {
            let right_bits_pushed_into_input_b = y_and_left_bits - right_bits_start_offset;
            let right_bits_a = right_metadata >> right_bits_pushed_into_input_b;
            let input_a = y_bits | left_metadata_bits | right_bits_a;
            let input_b = right_metadata << (u128::BITS as usize - right_bits_pushed_into_input_b);

            let input = [input_a.to_be_bytes(), input_b.to_be_bytes()];
            let input_len =
                size_of::<u128>() + right_bits_pushed_into_input_b.div_ceil(u8::BITS as usize);
            ab_blake3::single_block_hash(&input.as_flattened()[..input_len])
                .expect("Exactly a single block worth of bytes; qed")
        } else {
            let right_bits_a = right_metadata << (right_bits_start_offset - y_and_left_bits);
            let input_a = y_bits | left_metadata_bits | right_bits_a;

            ab_blake3::single_block_hash(&input_a.to_be_bytes()[..num_bytes_with_data])
                .expect("Less than a single block worth of bytes; qed")
        }
    };

    let y_output = Y::from(
        u32::from_be_bytes([hash[0], hash[1], hash[2], hash[3]])
            >> (u32::BITS as usize - y_size_bits(K)),
    );

    let metadata_size_bits = metadata_size_bits(K, TABLE_NUMBER);

    let metadata = if TABLE_NUMBER < 4 {
        (left_metadata << parent_metadata_bits) | right_metadata
    } else if metadata_size_bits > 0 {
        // For tables 4..=6 the metadata is the next `metadata_size_bits` bits of
        // the hash right after the `y` bits
        let metadata = u128::from_be_bytes(
            hash[y_size_bits(K) / u8::BITS as usize..][..size_of::<u128>()]
                .try_into()
                .expect("Always enough bits for any K; qed"),
        );
        // Remove the leftover bits of `y` from the beginning
        let metadata = metadata << (y_size_bits(K) % u8::BITS as usize);
        // Take only the bits that belong to the metadata
        metadata >> (u128::BITS as usize - metadata_size_bits)
    } else {
        0
    };

    (y_output, Metadata::from(metadata))
}
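
// Input layout, as packed above: the hashed message is the big-endian bit
// string `y (y_size_bits) || left_metadata || right_metadata`, padded to whole
// bytes. Tables 2 and 3 keep the concatenation `left || right` as the new
// metadata, tables 4..=6 slice theirs out of the hash right after the `y` bits,
// and table 7 produces none. This mirrors the f-function chain of the Chia
// proof of space, here with single-block BLAKE3 as the hash.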

#[cfg(any(feature = "alloc", test))]
/// Vectorized variant of `compute_fn()` that processes `COMPUTE_FN_SIMD_FACTOR`
/// inputs at once; hashing itself remains scalar, while packing and
/// post-processing are unrolled with `seq!`
fn compute_fn_simd<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
    left_ys: [Y; COMPUTE_FN_SIMD_FACTOR],
    left_metadatas: [Metadata<K, PARENT_TABLE_NUMBER>; COMPUTE_FN_SIMD_FACTOR],
    right_metadatas: [Metadata<K, PARENT_TABLE_NUMBER>; COMPUTE_FN_SIMD_FACTOR],
) -> (
    Simd<u32, COMPUTE_FN_SIMD_FACTOR>,
    [Metadata<K, TABLE_NUMBER>; COMPUTE_FN_SIMD_FACTOR],
)
where
    EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
{
    let parent_metadata_bits = metadata_size_bits(K, PARENT_TABLE_NUMBER);
    let metadata_size_bits = metadata_size_bits(K, TABLE_NUMBER);

    let left_metadatas: [u128; COMPUTE_FN_SIMD_FACTOR] = seq!(N in 0..16 {
        [
        #(
            u128::from(left_metadatas[N]),
        )*
        ]
    });
    let right_metadatas: [u128; COMPUTE_FN_SIMD_FACTOR] = seq!(N in 0..16 {
        [
        #(
            u128::from(right_metadatas[N]),
        )*
        ]
    });

    let y_and_left_bits = y_size_bits(K) + parent_metadata_bits;
    let right_bits_start_offset = u128::BITS as usize - parent_metadata_bits;

    // Take only the bytes where bits were set
    let num_bytes_with_data =
        (y_size_bits(K) + parent_metadata_bits * 2).div_ceil(u8::BITS as usize);

    let hashes: [_; COMPUTE_FN_SIMD_FACTOR] = seq!(N in 0..16 {
        [
        #(
            {
                let y = left_ys[N];
                let left_metadata = left_metadatas[N];
                let right_metadata = right_metadatas[N];

                // Collect `K + PARAM_EXT` bits of `y` at the final offset of
                // eventual `input_a`
                let y_bits = u128::from(y) << (u128::BITS as usize - y_size_bits(K));

                // Move bits of `left_metadata` to the final offset of eventual
                // `input_a`
                let left_metadata_bits =
                    left_metadata << (u128::BITS as usize - parent_metadata_bits - y_size_bits(K));

                // If `right_metadata` bits do not fit into the remainder of
                // `input_a`, the leftovers spill over into `input_b`
                if right_bits_start_offset < y_and_left_bits {
                    let right_bits_pushed_into_input_b = y_and_left_bits - right_bits_start_offset;
                    let right_bits_a = right_metadata >> right_bits_pushed_into_input_b;
                    let input_a = y_bits | left_metadata_bits | right_bits_a;
                    let input_b = right_metadata << (u128::BITS as usize - right_bits_pushed_into_input_b);

                    let input = [input_a.to_be_bytes(), input_b.to_be_bytes()];
                    let input_len =
                        size_of::<u128>() + right_bits_pushed_into_input_b.div_ceil(u8::BITS as usize);
                    ab_blake3::single_block_hash(&input.as_flattened()[..input_len])
                        .expect("Exactly a single block worth of bytes; qed")
                } else {
                    let right_bits_a = right_metadata << (right_bits_start_offset - y_and_left_bits);
                    let input_a = y_bits | left_metadata_bits | right_bits_a;

                    ab_blake3::single_block_hash(&input_a.to_be_bytes()[..num_bytes_with_data])
                        .expect("Less than a single block worth of bytes; qed")
                }
            },
        )*
        ]
    });

    let y_outputs = Simd::from_array(
        hashes.map(|hash| u32::from_be_bytes([hash[0], hash[1], hash[2], hash[3]])),
    ) >> (u32::BITS - y_size_bits(K) as u32);

    let metadatas = if TABLE_NUMBER < 4 {
        seq!(N in 0..16 {
            [
            #(
                Metadata::from((left_metadatas[N] << parent_metadata_bits) | right_metadatas[N]),
            )*
            ]
        })
    } else if metadata_size_bits > 0 {
        // For tables 4..=6 the metadata is the next `metadata_size_bits` bits of
        // the hash right after the `y` bits
        seq!(N in 0..16 {
            [
            #(
                {
                    let metadata = u128::from_be_bytes(
                        hashes[N][y_size_bits(K) / u8::BITS as usize..][..size_of::<u128>()]
                            .try_into()
                            .expect("Always enough bits for any K; qed"),
                    );
                    // Remove the leftover bits of `y` from the beginning
                    let metadata = metadata << (y_size_bits(K) % u8::BITS as usize);
                    // Take only the bits that belong to the metadata
                    Metadata::from(metadata >> (u128::BITS as usize - metadata_size_bits))
                },
            )*
            ]
        })
    } else {
        [Metadata::default(); _]
    };

    (y_outputs, metadatas)
}

#[cfg(feature = "alloc")]
/// Turns a single [`Match`] into the next table's `y`, the pair of parent
/// positions and the new metadata
///
/// # Safety
/// The match's positions must be valid for `parent_table`
#[inline(always)]
unsafe fn match_to_result<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
    parent_table: &Table<K, PARENT_TABLE_NUMBER>,
    m: &Match,
) -> (Y, [Position; 2], Metadata<K, TABLE_NUMBER>)
where
    Table<K, PARENT_TABLE_NUMBER>: private::NotLastTable,
    EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
    // SAFETY: guaranteed by the function contract
    let left_metadata = unsafe { parent_table.metadata(m.left_position) };
    let right_metadata = unsafe { parent_table.metadata(m.right_position) };

    let (y, metadata) =
        compute_fn::<K, TABLE_NUMBER, PARENT_TABLE_NUMBER>(m.left_y, left_metadata, right_metadata);

    (y, [m.left_position, m.right_position], metadata)
}

#[cfg(feature = "alloc")]
/// Like `match_to_result()`, but processes `COMPUTE_FN_SIMD_FACTOR` matches at
/// once
///
/// # Safety
/// All matches' positions must be valid for `parent_table`
#[inline(always)]
unsafe fn match_to_result_simd<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
    parent_table: &Table<K, PARENT_TABLE_NUMBER>,
    matches: &[Match; COMPUTE_FN_SIMD_FACTOR],
) -> (
    Simd<u32, COMPUTE_FN_SIMD_FACTOR>,
    [[Position; 2]; COMPUTE_FN_SIMD_FACTOR],
    [Metadata<K, TABLE_NUMBER>; COMPUTE_FN_SIMD_FACTOR],
)
where
    Table<K, PARENT_TABLE_NUMBER>: private::NotLastTable,
    EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
    let left_ys: [_; COMPUTE_FN_SIMD_FACTOR] = seq!(N in 0..16 {
        [
        #(
            matches[N].left_y,
        )*
        ]
    });
    // SAFETY: guaranteed by the function contract
    let left_metadatas: [_; COMPUTE_FN_SIMD_FACTOR] = unsafe {
        seq!(N in 0..16 {
            [
            #(
                parent_table.metadata(matches[N].left_position),
            )*
            ]
        })
    };
    // SAFETY: guaranteed by the function contract
    let right_metadatas: [_; COMPUTE_FN_SIMD_FACTOR] = unsafe {
        seq!(N in 0..16 {
            [
            #(
                parent_table.metadata(matches[N].right_position),
            )*
            ]
        })
    };

    let (y_outputs, metadatas) = compute_fn_simd::<K, TABLE_NUMBER, PARENT_TABLE_NUMBER>(
        left_ys,
        left_metadatas,
        right_metadatas,
    );

    let positions = seq!(N in 0..16 {
        [
        #(
            [
                matches[N].left_position,
                matches[N].right_position,
            ],
        )*
        ]
    });

    (y_outputs, positions, metadatas)
}

#[cfg(feature = "alloc")]
/// Processes a slice of matches into parallel outputs of `y`s, parent positions
/// and metadata: SIMD-width groups go through `match_to_result_simd()`, the
/// remainder through scalar `match_to_result()`
///
/// # Safety
/// All matches' positions must be valid for `parent_table`; the output slices
/// must be at least as long as `matches`
#[inline(always)]
unsafe fn matches_to_results<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
    parent_table: &Table<K, PARENT_TABLE_NUMBER>,
    matches: &[Match],
    ys: &mut [MaybeUninit<Y>],
    positions: &mut [MaybeUninit<[Position; 2]>],
    metadatas: &mut [MaybeUninit<Metadata<K, TABLE_NUMBER>>],
) where
    Table<K, PARENT_TABLE_NUMBER>: private::NotLastTable,
    EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
    let (grouped_matches, other_matches) = matches.as_chunks::<COMPUTE_FN_SIMD_FACTOR>();
    let (grouped_ys, other_ys) = ys.split_at_mut(grouped_matches.as_flattened().len());
    let grouped_ys = grouped_ys.as_chunks_mut::<COMPUTE_FN_SIMD_FACTOR>().0;
    let (grouped_positions, other_positions) =
        positions.split_at_mut(grouped_matches.as_flattened().len());
    let grouped_positions = grouped_positions
        .as_chunks_mut::<COMPUTE_FN_SIMD_FACTOR>()
        .0;
    let (grouped_metadatas, other_metadatas) =
        metadatas.split_at_mut(grouped_matches.as_flattened().len());
    let grouped_metadatas = grouped_metadatas
        .as_chunks_mut::<COMPUTE_FN_SIMD_FACTOR>()
        .0;

    for (((grouped_matches, grouped_ys), grouped_positions), grouped_metadatas) in grouped_matches
        .iter()
        .zip(grouped_ys)
        .zip(grouped_positions)
        .zip(grouped_metadatas)
    {
        // SAFETY: guaranteed by the function contract
        let (ys_group, positions_group, metadatas_group) =
            unsafe { match_to_result_simd(parent_table, grouped_matches) };
        let ys_group = Y::array_from_repr(ys_group.to_array());
        grouped_ys.write_copy_of_slice(&ys_group);
        grouped_positions.write_copy_of_slice(&positions_group);

        if metadata_size_bits(K, TABLE_NUMBER) > 0 {
            grouped_metadatas.write_copy_of_slice(&metadatas_group);
        }
    }
    for (((other_match, other_y), other_positions), other_metadata) in other_matches
        .iter()
        .zip(other_ys)
        .zip(other_positions)
        .zip(other_metadatas)
    {
        // SAFETY: guaranteed by the function contract
        let (y, p, metadata) = unsafe { match_to_result(parent_table, other_match) };
        other_y.write(y);
        other_positions.write(p);
        if metadata_size_bits(K, TABLE_NUMBER) > 0 {
            other_metadata.write(metadata);
        }
    }
}
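
// Note: when the target table carries no metadata (table 7), the metadata
// outputs above are intentionally left uninitialized and must not be read.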

#[cfg(feature = "alloc")]
/// A table pruned down to the parent positions needed for proof reconstruction
#[derive(Debug)]
pub(super) enum PrunedTable<const K: u8, const TABLE_NUMBER: u8>
where
    [(); 1 << K]:,
    [(); num_buckets(K) - 1]:,
{
    First,
    Other {
        /// Left and right parent positions for each entry
        positions: Box<[MaybeUninit<[Position; 2]>; 1 << K]>,
    },
    /// Like [`Self::Other`], but positions are grouped by the bucket pair that
    /// produced them
    #[cfg(feature = "parallel")]
    OtherBuckets {
        positions: Box<[[MaybeUninit<[Position; 2]>; REDUCED_MATCHES_COUNT]; num_buckets(K) - 1]>,
    },
}

#[cfg(feature = "alloc")]
impl<const K: u8, const TABLE_NUMBER: u8> PrunedTable<K, TABLE_NUMBER>
where
    [(); 1 << K]:,
    [(); num_buckets(K) - 1]:,
{
    #[inline(always)]
    /// Returns the left and right parent positions for an entry
    ///
    /// # Safety
    /// `position` must point at a real entry of this table, which must not be
    /// the first one
    pub(super) unsafe fn position(&self, position: Position) -> [Position; 2] {
        match self {
            Self::First => {
                unreachable!("Not the first table");
            }
            Self::Other { positions, .. } => {
                // SAFETY: guaranteed by the method contract
                unsafe { positions.get_unchecked(usize::from(position)).assume_init() }
            }
            #[cfg(feature = "parallel")]
            Self::OtherBuckets { positions, .. } => {
                // SAFETY: guaranteed by the method contract
                unsafe {
                    positions
                        .as_flattened()
                        .get_unchecked(usize::from(position))
                        .assume_init()
                }
            }
        }
    }
}

#[cfg(feature = "alloc")]
#[derive(Debug)]
pub(super) enum Table<const K: u8, const TABLE_NUMBER: u8>
where
    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
    /// The first table, where positions are the `x` values themselves
    First {
        /// `y`s grouped into buckets alongside their positions
        buckets: Box<[[(Position, Y); REDUCED_BUCKET_SIZE]; num_buckets(K)]>,
    },
    /// Tables 2..=7 created sequentially
    Other {
        /// Left and right parent positions for each entry
        positions: Box<[MaybeUninit<[Position; 2]>; 1 << K]>,
        /// Metadata for each entry; uninitialized for tables without metadata
        metadatas: Box<[MaybeUninit<Metadata<K, TABLE_NUMBER>>; 1 << K]>,
        /// `y`s grouped into buckets alongside their positions
        buckets: Box<[[(Position, Y); REDUCED_BUCKET_SIZE]; num_buckets(K)]>,
    },
    /// Tables 2..=7 created in parallel, with entries grouped by the bucket
    /// pair that produced them
    #[cfg(feature = "parallel")]
    OtherBuckets {
        positions: Box<[[MaybeUninit<[Position; 2]>; REDUCED_MATCHES_COUNT]; num_buckets(K) - 1]>,
        metadatas: Box<
            [[MaybeUninit<Metadata<K, TABLE_NUMBER>>; REDUCED_MATCHES_COUNT]; num_buckets(K) - 1],
        >,
        buckets: Box<[[(Position, Y); REDUCED_BUCKET_SIZE]; num_buckets(K)]>,
    },
}

#[cfg(feature = "alloc")]
impl<const K: u8> Table<K, 1>
where
    EvaluatableUsize<{ metadata_size_bytes(K, 1) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
    /// Creates the first table: `y = f1(x)` for every `x` in `0..2^K`, grouped
    /// into buckets
    pub(super) fn create(seed: Seed) -> Self
    where
        EvaluatableUsize<{ usize::from(K) * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize }>: Sized,
    {
        debug_assert!(
            MAX_BUCKET_SIZE >= bucket_size_upper_bound(K, BUCKET_SIZE_UPPER_BOUND_SECURITY_BITS),
            "Max bucket size is not sufficiently large"
        );

        let partial_ys = partial_ys::<K>(seed);

        // SAFETY: `MaybeUninit` contents do not need initialization
        let mut ys = unsafe { Box::<[MaybeUninit<Y>; 1 << K]>::new_uninit().assume_init() };

        for ((ys, xs_batch_start), partial_ys) in ys
            .as_chunks_mut::<COMPUTE_F1_SIMD_FACTOR>()
            .0
            .iter_mut()
            .zip((X::ZERO..).step_by(COMPUTE_F1_SIMD_FACTOR))
            .zip(
                partial_ys
                    .as_chunks::<{ usize::from(K) * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize }>()
                    .0,
            )
        {
            let xs = Simd::splat(u32::from(xs_batch_start))
                + Simd::from_array(array::from_fn(|i| i as u32));
            let ys_batch = compute_f1_simd::<K>(xs, partial_ys);

            ys.write_copy_of_slice(&ys_batch);
        }

        // SAFETY: `ys` was fully initialized above (`2^K` is a multiple of
        // `COMPUTE_F1_SIMD_FACTOR`, so the chunked iteration covers the whole
        // array)
        let ys = unsafe {
            let ys_len = ys.len();
            let ys = Box::into_raw(ys);
            Vec::from_raw_parts(ys.cast(), ys_len, ys_len)
        };

        let buckets = group_by_buckets::<K>(&ys);

        Self::First { buckets }
    }

    /// Parallel counterpart of `create()`; currently identical to it, since
    /// first-table creation is a single sequential pass
    #[cfg(feature = "parallel")]
    pub(super) fn create_parallel(seed: Seed) -> Self
    where
        EvaluatableUsize<{ usize::from(K) * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize }>: Sized,
    {
        debug_assert!(
            MAX_BUCKET_SIZE >= bucket_size_upper_bound(K, BUCKET_SIZE_UPPER_BOUND_SECURITY_BITS),
            "Max bucket size is not sufficiently large"
        );

        let partial_ys = partial_ys::<K>(seed);

        // SAFETY: `MaybeUninit` contents do not need initialization
        let mut ys = unsafe { Box::<[MaybeUninit<Y>; 1 << K]>::new_uninit().assume_init() };

        for ((ys, xs_batch_start), partial_ys) in ys
            .as_chunks_mut::<COMPUTE_F1_SIMD_FACTOR>()
            .0
            .iter_mut()
            .zip((X::ZERO..).step_by(COMPUTE_F1_SIMD_FACTOR))
            .zip(
                partial_ys
                    .as_chunks::<{ usize::from(K) * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize }>()
                    .0,
            )
        {
            let xs = Simd::splat(u32::from(xs_batch_start))
                + Simd::from_array(array::from_fn(|i| i as u32));
            let ys_batch = compute_f1_simd::<K>(xs, partial_ys);

            ys.write_copy_of_slice(&ys_batch);
        }

        // SAFETY: `ys` was fully initialized above (`2^K` is a multiple of
        // `COMPUTE_F1_SIMD_FACTOR`, so the chunked iteration covers the whole
        // array)
        let ys = unsafe {
            let ys_len = ys.len();
            let ys = Box::into_raw(ys);
            Vec::from_raw_parts(ys.cast(), ys_len, ys_len)
        };

        let buckets = group_by_buckets::<K>(&ys);

        Self::First { buckets }
    }
}

#[cfg(feature = "alloc")]
mod private {
    pub(in super::super) trait SupportedOtherTables {}
    pub(in super::super) trait NotLastTable {}
}

#[cfg(feature = "alloc")]
impl<const K: u8> private::SupportedOtherTables for Table<K, 2>
where
    EvaluatableUsize<{ metadata_size_bytes(K, 2) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
}
#[cfg(feature = "alloc")]
impl<const K: u8> private::SupportedOtherTables for Table<K, 3>
where
    EvaluatableUsize<{ metadata_size_bytes(K, 3) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
}
#[cfg(feature = "alloc")]
impl<const K: u8> private::SupportedOtherTables for Table<K, 4>
where
    EvaluatableUsize<{ metadata_size_bytes(K, 4) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
}
#[cfg(feature = "alloc")]
impl<const K: u8> private::SupportedOtherTables for Table<K, 5>
where
    EvaluatableUsize<{ metadata_size_bytes(K, 5) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
}
#[cfg(feature = "alloc")]
impl<const K: u8> private::SupportedOtherTables for Table<K, 6>
where
    EvaluatableUsize<{ metadata_size_bytes(K, 6) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
}
#[cfg(feature = "alloc")]
impl<const K: u8> private::SupportedOtherTables for Table<K, 7>
where
    EvaluatableUsize<{ metadata_size_bytes(K, 7) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
}

#[cfg(feature = "alloc")]
impl<const K: u8> private::NotLastTable for Table<K, 1>
where
    EvaluatableUsize<{ metadata_size_bytes(K, 1) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
}
#[cfg(feature = "alloc")]
impl<const K: u8> private::NotLastTable for Table<K, 2>
where
    EvaluatableUsize<{ metadata_size_bytes(K, 2) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
}
#[cfg(feature = "alloc")]
impl<const K: u8> private::NotLastTable for Table<K, 3>
where
    EvaluatableUsize<{ metadata_size_bytes(K, 3) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
}
#[cfg(feature = "alloc")]
impl<const K: u8> private::NotLastTable for Table<K, 4>
where
    EvaluatableUsize<{ metadata_size_bytes(K, 4) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
}
#[cfg(feature = "alloc")]
impl<const K: u8> private::NotLastTable for Table<K, 5>
where
    EvaluatableUsize<{ metadata_size_bytes(K, 5) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
}
#[cfg(feature = "alloc")]
impl<const K: u8> private::NotLastTable for Table<K, 6>
where
    EvaluatableUsize<{ metadata_size_bytes(K, 6) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
}

#[cfg(feature = "alloc")]
impl<const K: u8, const TABLE_NUMBER: u8> Table<K, TABLE_NUMBER>
where
    Self: private::SupportedOtherTables,
    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
    /// Creates this table from the parent table by finding matches in every
    /// pair of adjacent buckets; returns the new table together with the pruned
    /// parent
    pub(super) fn create<const PARENT_TABLE_NUMBER: u8>(
        parent_table: Table<K, PARENT_TABLE_NUMBER>,
        cache: &TablesCache,
    ) -> (Self, PrunedTable<K, PARENT_TABLE_NUMBER>)
    where
        Table<K, PARENT_TABLE_NUMBER>: private::NotLastTable,
        EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
    {
        let left_targets = &*cache.left_targets;
        let mut initialized_elements = 0_usize;
        // SAFETY: `MaybeUninit` contents do not need initialization
        let mut ys = unsafe { Box::<[MaybeUninit<Y>; 1 << K]>::new_uninit().assume_init() };
        let mut positions =
            unsafe { Box::<[MaybeUninit<[Position; 2]>; 1 << K]>::new_uninit().assume_init() };
        let mut metadatas = unsafe {
            Box::<[MaybeUninit<Metadata<K, TABLE_NUMBER>>; 1 << K]>::new_uninit().assume_init()
        };

        for ([left_bucket, right_bucket], left_bucket_index) in
            parent_table.buckets().array_windows().zip(0..)
        {
            let mut matches = [MaybeUninit::uninit(); _];
            // SAFETY: the buckets come from the parent table and correspond to
            // `left_bucket_index`
            let matches = unsafe {
                find_matches_in_buckets(
                    left_bucket_index,
                    left_bucket,
                    right_bucket,
                    &mut matches,
                    left_targets,
                )
            };
            let matches = &matches[..matches.len().min(REDUCED_MATCHES_COUNT)];
            // SAFETY: `initialized_elements` is always within bounds
            let (ys, positions, metadatas) = unsafe {
                (
                    ys.split_at_mut_unchecked(initialized_elements).1,
                    positions.split_at_mut_unchecked(initialized_elements).1,
                    metadatas.split_at_mut_unchecked(initialized_elements).1,
                )
            };

            // SAFETY: the number of matches is bounded by the remaining output
            // capacity
            let (ys, positions, metadatas) = unsafe {
                (
                    ys.split_at_mut_unchecked(matches.len()).0,
                    positions.split_at_mut_unchecked(matches.len()).0,
                    metadatas.split_at_mut_unchecked(matches.len()).0,
                )
            };

            // SAFETY: matches reference valid parent positions and the output
            // slices are exactly as long as `matches`
            unsafe {
                matches_to_results(&parent_table, matches, ys, positions, metadatas);
            }

            initialized_elements += matches.len();
        }

        let parent_table = parent_table.prune();

        // SAFETY: exactly `initialized_elements` elements of `ys` were
        // initialized above
        let ys = unsafe {
            let ys_len = ys.len();
            let ys = Box::into_raw(ys);
            Vec::from_raw_parts(ys.cast(), initialized_elements, ys_len)
        };

        let buckets = group_by_buckets::<K>(&ys);

        let table = Self::Other {
            positions,
            metadatas,
            buckets,
        };

        (table, parent_table)
    }

    #[cfg(feature = "parallel")]
    /// Parallel version of `create()`: bucket pairs are processed by a rayon
    /// broadcast in cache-line-sized batches, and the results stay grouped by
    /// bucket instead of being compacted
    pub(super) fn create_parallel<const PARENT_TABLE_NUMBER: u8>(
        parent_table: Table<K, PARENT_TABLE_NUMBER>,
        cache: &TablesCache,
    ) -> (Self, PrunedTable<K, PARENT_TABLE_NUMBER>)
    where
        Table<K, PARENT_TABLE_NUMBER>: private::NotLastTable,
        EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
    {
        // SAFETY: `MaybeUninit` contents do not need initialization
        let ys = unsafe {
            Box::<[SyncUnsafeCell<[MaybeUninit<_>; REDUCED_MATCHES_COUNT]>; num_buckets(K) - 1]>::new_uninit().assume_init()
        };
        let positions = unsafe {
            Box::<[SyncUnsafeCell<[MaybeUninit<_>; REDUCED_MATCHES_COUNT]>; num_buckets(K) - 1]>::new_uninit().assume_init()
        };
        let metadatas = unsafe {
            Box::<[SyncUnsafeCell<[MaybeUninit<_>; REDUCED_MATCHES_COUNT]>; num_buckets(K) - 1]>::new_uninit().assume_init()
        };
        let global_results_counts =
            array::from_fn::<_, { num_buckets(K) - 1 }, _>(|_| SyncUnsafeCell::new(0u16));

        let left_targets = &*cache.left_targets;

        let buckets = parent_table.buckets();
        // Batch size such that each batch of `u16` counts occupies a full cache
        // line, keeping workers off each other's cache lines
        let bucket_batch_size = CACHE_LINE_SIZE / size_of::<u16>();
        let bucket_batch_index = AtomicUsize::new(0);

        rayon::broadcast(|_ctx| {
            loop {
                let bucket_batch_index = bucket_batch_index.fetch_add(1, Ordering::Relaxed);

                let buckets_batch = buckets
                    .array_windows()
                    .enumerate()
                    .skip(bucket_batch_index * bucket_batch_size)
                    .take(bucket_batch_size);

                if buckets_batch.is_empty() {
                    break;
                }

                for (left_bucket_index, [left_bucket, right_bucket]) in buckets_batch {
                    let mut matches = [MaybeUninit::uninit(); _];
                    // SAFETY: the buckets come from the parent table and
                    // correspond to `left_bucket_index`
                    let matches = unsafe {
                        find_matches_in_buckets(
                            left_bucket_index as u32,
                            left_bucket,
                            right_bucket,
                            &mut matches,
                            left_targets,
                        )
                    };
                    let matches = &matches[..matches.len().min(REDUCED_MATCHES_COUNT)];

                    // SAFETY: each `left_bucket_index` is claimed by exactly one
                    // worker, so these exclusive references do not alias
                    let ys = unsafe { &mut *ys.get_unchecked(left_bucket_index).get() };
                    let positions =
                        unsafe { &mut *positions.get_unchecked(left_bucket_index).get() };
                    let metadatas =
                        unsafe { &mut *metadatas.get_unchecked(left_bucket_index).get() };
                    let count = unsafe {
                        &mut *global_results_counts.get_unchecked(left_bucket_index).get()
                    };

                    // SAFETY: matches reference valid parent positions and the
                    // output arrays hold `REDUCED_MATCHES_COUNT` elements
                    unsafe {
                        matches_to_results::<_, TABLE_NUMBER, _>(
                            &parent_table,
                            matches,
                            ys,
                            positions,
                            metadatas,
                        )
                    };
                    *count = matches.len() as u16;
                }
            }
        });

        let parent_table = parent_table.prune();

        let ys = strip_sync_unsafe_cell(ys);
        let positions = strip_sync_unsafe_cell(positions);
        let metadatas = strip_sync_unsafe_cell(metadatas);

        // SAFETY: the counts correspond to the initialized prefix of each
        // bucket's results
        let buckets = unsafe {
            group_by_buckets_from_buckets::<K, _>(
                ys.iter().zip(
                    global_results_counts
                        .into_iter()
                        .map(|count| usize::from(count.into_inner())),
                ),
            )
        };

        let table = Self::OtherBuckets {
            positions,
            metadatas,
            buckets,
        };

        (table, parent_table)
    }

    #[inline(always)]
    /// Returns the left and right parent positions for an entry
    ///
    /// # Safety
    /// `position` must point at a real entry of this table, which must not be
    /// the first one
    pub(super) unsafe fn position(&self, position: Position) -> [Position; 2] {
        match self {
            Self::First { .. } => {
                unreachable!("Not the first table");
            }
            Self::Other { positions, .. } => {
                // SAFETY: guaranteed by the method contract
                unsafe { positions.get_unchecked(usize::from(position)).assume_init() }
            }
            #[cfg(feature = "parallel")]
            Self::OtherBuckets { positions, .. } => {
                // SAFETY: guaranteed by the method contract
                unsafe {
                    positions
                        .as_flattened()
                        .get_unchecked(usize::from(position))
                        .assume_init()
                }
            }
        }
    }
}

#[cfg(feature = "alloc")]
impl<const K: u8> Table<K, 7>
where
    Self: private::SupportedOtherTables,
    EvaluatableUsize<{ metadata_size_bytes(K, 7) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
    /// Rather than materializing table 7, maps every match directly to its
    /// `s`-bucket and records the first pair of table 6 positions seen for each
    /// bucket
    pub(super) fn create_proof_targets(
        parent_table: Table<K, 6>,
        cache: &TablesCache,
    ) -> (
        Box<[[Position; 2]; Record::NUM_S_BUCKETS]>,
        PrunedTable<K, 6>,
    )
    where
        Table<K, 6>: private::NotLastTable,
        EvaluatableUsize<{ metadata_size_bytes(K, 6) }>: Sized,
    {
        let left_targets = &*cache.left_targets;
        // SAFETY: all-zero bytes are a valid `Position` value
        let mut table_6_proof_targets =
            unsafe { Box::<[[Position; 2]; Record::NUM_S_BUCKETS]>::new_zeroed().assume_init() };

        for ([left_bucket, right_bucket], left_bucket_index) in
            parent_table.buckets().array_windows().zip(0..)
        {
            let mut matches = [MaybeUninit::uninit(); _];
            // SAFETY: the buckets come from the parent table and correspond to
            // `left_bucket_index`
            let matches = unsafe {
                find_matches_in_buckets(
                    left_bucket_index,
                    left_bucket,
                    right_bucket,
                    &mut matches,
                    left_targets,
                )
            };
            let matches = &matches[..matches.len().min(REDUCED_MATCHES_COUNT)];

            let (grouped_matches, other_matches) = matches.as_chunks::<COMPUTE_FN_SIMD_FACTOR>();

            for grouped_matches in grouped_matches {
                // SAFETY: all matches reference valid parent positions
                let (ys_group, positions_group, _) =
                    unsafe { match_to_result_simd::<_, 7, _>(&parent_table, grouped_matches) };

                // The `s`-bucket is the first `K` bits of `y`
                let s_buckets = ys_group >> Simd::splat(u32::from(PARAM_EXT));

                for (s_bucket, p) in s_buckets.to_array().into_iter().zip(positions_group) {
                    const {
                        assert!(Record::NUM_S_BUCKETS == (u16::MAX as usize) + 1);
                    }
                    let Ok(s_bucket) = u16::try_from(s_bucket) else {
                        continue;
                    };
                    let positions = &mut table_6_proof_targets[usize::from(s_bucket)];
                    // Keep only the first pair of positions for each `s`-bucket
                    if positions[1] == Position::ZERO {
                        *positions = p;
                    }
                }
            }
            for other_match in other_matches {
                // SAFETY: all matches reference valid parent positions
                let (y, p, _) = unsafe { match_to_result::<_, 7, _>(&parent_table, other_match) };

                let s_bucket = y.first_k_bits();

                const {
                    assert!(Record::NUM_S_BUCKETS == (u16::MAX as usize) + 1);
                }
                let Ok(s_bucket) = u16::try_from(s_bucket) else {
                    continue;
                };

                let positions = &mut table_6_proof_targets[usize::from(s_bucket)];
                // Keep only the first pair of positions for each `s`-bucket
                if positions[1] == Position::ZERO {
                    *positions = p;
                }
            }
        }

        let parent_table = parent_table.prune();

        (table_6_proof_targets, parent_table)
    }

    #[cfg(feature = "parallel")]
    /// Parallel version of `create_proof_targets()`: workers collect
    /// `(s_bucket, positions)` pairs per bucket pair, which are merged into the
    /// proof-target array at the end
    pub(super) fn create_proof_targets_parallel(
        parent_table: Table<K, 6>,
        cache: &TablesCache,
    ) -> (
        Box<[[Position; 2]; Record::NUM_S_BUCKETS]>,
        PrunedTable<K, 6>,
    )
    where
        Table<K, 6>: private::NotLastTable,
        EvaluatableUsize<{ metadata_size_bytes(K, 6) }>: Sized,
    {
        // SAFETY: `MaybeUninit` contents do not need initialization
        let buckets_positions = unsafe {
            Box::<[SyncUnsafeCell<[MaybeUninit<_>; REDUCED_MATCHES_COUNT]>; num_buckets(K) - 1]>::new_uninit().assume_init()
        };
        let global_results_counts =
            array::from_fn::<_, { num_buckets(K) - 1 }, _>(|_| SyncUnsafeCell::new(0u16));

        let left_targets = &*cache.left_targets;

        let buckets = parent_table.buckets();
        // Batch size such that each batch of `u16` counts occupies a full cache
        // line, keeping workers off each other's cache lines
        let bucket_batch_size = CACHE_LINE_SIZE / size_of::<u16>();
        let bucket_batch_index = AtomicUsize::new(0);

        rayon::broadcast(|_ctx| {
            loop {
                let bucket_batch_index = bucket_batch_index.fetch_add(1, Ordering::Relaxed);

                let buckets_batch = buckets
                    .array_windows()
                    .enumerate()
                    .skip(bucket_batch_index * bucket_batch_size)
                    .take(bucket_batch_size);

                if buckets_batch.is_empty() {
                    break;
                }

                for (left_bucket_index, [left_bucket, right_bucket]) in buckets_batch {
                    let mut matches = [MaybeUninit::uninit(); _];
                    // SAFETY: the buckets come from the parent table and
                    // correspond to `left_bucket_index`
                    let matches = unsafe {
                        find_matches_in_buckets(
                            left_bucket_index as u32,
                            left_bucket,
                            right_bucket,
                            &mut matches,
                            left_targets,
                        )
                    };
                    let matches = &matches[..matches.len().min(REDUCED_MATCHES_COUNT)];

                    // SAFETY: each `left_bucket_index` is claimed by exactly one
                    // worker, so these exclusive references do not alias
                    let buckets_positions =
                        unsafe { &mut *buckets_positions.get_unchecked(left_bucket_index).get() };
                    let count = unsafe {
                        &mut *global_results_counts.get_unchecked(left_bucket_index).get()
                    };

                    let (grouped_matches, other_matches) =
                        matches.as_chunks::<COMPUTE_FN_SIMD_FACTOR>();

                    let mut reduced_count = 0_usize;
                    for grouped_matches in grouped_matches {
                        // SAFETY: all matches reference valid parent positions
                        let (ys_group, positions_group, _) = unsafe {
                            match_to_result_simd::<_, 7, _>(&parent_table, grouped_matches)
                        };

                        // The `s`-bucket is the first `K` bits of `y`
                        let s_buckets = ys_group >> Simd::splat(u32::from(PARAM_EXT));
                        let s_buckets = s_buckets.to_array();

                        for (s_bucket, p) in s_buckets.into_iter().zip(positions_group) {
                            const {
                                assert!(Record::NUM_S_BUCKETS == (u16::MAX as usize) + 1);
                            }
                            let Ok(s_bucket) = u16::try_from(s_bucket) else {
                                continue;
                            };

                            buckets_positions[reduced_count].write((s_bucket, p));
                            reduced_count += 1;
                        }
                    }
                    for other_match in other_matches {
                        // SAFETY: all matches reference valid parent positions
                        let (y, p, _) =
                            unsafe { match_to_result::<_, 7, _>(&parent_table, other_match) };

                        let s_bucket = y.first_k_bits();

                        const {
                            assert!(Record::NUM_S_BUCKETS == (u16::MAX as usize) + 1);
                        }
                        let Ok(s_bucket) = u16::try_from(s_bucket) else {
                            continue;
                        };

                        buckets_positions[reduced_count].write((s_bucket, p));
                        reduced_count += 1;
                    }

                    *count = reduced_count as u16;
                }
            }
        });

        let parent_table = parent_table.prune();

        let buckets_positions = strip_sync_unsafe_cell(buckets_positions);

        // SAFETY: all-zero bytes are a valid `Position` value
        let mut table_6_proof_targets =
            unsafe { Box::<[[Position; 2]; Record::NUM_S_BUCKETS]>::new_zeroed().assume_init() };

        for (bucket, results_count) in buckets_positions.iter().zip(
            global_results_counts
                .into_iter()
                .map(|count| usize::from(count.into_inner())),
        ) {
            // SAFETY: exactly `results_count` entries were initialized by the
            // workers above
            for &(s_bucket, p) in unsafe { bucket[..results_count].assume_init_ref() } {
                let positions = &mut table_6_proof_targets[usize::from(s_bucket)];
                // Keep only the first pair of positions for each `s`-bucket
                if positions[1] == Position::ZERO {
                    *positions = p;
                }
            }
        }

        (table_6_proof_targets, parent_table)
    }
}

#[cfg(feature = "alloc")]
impl<const K: u8, const TABLE_NUMBER: u8> Table<K, TABLE_NUMBER>
where
    Self: private::NotLastTable,
    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
    #[inline(always)]
    /// Returns the metadata of an entry; for the first table the metadata is
    /// the `x` value, which equals the entry's position
    ///
    /// # Safety
    /// `position` must point at a real entry of this table whose metadata was
    /// initialized
    unsafe fn metadata(&self, position: Position) -> Metadata<K, TABLE_NUMBER> {
        match self {
            Self::First { .. } => {
                // For the first table `x` is the position itself
                Metadata::from(X::from(u32::from(position)))
            }
            Self::Other { metadatas, .. } => {
                // SAFETY: guaranteed by the method contract
                unsafe { metadatas.get_unchecked(usize::from(position)).assume_init() }
            }
            #[cfg(feature = "parallel")]
            Self::OtherBuckets { metadatas, .. } => {
                // SAFETY: guaranteed by the method contract
                unsafe {
                    metadatas
                        .as_flattened()
                        .get_unchecked(usize::from(position))
                        .assume_init()
                }
            }
        }
    }
}

#[cfg(feature = "alloc")]
impl<const K: u8, const TABLE_NUMBER: u8> Table<K, TABLE_NUMBER>
where
    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
    [(); 1 << K]:,
    [(); num_buckets(K)]:,
    [(); num_buckets(K) - 1]:,
{
    #[inline(always)]
    /// Drops everything except the parent positions, which are still needed for
    /// proof reconstruction
    fn prune(self) -> PrunedTable<K, TABLE_NUMBER> {
        match self {
            Self::First { .. } => PrunedTable::First,
            Self::Other { positions, .. } => PrunedTable::Other { positions },
            #[cfg(feature = "parallel")]
            Self::OtherBuckets { positions, .. } => PrunedTable::OtherBuckets { positions },
        }
    }

    #[inline(always)]
    /// Returns the `y`s of this table grouped into buckets, alongside the
    /// positions they correspond to
    pub(super) fn buckets(&self) -> &[[(Position, Y); REDUCED_BUCKET_SIZE]; num_buckets(K)] {
        match self {
            Self::First { buckets, .. } => buckets,
            Self::Other { buckets, .. } => buckets,
            #[cfg(feature = "parallel")]
            Self::OtherBuckets { buckets, .. } => buckets,
        }
    }
}