// ab_farmer_components/sector.rs
1//! Sector-related data structures
2//!
3//! Sectors and corresponding metadata created by functions in [`plotting`](crate::plotting) module
4//! have a specific structure, represented by data structured in this module.
5//!
6//! It is typically not needed to construct these data structures explicitly outside of this crate,
7//! instead they will be returned as a result of certain operations (like plotting).
8
9use ab_core_primitives::checksum::Blake3Checksummed;
10use ab_core_primitives::hashes::Blake3Hash;
11use ab_core_primitives::pieces::{PieceHeader, PieceOffset, Record};
12use ab_core_primitives::sectors::{SBucket, SectorIndex};
13use ab_core_primitives::segments::{HistorySize, SegmentIndex};
14use ab_io_type::trivial_type::TrivialType;
15use parity_scale_codec::{Decode, Encode};
16use rayon::prelude::*;
17use std::ops::{Deref, DerefMut};
18use thiserror::Error;
19use tracing::debug;
20
21/// Size of the part of the plot containing record chunks (s-buckets).
22///
23/// Total size of the plot can be computed with [`sector_size()`].
24#[inline]
25pub const fn sector_record_chunks_size(pieces_in_sector: u16) -> usize {
26    pieces_in_sector as usize * Record::SIZE
27}
28
29/// Size of the part of the plot containing record metadata.
30///
31/// Total size of the plot can be computed with [`sector_size()`].
32#[inline]
33pub const fn sector_record_metadata_size(pieces_in_sector: u16) -> usize {
34    pieces_in_sector as usize * RecordMetadata::encoded_size()
35}
36
37/// Exact sector plot size (sector contents map, record chunks, record metadata).
38///
39/// NOTE: Each sector also has corresponding fixed size metadata whose size can be obtained with
40/// [`SectorMetadataChecksummed::encoded_size()`], size of the record chunks (s-buckets) with
41/// [`sector_record_chunks_size()`] and size of record roots and proofs with
42/// [`sector_record_metadata_size()`]. This function just combines those three together for
43/// convenience.
44#[inline]
45pub const fn sector_size(pieces_in_sector: u16) -> usize {
46    sector_record_chunks_size(pieces_in_sector)
47        + sector_record_metadata_size(pieces_in_sector)
48        + SectorContentsMap::encoded_size(pieces_in_sector)
49        + Blake3Hash::SIZE
50}
51
/// Metadata of the plotted sector
#[derive(Debug, Encode, Decode, Clone)]
pub struct SectorMetadata {
    /// Sector index
    pub sector_index: SectorIndex,
    /// Number of pieces stored in this sector
    pub pieces_in_sector: u16,
    /// S-bucket sizes in a sector (in chunks); prefix sums of these yield the s-bucket offsets,
    /// see [`SectorMetadata::s_bucket_offsets()`]
    pub s_bucket_sizes: Box<[u16; Record::NUM_S_BUCKETS]>,
    // TODO: Should this be constrained to the last segment of the super segment?
    /// Size of the blockchain history at the time of sector creation
    pub history_size: HistorySize,
}
65
66impl SectorMetadata {
67    /// Returns offsets of each s-bucket relatively to the beginning of the sector (in chunks)
68    pub fn s_bucket_offsets(&self) -> Box<[u32; Record::NUM_S_BUCKETS]> {
69        let s_bucket_offsets = self
70            .s_bucket_sizes
71            .iter()
72            .map({
73                let mut base_offset = 0;
74
75                move |s_bucket_size| {
76                    let offset = base_offset;
77                    base_offset += u32::from(*s_bucket_size);
78                    offset
79                }
80            })
81            .collect::<Box<_>>();
82
83        assert_eq!(s_bucket_offsets.len(), Record::NUM_S_BUCKETS);
84        // SAFETY: Number of elements checked above
85        unsafe {
86            Box::from_raw(Box::into_raw(s_bucket_offsets).cast::<[u32; Record::NUM_S_BUCKETS]>())
87        }
88    }
89}
90
/// Same as [`SectorMetadata`], but with checksums verified during SCALE encoding/decoding.
///
/// Derefs (mutably and immutably) to the inner [`SectorMetadata`] for convenient field access.
#[derive(Debug, Clone, Encode, Decode)]
pub struct SectorMetadataChecksummed(Blake3Checksummed<SectorMetadata>);
94
impl From<SectorMetadata> for SectorMetadataChecksummed {
    #[inline]
    fn from(value: SectorMetadata) -> Self {
        // Wrapping in `Blake3Checksummed` adds checksum verification during SCALE
        // encoding/decoding
        Self(Blake3Checksummed(value))
    }
}
101
impl Deref for SectorMetadataChecksummed {
    type Target = SectorMetadata;

    #[inline]
    fn deref(&self) -> &Self::Target {
        // Expose the inner `SectorMetadata` transparently
        &self.0.0
    }
}
110
impl DerefMut for SectorMetadataChecksummed {
    #[inline]
    fn deref_mut(&mut self) -> &mut Self::Target {
        // Mutable access to the inner `SectorMetadata`
        &mut self.0.0
    }
}
117
118impl SectorMetadataChecksummed {
119    /// Size of encoded checksummed sector metadata.
120    ///
121    /// For sector plot size use [`sector_size()`].
122    #[inline]
123    pub fn encoded_size() -> usize {
124        let default = SectorMetadataChecksummed::from(SectorMetadata {
125            sector_index: SectorIndex::ZERO,
126            pieces_in_sector: 0,
127            // TODO: Should have been just `::new()`, but https://github.com/rust-lang/rust/issues/53827
128            // SAFETY: Data structure filled with zeroes is a valid invariant
129            s_bucket_sizes: unsafe { Box::new_zeroed().assume_init() },
130            history_size: HistorySize::from(SegmentIndex::ZERO),
131        });
132
133        default.encoded_size()
134    }
135}
136
/// Root and proof corresponding to the same record
// NOTE(review): fields hold the piece header and a piece checksum; the root and proof are
// presumably contained within `PieceHeader` — confirm against `PieceHeader`'s definition
#[derive(Debug, Default, Copy, Clone, Encode, Decode)]
#[repr(C)]
pub(crate) struct RecordMetadata {
    /// Piece header
    pub(crate) piece_header: PieceHeader,
    /// Checksum (hash) of the whole piece
    pub(crate) piece_checksum: Blake3Hash,
}
146
// `RecordMetadata::encoded_size()` returns `size_of::<RecordMetadata>()`; with `#[repr(C)]` an
// alignment of 1 guarantees the struct contains no padding bytes, keeping that size meaningful
const {
    assert!(align_of::<RecordMetadata>() == 1);
}
150
impl RecordMetadata {
    /// Encoded size of record metadata in bytes.
    ///
    /// Equals the in-memory size: the struct is `#[repr(C)]` with alignment 1 (asserted above),
    /// so there are no padding bytes.
    pub(crate) const fn encoded_size() -> usize {
        size_of::<Self>()
    }
}
156
/// Raw sector before it is transformed and written to plot, used during plotting
#[derive(Debug, Clone)]
pub(crate) struct RawSector {
    /// List of records, likely downloaded from the network
    pub(crate) records: Vec<Record>,
    /// Metadata (root and proof) corresponding to the same record; one entry per record
    pub(crate) metadata: Vec<RecordMetadata>,
}
165
166impl RawSector {
167    /// Create new raw sector with internal vectors being allocated and filled with default values
168    pub(crate) fn new(pieces_in_sector: u16) -> Self {
169        Self {
170            records: Record::new_zero_vec(usize::from(pieces_in_sector)),
171            metadata: vec![RecordMetadata::default(); usize::from(pieces_in_sector)],
172        }
173    }
174}
175
/// S-buckets at which proofs were found.
///
/// S-buckets are grouped by 8, within each `u8` bits right to left (LSB first) indicate the
/// presence of a proof for the corresponding s-bucket, so that the whole array of bytes can be
/// thought of as a large set of bits (bit `i` lives in byte `i / 8` at mask `1 << (i % 8)`).
///
/// There will be at most [`Record::NUM_CHUNKS`] proofs produced/bits set to `1`.
pub type FoundProofs = [u8; Record::NUM_S_BUCKETS / u8::BITS as usize];
184
/// Error happening when trying to create [`SectorContentsMap`] from bytes
#[derive(Debug, Error, Copy, Clone, Eq, PartialEq)]
pub enum SectorContentsMapFromBytesError {
    /// Invalid bytes length
    #[error("Invalid bytes length, expected {expected}, actual {actual}")]
    InvalidBytesLength {
        /// Expected length
        expected: usize,
        /// Actual length
        actual: usize,
    },
    /// Checksum mismatch: stored checksum doesn't match the hash of the contents
    #[error("Checksum mismatch")]
    ChecksumMismatch,
}
200
/// Error happening when trying to encode [`SectorContentsMap`] into bytes
#[derive(Debug, Error, Copy, Clone, Eq, PartialEq)]
pub enum SectorContentsMapEncodeIntoError {
    /// Invalid bytes length of the output buffer
    #[error("Invalid bytes length, expected {expected}, actual {actual}")]
    InvalidBytesLength {
        /// Expected length
        expected: usize,
        /// Actual length
        actual: usize,
    },
}
213
/// Error happening when iterating over the contents of [`SectorContentsMap`]
// NOTE: previous doc comment was a copy-paste of the from-bytes error ("create ... from bytes")
#[derive(Debug, Error, Copy, Clone, Eq, PartialEq)]
pub enum SectorContentsMapIterationError {
    /// S-bucket provided is out of range
    #[error("S-bucket provided {provided} is out of range, max {max}")]
    SBucketOutOfRange {
        /// Provided s-bucket
        provided: usize,
        /// Max s-bucket
        max: usize,
    },
}
226
/// Map of sector contents.
///
/// Abstraction on top of bitfields that allow making sense of sector contents that contain
/// encoded (meaning erasure coded and encoded with existing PoSpace proof) chunks used at the same
/// time both in records (before writing to plot) and s-buckets (written into the plot) format
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct SectorContentsMap {
    /// Bitfields for each record, each bit is `true` if record chunk at corresponding position was
    /// used; one [`FoundProofs`] bit array per record
    record_chunks_used: Vec<FoundProofs>,
}
238
239impl SectorContentsMap {
240    /// Create new sector contents map initialized with zeroes to store data for `pieces_in_sector`
241    /// records
242    pub fn new(pieces_in_sector: u16) -> Self {
243        Self {
244            record_chunks_used: vec![[0; _]; usize::from(pieces_in_sector)],
245        }
246    }
247
248    /// Reconstruct sector contents map from bytes.
249    ///
250    /// Returns error if length of the vector doesn't match [`Self::encoded_size()`] for
251    /// `pieces_in_sector`.
252    pub fn from_bytes(
253        bytes: &[u8],
254        pieces_in_sector: u16,
255    ) -> Result<Self, SectorContentsMapFromBytesError> {
256        if bytes.len() != Self::encoded_size(pieces_in_sector) {
257            return Err(SectorContentsMapFromBytesError::InvalidBytesLength {
258                expected: Self::encoded_size(pieces_in_sector),
259                actual: bytes.len(),
260            });
261        }
262
263        let (single_records_bit_arrays, expected_checksum) =
264            bytes.split_at(bytes.len() - Blake3Hash::SIZE);
265        // SAFETY: All bit patterns are valid
266        let expected_checksum = unsafe {
267            Blake3Hash::from_bytes(expected_checksum).expect("No alignment requirements; qed")
268        };
269        // Verify checksum
270        let actual_checksum = Blake3Hash::from(blake3::hash(single_records_bit_arrays));
271        if &actual_checksum != expected_checksum {
272            debug!(
273                %actual_checksum,
274                %expected_checksum,
275                "Hash doesn't match, corrupted bytes"
276            );
277
278            return Err(SectorContentsMapFromBytesError::ChecksumMismatch);
279        }
280
281        let mut record_chunks_used = vec![[0; _]; pieces_in_sector.into()];
282
283        record_chunks_used
284            .as_flattened_mut()
285            .copy_from_slice(single_records_bit_arrays);
286
287        Ok(Self { record_chunks_used })
288    }
289
290    /// Size of sector contents map when encoded and stored in the plot for specified number of
291    /// pieces in sector
292    pub const fn encoded_size(pieces_in_sector: u16) -> usize {
293        size_of::<FoundProofs>() * pieces_in_sector as usize + Blake3Hash::SIZE
294    }
295
296    /// Encode internal contents into `output`
297    pub fn encode_into(&self, output: &mut [u8]) -> Result<(), SectorContentsMapEncodeIntoError> {
298        if output.len() != Self::encoded_size(self.record_chunks_used.len() as u16) {
299            return Err(SectorContentsMapEncodeIntoError::InvalidBytesLength {
300                expected: Self::encoded_size(self.record_chunks_used.len() as u16),
301                actual: output.len(),
302            });
303        }
304
305        let slice = self.record_chunks_used.as_flattened();
306        // Write data and checksum
307        output[..slice.len()].copy_from_slice(slice);
308        output[slice.len()..].copy_from_slice(blake3::hash(slice).as_bytes());
309
310        Ok(())
311    }
312
313    /// Iterate over individual record chunks (s-buckets) that were used
314    pub fn iter_record_chunks_used(&self) -> &[FoundProofs] {
315        &self.record_chunks_used
316    }
317
318    /// Iterate mutably over individual record chunks (s-buckets) that were used
319    pub fn iter_record_chunks_used_mut(&mut self) -> &mut [FoundProofs] {
320        &mut self.record_chunks_used
321    }
322
323    /// Returns sizes of each s-bucket
324    pub fn s_bucket_sizes(&self) -> Box<[u16; Record::NUM_S_BUCKETS]> {
325        // Rayon doesn't support iteration over custom types yet
326        let s_bucket_sizes = (u16::from(SBucket::ZERO)..=u16::from(SBucket::MAX))
327            .into_par_iter()
328            .map(SBucket::from)
329            .map(|s_bucket| {
330                self.iter_s_bucket_piece_offsets(s_bucket)
331                    .expect("S-bucket guaranteed to be in range; qed")
332                    .count() as u16
333            })
334            .collect::<Box<_>>();
335
336        assert_eq!(s_bucket_sizes.len(), Record::NUM_S_BUCKETS);
337
338        // SAFETY: Number of elements checked above
339        unsafe {
340            Box::from_raw(Box::into_raw(s_bucket_sizes).cast::<[u16; Record::NUM_S_BUCKETS]>())
341        }
342    }
343
344    /// Creates an iterator of `(s_bucket, chunk_location)`, where `s_bucket` is the position of the
345    /// chunk in the erasure coded record and `chunk_location` is the offset of the chunk in the
346    /// plot (across all s-buckets).
347    pub fn iter_record_chunk_to_plot(
348        &self,
349        piece_offset: PieceOffset,
350    ) -> impl Iterator<Item = (SBucket, usize)> + '_ {
351        // Iterate over all s-buckets
352        (SBucket::ZERO..=SBucket::MAX)
353            // In each s-bucket map all records used
354            .flat_map(|s_bucket| {
355                self.iter_s_bucket_piece_offsets(s_bucket)
356                    .expect("S-bucket guaranteed to be in range; qed")
357                    .map(move |current_piece_offset| (s_bucket, current_piece_offset))
358            })
359            // We've got contents of all s-buckets in a flat iterator, enumerating them so it is
360            // possible to find in the plot later if desired
361            .enumerate()
362            // Everything about the piece offset we care about
363            .filter_map(move |(chunk_location, (s_bucket, current_piece_offset))| {
364                // In case record for `piece_offset` is found, return necessary information
365                (current_piece_offset == piece_offset).then_some((s_bucket, chunk_location))
366            })
367            // Tiny optimization in case we have found chunks for all records already
368            .take(Record::NUM_CHUNKS)
369    }
370
371    /// Creates an iterator of `Option<chunk_offset>`, where each entry corresponds
372    /// s-bucket/position of the chunk in the erasure coded record, `chunk_offset` is the offset of
373    /// the chunk in the corresponding s-bucket.
374    ///
375    /// Similar to `Self::iter_record_chunk_to_plot()`, but runs in parallel, returns entries for
376    /// all s-buckets and offsets are within corresponding s-buckets rather than the whole plot.
377    pub fn par_iter_record_chunk_to_plot(
378        &self,
379        piece_offset: PieceOffset,
380    ) -> impl IndexedParallelIterator<Item = Option<usize>> + '_ {
381        let piece_offset = usize::from(piece_offset);
382        (u16::from(SBucket::ZERO)..=u16::from(SBucket::MAX))
383            .into_par_iter()
384            .map(SBucket::from)
385            // In each s-bucket map all records used
386            .map(move |s_bucket| {
387                let byte_offset = usize::from(s_bucket) / u8::BITS as usize;
388                let bit_mask = 1 << (usize::from(s_bucket) % u8::BITS as usize);
389
390                if self.record_chunks_used[piece_offset][byte_offset] & bit_mask == 0 {
391                    return None;
392                }
393
394                // How many other record chunks we have in s-bucket before piece offset we care
395                // about
396                let chunk_offset = self
397                    .record_chunks_used
398                    .iter()
399                    .take(piece_offset)
400                    .filter(move |record_chunks_used| {
401                        record_chunks_used[byte_offset] & bit_mask != 0
402                    })
403                    .count();
404
405                Some(chunk_offset)
406            })
407    }
408
409    /// Creates an iterator of piece offsets to which corresponding chunks belong.
410    ///
411    /// Returns error if `s_bucket` is outside of [`Record::NUM_S_BUCKETS`] range.
412    pub fn iter_s_bucket_piece_offsets(
413        &self,
414        s_bucket: SBucket,
415    ) -> Result<impl Iterator<Item = PieceOffset> + '_, SectorContentsMapIterationError> {
416        let s_bucket = usize::from(s_bucket);
417
418        if s_bucket >= Record::NUM_S_BUCKETS {
419            return Err(SectorContentsMapIterationError::SBucketOutOfRange {
420                provided: s_bucket,
421                max: Record::NUM_S_BUCKETS,
422            });
423        }
424
425        Ok((PieceOffset::ZERO..)
426            .zip(&self.record_chunks_used)
427            .filter_map(move |(piece_offset, record_chunks_used)| {
428                let byte_offset = s_bucket / u8::BITS as usize;
429                let bit_mask = 1 << (s_bucket % u8::BITS as usize);
430
431                (record_chunks_used[byte_offset] & bit_mask != 0).then_some(piece_offset)
432            }))
433    }
434
435    /// Iterate over chunks of s-bucket indicating if record chunk is used at corresponding
436    /// position.
437    ///
438    /// ## Panics
439    /// Panics if `s_bucket` is outside of [`Record::NUM_S_BUCKETS`] range.
440    pub fn iter_s_bucket_used_record_chunks_used(
441        &self,
442        s_bucket: SBucket,
443    ) -> Result<impl Iterator<Item = bool> + '_, SectorContentsMapIterationError> {
444        let s_bucket = usize::from(s_bucket);
445
446        if s_bucket >= Record::NUM_S_BUCKETS {
447            return Err(SectorContentsMapIterationError::SBucketOutOfRange {
448                provided: s_bucket,
449                max: Record::NUM_S_BUCKETS,
450            });
451        }
452
453        Ok(self
454            .record_chunks_used
455            .iter()
456            .map(move |record_chunks_used| {
457                let byte_offset = s_bucket / u8::BITS as usize;
458                let bit_mask = 1 << (s_bucket % u8::BITS as usize);
459
460                record_chunks_used[byte_offset] & bit_mask != 0
461            }))
462    }
463}