1use crate::objects::{BlockObject, GlobalObject};
2use ab_core_primitives::block::BlockNumber;
3use ab_core_primitives::hashes::Blake3Hash;
4use ab_core_primitives::pieces::Record;
5use ab_core_primitives::segments::{
6 ArchivedBlockProgress, ArchivedHistorySegment, LastArchivedBlock, RecordedHistorySegment,
7 SegmentHeader, SegmentIndex, SegmentRoot,
8};
9use ab_erasure_coding::ErasureCoding;
10use ab_merkle_tree::balanced::BalancedMerkleTree;
11use alloc::collections::VecDeque;
12use alloc::vec;
13use alloc::vec::Vec;
14use core::cmp::Ordering;
15use core::num::NonZeroU32;
16use core::ops::Deref;
17use parity_scale_codec::{Decode, Encode, Input, Output};
18#[cfg(feature = "parallel")]
19use rayon::prelude::*;
20
/// [`Output`] adapter that writes encoded bytes into the records of the pieces of an
/// [`ArchivedHistorySegment`]
struct ArchivedHistorySegmentOutput<'a> {
    /// Segment whose piece records receive the written bytes
    segment: &'a mut ArchivedHistorySegment,
    /// Position of the next byte to write, counted across records in units of `Record::SIZE`
    /// (`offset / Record::SIZE` selects the piece, `offset % Record::SIZE` the position inside
    /// its record)
    offset: usize,
}
25
26impl Output for ArchivedHistorySegmentOutput<'_> {
27 #[inline]
28 fn write(&mut self, mut bytes: &[u8]) {
29 while !bytes.is_empty() {
30 let piece = self
31 .segment
32 .get_mut(self.offset / Record::SIZE)
33 .expect("Encoding never exceeds the segment size; qed");
34 let output = &mut piece.record_mut().as_flattened_mut()[self.offset % Record::SIZE..];
35 let bytes_to_write = output.len().min(bytes.len());
36 output[..bytes_to_write].copy_from_slice(&bytes[..bytes_to_write]);
37 self.offset += bytes_to_write;
38 bytes = &bytes[bytes_to_write..];
39 }
40 }
41}
42
/// Ordered collection of [`SegmentItem`]s.
///
/// The encoding is the encodings of all items written one after another (there is no length
/// prefix for the collection itself).
#[derive(Debug, Default, Clone, Eq, PartialEq)]
pub struct Segment {
    /// Items in the order in which they are encoded
    pub items: Vec<SegmentItem>,
}
49
50impl Encode for Segment {
51 #[inline(always)]
52 fn size_hint(&self) -> usize {
53 RecordedHistorySegment::SIZE
54 }
55
56 #[inline]
57 fn encode_to<O: Output + ?Sized>(&self, dest: &mut O) {
58 for item in &self.items {
59 item.encode_to(dest);
60 }
61 }
62}
63
64impl Decode for Segment {
65 #[inline]
66 fn decode<I: Input>(input: &mut I) -> Result<Self, parity_scale_codec::Error> {
67 let mut items = Vec::new();
68 loop {
69 match input.remaining_len()? {
70 Some(0) => {
71 break;
72 }
73 Some(_) => {
74 }
76 None => {
77 return Err(
78 "Source doesn't report remaining length, decoding not possible".into(),
79 );
80 }
81 }
82
83 match SegmentItem::decode(input) {
84 Ok(item) => {
85 items.push(item);
86 }
87 Err(error) => {
88 return Err(error.chain("Could not decode `Segment::items`"));
89 }
90 }
91 }
92
93 Ok(Self { items })
94 }
95}
96
/// Newtype around the bytes of a block (or of a part of a block) stored in a [`SegmentItem`].
///
/// Encoded as a `u32` length followed by the bytes themselves.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct BlockBytes(Vec<u8>);
101
102impl Deref for BlockBytes {
103 type Target = [u8];
104
105 #[inline(always)]
106 fn deref(&self) -> &Self::Target {
107 &self.0
108 }
109}
110
111impl From<BlockBytes> for Vec<u8> {
112 #[inline(always)]
113 fn from(value: BlockBytes) -> Self {
114 value.0
115 }
116}
117
118impl Encode for BlockBytes {
119 #[inline(always)]
120 fn size_hint(&self) -> usize {
121 size_of::<u32>() + self.0.len()
122 }
123
124 #[inline]
125 fn encode_to<O: Output + ?Sized>(&self, dest: &mut O) {
126 let length = u32::try_from(self.0.len())
127 .expect("All constructors guarantee the size doesn't exceed `u32`; qed");
128
129 length.encode_to(dest);
130 dest.write(&self.0);
131 }
132}
133
134impl Decode for BlockBytes {
135 #[inline]
136 fn decode<I: Input>(input: &mut I) -> Result<Self, parity_scale_codec::Error> {
137 let length = u32::decode(input)?;
138 if length as usize > (RecordedHistorySegment::SIZE - size_of::<u32>()) {
139 return Err("Segment item size is impossibly large".into());
140 }
141 let mut bytes = vec![0; length as usize];
145 input.read(&mut bytes)?;
146 Ok(Self(bytes))
147 }
148}
149
150impl BlockBytes {
151 #[inline(always)]
152 fn truncate(&mut self, size: usize) {
153 self.0.truncate(size)
154 }
155}
156
/// Kinds of items that a [`Segment`] is made of.
///
/// The explicit codec indices are part of the encoded format.
#[derive(Debug, Clone, Eq, PartialEq, Encode, Decode)]
pub enum SegmentItem {
    /// Placeholder variant with codec index `0`; the archiver never puts it into its buffer or
    /// into segments it produces
    #[codec(index = 0)]
    Padding,
    /// Block that is contained in the segment as a whole
    #[codec(index = 1)]
    Block {
        /// Block bytes
        bytes: BlockBytes,
        /// Objects contained in `bytes`; skipped by the codec, so not part of the encoding and
        /// not restored on decoding
        #[doc(hidden)]
        #[codec(skip)]
        block_objects: Vec<BlockObject>,
    },
    /// Beginning of a block that did not fit into the segment, the rest follows as
    /// [`SegmentItem::BlockContinuation`]
    #[codec(index = 2)]
    BlockStart {
        /// Bytes of the beginning of the block
        bytes: BlockBytes,
        /// Objects contained in `bytes`; skipped by the codec, so not part of the encoding and
        /// not restored on decoding
        #[doc(hidden)]
        #[codec(skip)]
        block_objects: Vec<BlockObject>,
    },
    /// Further bytes of a block whose beginning was archived earlier
    #[codec(index = 3)]
    BlockContinuation {
        /// Bytes of this part of the block
        bytes: BlockBytes,
        /// Objects contained in `bytes`; skipped by the codec, so not part of the encoding and
        /// not restored on decoding
        #[doc(hidden)]
        #[codec(skip)]
        block_objects: Vec<BlockObject>,
    },
    /// Header of the previously produced segment
    #[codec(index = 4)]
    ParentSegmentHeader(SegmentHeader),
}
197
/// A newly produced segment: its header together with all of its pieces
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct NewArchivedSegment {
    /// Header of the segment
    pub segment_header: SegmentHeader,
    /// Pieces of the segment, with record roots and proofs already filled in
    pub pieces: ArchivedHistorySegment,
}
207
/// Result of adding a block to the archiver
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct ArchiveBlockOutcome {
    /// Segments that were completed by adding the block, in the order they were produced.
    ///
    /// Empty when the buffered data is not enough for a full segment yet.
    pub archived_segments: Vec<NewArchivedSegment>,

    /// Mappings of block objects to their piece index and offset, both for the segments produced
    /// and for the items that remain buffered for the next segment
    pub global_objects: Vec<GlobalObject>,
}
219
/// Errors that can occur when creating an archiver from a previous state
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, thiserror::Error)]
pub enum ArchiverInstantiationError {
    /// The segment header says the last block was archived only partially, yet the number of
    /// archived bytes equals the size of the whole encoded block (the value carried)
    #[error("Invalid last archived block, its size {0} bytes is the same as the encoded block")]
    InvalidLastArchivedBlock(u32),
    /// The encoded block is smaller than the number of its bytes that were already archived
    #[error(
        "Invalid block, its size {block_bytes} bytes is smaller than the already archived block \
        {archived_block_bytes} bytes"
    )]
    InvalidBlockSmallSize {
        /// Size of the provided encoded block in bytes
        block_bytes: u32,
        /// Number of bytes of the block that were already archived according to the segment
        /// header
        archived_block_bytes: u32,
    },
}
239
/// Turns a sequence of blocks into archived segments.
///
/// Blocks are buffered as [`SegmentItem`]s; every time enough data for a full segment is
/// available, a segment is cut off the front of the buffer and archived.
#[derive(Debug, Clone)]
pub struct Archiver {
    /// Items that were not included in a segment yet, in the order they will be archived
    buffer: VecDeque<SegmentItem>,
    /// Erasure coding instance used to produce parity records and parity chunks
    erasure_coding: ErasureCoding,
    /// Index of the segment that will be produced next
    segment_index: SegmentIndex,
    /// Hash of the header of the most recently produced segment (default hash before the first
    /// segment)
    prev_segment_header_hash: Blake3Hash,
    /// Last block that was archived (fully or partially), `None` until the first segment is
    /// produced or an initial state is provided
    last_archived_block: Option<LastArchivedBlock>,
}
264
265impl Archiver {
266 pub fn new(erasure_coding: ErasureCoding) -> Self {
268 Self {
269 buffer: VecDeque::default(),
270 erasure_coding,
271 segment_index: SegmentIndex::ZERO,
272 prev_segment_header_hash: Blake3Hash::default(),
273 last_archived_block: None,
274 }
275 }
276
277 pub fn with_initial_state(
281 erasure_coding: ErasureCoding,
282 segment_header: SegmentHeader,
283 encoded_block: &[u8],
284 mut block_objects: Vec<BlockObject>,
285 ) -> Result<Self, ArchiverInstantiationError> {
286 let mut archiver = Self::new(erasure_coding);
287
288 archiver.segment_index = segment_header.segment_index() + SegmentIndex::ONE;
289 archiver.prev_segment_header_hash = segment_header.hash();
290 archiver.last_archived_block = Some(segment_header.last_archived_block);
291
292 archiver
294 .buffer
295 .push_back(SegmentItem::ParentSegmentHeader(segment_header));
296
297 if let Some(archived_block_bytes) = archiver
298 .last_archived_block
299 .expect("Just inserted; qed")
300 .partial_archived()
301 {
302 let archived_block_bytes = archived_block_bytes.get();
303 let encoded_block_bytes = u32::try_from(encoded_block.len())
304 .expect("Blocks length is never bigger than u32; qed");
305
306 match encoded_block_bytes.cmp(&archived_block_bytes) {
307 Ordering::Less => {
308 return Err(ArchiverInstantiationError::InvalidBlockSmallSize {
309 block_bytes: encoded_block_bytes,
310 archived_block_bytes,
311 });
312 }
313 Ordering::Equal => {
314 return Err(ArchiverInstantiationError::InvalidLastArchivedBlock(
315 encoded_block_bytes,
316 ));
317 }
318 Ordering::Greater => {
319 block_objects.retain_mut(|block_object: &mut BlockObject| {
322 if block_object.offset >= archived_block_bytes {
323 block_object.offset -= archived_block_bytes;
324 true
325 } else {
326 false
327 }
328 });
329 archiver.buffer.push_back(SegmentItem::BlockContinuation {
330 bytes: BlockBytes(
331 encoded_block[(archived_block_bytes as usize)..].to_vec(),
332 ),
333 block_objects,
334 });
335 }
336 }
337 }
338
339 Ok(archiver)
340 }
341
342 pub fn last_archived_block_number(&self) -> Option<BlockNumber> {
344 self.last_archived_block
345 .map(|last_archived_block| last_archived_block.number())
346 }
347
348 pub fn add_block(
353 &mut self,
354 bytes: Vec<u8>,
355 block_objects: Vec<BlockObject>,
356 ) -> Option<ArchiveBlockOutcome> {
357 if !(1..u32::MAX as usize).contains(&bytes.len()) {
358 return None;
359 }
360
361 self.buffer.push_back(SegmentItem::Block {
363 bytes: BlockBytes(bytes),
364 block_objects,
365 });
366
367 let mut archived_segments = Vec::new();
368 let mut object_mapping = Vec::new();
369
370 while let Some(mut segment) = self.produce_segment() {
372 object_mapping.extend(Self::produce_object_mappings(
374 self.segment_index,
375 segment.items.iter_mut(),
376 ));
377 archived_segments.push(self.produce_archived_segment(segment));
378 }
379
380 object_mapping.extend(self.produce_next_segment_mappings());
382
383 Some(ArchiveBlockOutcome {
384 archived_segments,
385 global_objects: object_mapping,
386 })
387 }
388
    /// Tries to assemble the next segment from the items at the front of the buffer.
    ///
    /// Returns `None` if the buffered items are not enough to fill a segment; the buffer is
    /// restored to its previous contents and `self.last_archived_block` is left untouched in
    /// that case.
    ///
    /// Otherwise returns the segment and stores the updated last archived block. A block that
    /// doesn't fit completely is split: the part that fits goes into the segment, the rest is
    /// put back to the front of the buffer as [`SegmentItem::BlockContinuation`].
    fn produce_segment(&mut self) -> Option<Segment> {
        let mut segment = Segment {
            items: Vec::with_capacity(self.buffer.len()),
        };

        // Work on a copy, it is only stored back once a full segment was assembled
        let mut last_archived_block = self.last_archived_block;

        let mut segment_size = segment.encoded_size();

        // Keep adding items as long as at least 6 bytes are free. This appears to be the
        // smallest block item: 1 byte for the enum variant, 4 bytes for the `u32` length and
        // 1 byte of data.
        while RecordedHistorySegment::SIZE.saturating_sub(segment_size) >= 6 {
            let segment_item = match self.buffer.pop_front() {
                Some(segment_item) => segment_item,
                None => {
                    // Not enough data for a full segment: return the collected items to the
                    // front of the buffer (in reverse, so the original order is restored)
                    for segment_item in segment.items.into_iter().rev() {
                        self.buffer.push_front(segment_item);
                    }

                    return None;
                }
            };

            let segment_item_encoded_size = segment_item.encoded_size();
            segment_size += segment_item_encoded_size;

            // Number of bytes by which the segment would be exceeded with this item included
            // as is, zero if the item fits
            let spill_over = segment_size.saturating_sub(RecordedHistorySegment::SIZE);

            let segment_item = match segment_item {
                SegmentItem::Padding => {
                    unreachable!("Buffer never contains SegmentItem::Padding; qed");
                }
                SegmentItem::Block {
                    mut bytes,
                    mut block_objects,
                } => {
                    let last_archived_block =
                        if let Some(last_archived_block) = &mut last_archived_block {
                            // A new block: advance the block number and assume it is archived
                            // completely, corrected below if the block has to be split
                            last_archived_block
                                .number
                                .replace(last_archived_block.number() + BlockNumber::ONE);
                            last_archived_block.set_complete();
                            last_archived_block
                        } else {
                            // The very first block to be archived starts at block number zero
                            last_archived_block.insert(LastArchivedBlock {
                                number: BlockNumber::ZERO.into(),
                                archived_progress: ArchivedBlockProgress::new_complete(),
                            })
                        };

                    if spill_over == 0 {
                        SegmentItem::Block {
                            bytes,
                            block_objects,
                        }
                    } else {
                        // Number of block bytes that still fit into this segment
                        let split_point = bytes.len() - spill_over;

                        {
                            let continuation_bytes = bytes[split_point..].to_vec();
                            // Objects located at or after the split point move to the
                            // continuation, with offsets relative to its beginning
                            let continuation_block_objects = block_objects
                                .extract_if(.., |block_object: &mut BlockObject| {
                                    if block_object.offset >= split_point as u32 {
                                        block_object.offset -= split_point as u32;
                                        true
                                    } else {
                                        false
                                    }
                                })
                                .collect();

                            // The rest of the block becomes the first item in the buffer
                            self.buffer.push_front(SegmentItem::BlockContinuation {
                                bytes: BlockBytes(continuation_bytes),
                                block_objects: continuation_block_objects,
                            });
                        }

                        bytes.truncate(split_point);
                        // Record how many bytes of the block were archived so far
                        let archived_bytes = u32::try_from(split_point)
                            .ok()
                            .and_then(NonZeroU32::new)
                            .expect(
                                "`::add_block()` method ensures block is not empty and doesn't \
                                exceed `u32::MAX`; qed",
                            );
                        last_archived_block.set_partial_archived(archived_bytes);

                        SegmentItem::BlockStart {
                            bytes,
                            block_objects,
                        }
                    }
                }
                SegmentItem::BlockStart { .. } => {
                    unreachable!("Buffer never contains SegmentItem::BlockStart; qed");
                }
                SegmentItem::BlockContinuation {
                    mut bytes,
                    mut block_objects,
                } => {
                    let last_archived_block = last_archived_block.as_mut().expect(
                        "Block continuation implies that there are some bytes archived \
                        already; qed",
                    );

                    let previously_archived_bytes = last_archived_block.partial_archived().expect(
                        "Block continuation implies that there are some bytes archived \
                        already; qed",
                    );

                    if spill_over == 0 {
                        // The rest of the block fits, the block is archived completely now
                        last_archived_block.set_complete();

                        SegmentItem::BlockContinuation {
                            bytes,
                            block_objects,
                        }
                    } else {
                        // Number of continuation bytes that still fit into this segment
                        let split_point = bytes.len() - spill_over;

                        {
                            let continuation_bytes = bytes[split_point..].to_vec();
                            // Objects located at or after the split point move to the next
                            // continuation, with offsets relative to its beginning
                            let continuation_block_objects = block_objects
                                .extract_if(.., |block_object: &mut BlockObject| {
                                    if block_object.offset >= split_point as u32 {
                                        block_object.offset -= split_point as u32;
                                        true
                                    } else {
                                        false
                                    }
                                })
                                .collect();
                            // The rest of the block becomes the first item in the buffer
                            self.buffer.push_front(SegmentItem::BlockContinuation {
                                bytes: BlockBytes(continuation_bytes),
                                block_objects: continuation_block_objects,
                            });
                        }

                        bytes.truncate(split_point);
                        // Archived progress accumulates over all parts of the block archived
                        // so far
                        let archived_bytes = previously_archived_bytes.get()
                            + u32::try_from(split_point).expect(
                                "`::add_block()` method ensures block length doesn't \
                                exceed `u32::MAX`; qed",
                            );
                        let archived_bytes = NonZeroU32::new(archived_bytes).expect(
                            "Spillover means non-zero length of the block was archived; qed",
                        );
                        last_archived_block.set_partial_archived(archived_bytes);

                        SegmentItem::BlockContinuation {
                            bytes,
                            block_objects,
                        }
                    }
                }
                SegmentItem::ParentSegmentHeader(parent_segment_header) => {
                    // Included as is
                    SegmentItem::ParentSegmentHeader(parent_segment_header)
                }
            };

            segment.items.push(segment_item);
        }

        // A full segment was assembled, make the archiving progress permanent
        self.last_archived_block = last_archived_block;

        Some(segment)
    }
572
573 fn produce_next_segment_mappings(&mut self) -> Vec<GlobalObject> {
579 Self::produce_object_mappings(self.segment_index, self.buffer.iter_mut())
580 }
581
582 fn produce_object_mappings<'a>(
587 segment_index: SegmentIndex,
588 items: impl Iterator<Item = &'a mut SegmentItem>,
589 ) -> Vec<GlobalObject> {
590 let source_piece_indexes =
591 &segment_index.segment_piece_indexes()[..RecordedHistorySegment::NUM_RAW_RECORDS];
592
593 let mut corrected_object_mapping = Vec::new();
594 let mut base_offset_in_segment = Segment::default().encoded_size();
595 for segment_item in items {
596 match segment_item {
597 SegmentItem::Padding => {
598 unreachable!(
599 "Segment during archiving never contains SegmentItem::Padding; qed"
600 );
601 }
602 SegmentItem::Block {
603 bytes: _,
604 block_objects,
605 }
606 | SegmentItem::BlockStart {
607 bytes: _,
608 block_objects,
609 }
610 | SegmentItem::BlockContinuation {
611 bytes: _,
612 block_objects,
613 } => {
614 for block_object in block_objects.drain(..) {
615 let offset_in_segment = base_offset_in_segment
617 + 1
618 + u32::encoded_fixed_size().expect("Fixed size; qed")
619 + block_object.offset as usize;
620 let raw_piece_offset = (offset_in_segment % Record::SIZE)
621 .try_into()
622 .expect("Offset within piece should always fit in 32-bit integer; qed");
623 corrected_object_mapping.push(GlobalObject {
624 hash: block_object.hash,
625 piece_index: source_piece_indexes[offset_in_segment / Record::SIZE],
626 offset: raw_piece_offset,
627 });
628 }
629 }
630 SegmentItem::ParentSegmentHeader(_) => {
631 }
633 }
634
635 base_offset_in_segment += segment_item.encoded_size();
636 }
637
638 corrected_object_mapping
639 }
640
    /// Archives `segment`: encodes it into pieces, adds parity data, computes roots and proofs
    /// and builds the segment header.
    ///
    /// Also advances the archiver state: increments `segment_index`, remembers the hash of the
    /// new header and puts the header to the front of the buffer, so it becomes the first item
    /// of the next segment.
    ///
    /// Panics if `self.last_archived_block` is `None`.
    fn produce_archived_segment(&mut self, segment: Segment) -> NewArchivedSegment {
        let mut pieces = {
            let mut pieces = ArchivedHistorySegment::default();

            // Write the encoded segment into the piece records, starting at the first one
            segment.encode_to(&mut ArchivedHistorySegmentOutput {
                segment: &mut pieces,
                offset: 0,
            });
            // The segment was consumed by encoding and is not needed beyond this point
            drop(segment);

            // The first `NUM_RAW_RECORDS` pieces hold the source data, the rest receive parity
            let (source_shards, parity_shards) =
                pieces.split_at_mut(RecordedHistorySegment::NUM_RAW_RECORDS);

            // Fill records of the parity pieces from records of the source pieces
            self.erasure_coding
                .extend(
                    source_shards.iter().map(|shard| shard.record()),
                    parity_shards.iter_mut().map(|shard| shard.record_mut()),
                )
                .expect("Statically correct parameters; qed");

            pieces
        };

        // Compute the root of every record (in parallel if the `parallel` feature is enabled)
        // and store it in the corresponding piece
        let record_roots = {
            #[cfg(not(feature = "parallel"))]
            let source_pieces = pieces.iter_mut();
            #[cfg(feature = "parallel")]
            let source_pieces = pieces.par_iter_mut();

            let iter = source_pieces.map(|piece| {
                let [source_chunks_root, parity_chunks_root] = {
                    // Parity chunks of the record are only needed to compute their root, they
                    // are not stored in the piece
                    let mut parity_chunks = Record::new_boxed();

                    self.erasure_coding
                        .extend(piece.record().iter(), parity_chunks.iter_mut())
                        .expect(
                            "Erasure coding instance is deliberately configured to support this \
                            input; qed",
                        );

                    let source_chunks_root = BalancedMerkleTree::compute_root_only(piece.record());
                    let parity_chunks_root = BalancedMerkleTree::compute_root_only(&parity_chunks);

                    [source_chunks_root, parity_chunks_root]
                };

                // The record root combines the root of source chunks and the root of parity
                // chunks
                let record_root = BalancedMerkleTree::compute_root_only(&[
                    source_chunks_root,
                    parity_chunks_root,
                ]);

                piece.root_mut().copy_from_slice(&record_root);
                piece
                    .parity_chunks_root_mut()
                    .copy_from_slice(&parity_chunks_root);

                record_root
            });

            iter.collect::<Vec<_>>()
        };

        // Merkle tree over the roots of all records of the segment
        let segment_merkle_tree =
            BalancedMerkleTree::<{ ArchivedHistorySegment::NUM_PIECES }>::new_boxed(
                record_roots
                    .as_slice()
                    .try_into()
                    .expect("Statically guaranteed to have correct length; qed"),
            );

        let segment_root = SegmentRoot::from(segment_merkle_tree.root());

        // Store the proof of every record root in its piece
        pieces
            .iter_mut()
            .zip(segment_merkle_tree.all_proofs())
            .for_each(|(piece, record_proof)| {
                piece.proof_mut().copy_from_slice(&record_proof);
            });

        let segment_header = SegmentHeader {
            segment_index: self.segment_index.into(),
            segment_root,
            prev_segment_header_hash: self.prev_segment_header_hash,
            last_archived_block: self
                .last_archived_block
                .expect("Never empty by the time segment is produced; qed"),
        };

        // Advance the state to the next segment
        self.segment_index += SegmentIndex::ONE;
        self.prev_segment_header_hash = segment_header.hash();

        // The header of this segment is the first item of the next segment
        self.buffer
            .push_front(SegmentItem::ParentSegmentHeader(segment_header));

        NewArchivedSegment {
            segment_header,
            pieces: pieces.to_shared(),
        }
    }
753}