1use crate::objects::{BlockObject, GlobalObject};
2use ab_core_primitives::block::BlockNumber;
3use ab_core_primitives::hashes::Blake3Hash;
4use ab_core_primitives::pieces::{PiecePosition, Record};
5use ab_core_primitives::segments::{
6 ArchivedBlockProgress, ArchivedHistorySegment, LastArchivedBlock, LocalSegmentIndex,
7 RecordedHistorySegment, SegmentHeader, SegmentRoot,
8};
9use ab_erasure_coding::ErasureCoding;
10use ab_merkle_tree::balanced::BalancedMerkleTree;
11use alloc::collections::VecDeque;
12use alloc::vec;
13use alloc::vec::Vec;
14use core::cmp::Ordering;
15use core::num::NonZeroU32;
16use core::ops::Deref;
17use parity_scale_codec::{Decode, Encode, Input, Output};
18#[cfg(feature = "parallel")]
19use rayon::prelude::*;
20
/// [`Output`] adapter that writes encoded bytes straight into the records of the pieces of an
/// [`ArchivedHistorySegment`].
struct ArchivedHistorySegmentOutput<'a> {
    /// Segment whose piece records receive the written bytes
    segment: &'a mut ArchivedHistorySegment,
    /// Number of bytes written so far, which is also the position of the next write counted
    /// across consecutive records
    offset: usize,
}
25
26impl Output for ArchivedHistorySegmentOutput<'_> {
27 #[inline]
28 fn write(&mut self, mut bytes: &[u8]) {
29 while !bytes.is_empty() {
30 let piece = self
31 .segment
32 .get_mut(self.offset / Record::SIZE)
33 .expect("Encoding never exceeds the segment size; qed");
34 let output = &mut piece.record_mut().as_flattened_mut()[self.offset % Record::SIZE..];
35 let bytes_to_write = output.len().min(bytes.len());
36 output[..bytes_to_write].copy_from_slice(&bytes[..bytes_to_write]);
37 self.offset += bytes_to_write;
38 bytes = &bytes[bytes_to_write..];
39 }
40 }
41}
42
/// Sequence of [`SegmentItem`]s.
///
/// The manual `Encode`/`Decode` implementations in this file treat it as a plain concatenation
/// of encoded items (no length prefix); decoding reads items until the input is exhausted.
#[derive(Debug, Default, Clone, Eq, PartialEq)]
pub struct Segment {
    /// Items in the order in which they are encoded
    pub items: Vec<SegmentItem>,
}
49
50impl Encode for Segment {
51 #[inline(always)]
52 fn size_hint(&self) -> usize {
53 RecordedHistorySegment::SIZE
54 }
55
56 #[inline]
57 fn encode_to<O: Output + ?Sized>(&self, dest: &mut O) {
58 for item in &self.items {
59 item.encode_to(dest);
60 }
61 }
62}
63
64impl Decode for Segment {
65 #[inline]
66 fn decode<I: Input>(input: &mut I) -> Result<Self, parity_scale_codec::Error> {
67 let mut items = Vec::new();
68 loop {
69 match input.remaining_len()? {
70 Some(0) => {
71 break;
72 }
73 Some(_) => {
74 }
76 None => {
77 return Err(
78 "Source doesn't report remaining length, decoding not possible".into(),
79 );
80 }
81 }
82
83 match SegmentItem::decode(input) {
84 Ok(item) => {
85 items.push(item);
86 }
87 Err(error) => {
88 return Err(error.chain("Could not decode `Segment::items`"));
89 }
90 }
91 }
92
93 Ok(Self { items })
94 }
95}
96
/// Newtype for the bytes of a block (or of a part of a block) carried by a [`SegmentItem`].
///
/// The inner vector is private. The manual `Encode` implementation in this file writes the
/// length as a `u32` and expects constructors to keep the length within `u32` range.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct BlockBytes(Vec<u8>);
101
102impl Deref for BlockBytes {
103 type Target = [u8];
104
105 #[inline(always)]
106 fn deref(&self) -> &Self::Target {
107 &self.0
108 }
109}
110
111impl From<BlockBytes> for Vec<u8> {
112 #[inline(always)]
113 fn from(value: BlockBytes) -> Self {
114 value.0
115 }
116}
117
118impl Encode for BlockBytes {
119 #[inline(always)]
120 fn size_hint(&self) -> usize {
121 size_of::<u32>() + self.0.len()
122 }
123
124 #[inline]
125 fn encode_to<O: Output + ?Sized>(&self, dest: &mut O) {
126 let length = u32::try_from(self.0.len())
127 .expect("All constructors guarantee the size doesn't exceed `u32`; qed");
128
129 length.encode_to(dest);
130 dest.write(&self.0);
131 }
132}
133
134impl Decode for BlockBytes {
135 #[inline]
136 fn decode<I: Input>(input: &mut I) -> Result<Self, parity_scale_codec::Error> {
137 let length = u32::decode(input)?;
138 if length as usize > (RecordedHistorySegment::SIZE - size_of::<u32>()) {
139 return Err("Segment item size is impossibly large".into());
140 }
141 let mut bytes = vec![0; length as usize];
145 input.read(&mut bytes)?;
146 Ok(Self(bytes))
147 }
148}
149
150impl BlockBytes {
151 #[inline(always)]
152 fn truncate(&mut self, size: usize) {
153 self.0.truncate(size)
154 }
155}
156
/// Kinds of items that a [`Segment`] consists of.
///
/// In the block variants only `bytes` is part of the encoding; `block_objects` is skipped by the
/// codec and only travels with the item in memory.
#[derive(Debug, Clone, Eq, PartialEq, Encode, Decode)]
pub enum SegmentItem {
    /// Padding item. The archiver in this file never puts it into its buffer or into a segment
    /// that is being produced (those code paths are marked unreachable).
    #[codec(index = 0)]
    Padding,
    /// Block that is contained in the segment in full
    #[codec(index = 1)]
    Block {
        /// Block bytes
        bytes: BlockBytes,
        /// Objects located in `bytes`; not encoded (`#[codec(skip)]`)
        #[doc(hidden)]
        #[codec(skip)]
        block_objects: Vec<BlockObject>,
    },
    /// Beginning of a block that didn't fit into the segment; the rest follows as
    /// [`SegmentItem::BlockContinuation`]
    #[codec(index = 2)]
    BlockStart {
        /// Bytes of the first part of the block
        bytes: BlockBytes,
        /// Objects located in `bytes`; not encoded (`#[codec(skip)]`)
        #[doc(hidden)]
        #[codec(skip)]
        block_objects: Vec<BlockObject>,
    },
    /// Further part of a block whose beginning was archived earlier
    #[codec(index = 3)]
    BlockContinuation {
        /// Bytes of this part of the block
        bytes: BlockBytes,
        /// Objects located in `bytes`, with offsets relative to the beginning of `bytes`; not
        /// encoded (`#[codec(skip)]`)
        #[doc(hidden)]
        #[codec(skip)]
        block_objects: Vec<BlockObject>,
    },
    /// Segment header; the archiver places the header of the segment it has just produced at
    /// the front of its buffer, so it becomes the first item of the following segment
    #[codec(index = 4)]
    ParentSegmentHeader(SegmentHeader),
}
197
/// Segment produced by the archiver: its header together with its pieces.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct NewArchivedSegment {
    /// Header describing the segment
    pub segment_header: SegmentHeader,
    /// Pieces of the archived segment
    pub pieces: ArchivedHistorySegment,
}
207
/// Result of adding a block to the [`Archiver`].
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct ArchiveBlockOutcome {
    /// Segments that were completed by adding the block; empty if the buffered data was not
    /// enough to fill a segment
    pub archived_segments: Vec<NewArchivedSegment>,

    /// Mappings for the objects found both in the completed segments and in the items that
    /// remained in the archiver's buffer
    pub global_objects: Vec<GlobalObject>,
}
219
/// Errors returned by [`Archiver::with_initial_state`].
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, thiserror::Error)]
pub enum ArchiverInstantiationError {
    /// The last archived block is marked as partially archived, but the number of archived
    /// bytes equals the size of the provided encoded block (carried in the variant)
    #[error("Invalid last archived block, its size {0} bytes is the same as the encoded block")]
    InvalidLastArchivedBlock(u32),
    /// The provided encoded block is smaller than the number of bytes of it that are reported as
    /// already archived
    #[error(
        "Invalid block, its size {block_bytes} bytes is smaller than the already archived block \
        {archived_block_bytes} bytes"
    )]
    InvalidBlockSmallSize {
        /// Size of the provided encoded block in bytes
        block_bytes: u32,
        /// Number of bytes of the block reported as already archived
        archived_block_bytes: u32,
    },
}
239
/// Archiver that accumulates blocks in a buffer and turns the buffered data into archived
/// segments once enough of it is available.
#[derive(Debug, Clone)]
pub struct Archiver {
    /// Items waiting to be included in the next segment, in the order they will be archived
    buffer: VecDeque<SegmentItem>,
    /// Erasure coding instance used to extend source records and record chunks with parity data
    erasure_coding: ErasureCoding,
    /// Index that the next produced segment will get
    segment_index: LocalSegmentIndex,
    /// Hash of the header of the most recently produced segment (default hash before the first
    /// segment is produced)
    prev_segment_header_hash: Blake3Hash,
    /// Information about the last archived block, `None` until a segment has been produced or
    /// initial state was provided
    last_archived_block: Option<LastArchivedBlock>,
}
264
265impl Archiver {
266 pub fn new(erasure_coding: ErasureCoding) -> Self {
268 Self {
269 buffer: VecDeque::default(),
270 erasure_coding,
271 segment_index: LocalSegmentIndex::ZERO,
272 prev_segment_header_hash: Blake3Hash::default(),
273 last_archived_block: None,
274 }
275 }
276
277 pub fn with_initial_state(
281 erasure_coding: ErasureCoding,
282 segment_header: SegmentHeader,
283 encoded_block: &[u8],
284 mut block_objects: Vec<BlockObject>,
285 ) -> Result<Self, ArchiverInstantiationError> {
286 let mut archiver = Self::new(erasure_coding);
287
288 archiver.segment_index = segment_header.local_segment_index() + LocalSegmentIndex::ONE;
289 archiver.prev_segment_header_hash = segment_header.hash();
290 archiver.last_archived_block = Some(segment_header.last_archived_block);
291
292 archiver
294 .buffer
295 .push_back(SegmentItem::ParentSegmentHeader(segment_header));
296
297 if let Some(archived_block_bytes) = archiver
298 .last_archived_block
299 .expect("Just inserted; qed")
300 .partial_archived()
301 {
302 let archived_block_bytes = archived_block_bytes.get();
303 let encoded_block_bytes = u32::try_from(encoded_block.len())
304 .expect("Blocks length is never bigger than u32; qed");
305
306 match encoded_block_bytes.cmp(&archived_block_bytes) {
307 Ordering::Less => {
308 return Err(ArchiverInstantiationError::InvalidBlockSmallSize {
309 block_bytes: encoded_block_bytes,
310 archived_block_bytes,
311 });
312 }
313 Ordering::Equal => {
314 return Err(ArchiverInstantiationError::InvalidLastArchivedBlock(
315 encoded_block_bytes,
316 ));
317 }
318 Ordering::Greater => {
319 block_objects.retain_mut(|block_object: &mut BlockObject| {
322 if block_object.offset >= archived_block_bytes {
323 block_object.offset -= archived_block_bytes;
324 true
325 } else {
326 false
327 }
328 });
329 archiver.buffer.push_back(SegmentItem::BlockContinuation {
330 bytes: BlockBytes(
331 encoded_block[(archived_block_bytes as usize)..].to_vec(),
332 ),
333 block_objects,
334 });
335 }
336 }
337 }
338
339 Ok(archiver)
340 }
341
342 pub fn last_archived_block_number(&self) -> Option<BlockNumber> {
344 self.last_archived_block
345 .map(|last_archived_block| last_archived_block.number())
346 }
347
348 pub fn add_block(
353 &mut self,
354 bytes: Vec<u8>,
355 block_objects: Vec<BlockObject>,
356 ) -> Option<ArchiveBlockOutcome> {
357 if !(1..u32::MAX as usize).contains(&bytes.len()) {
358 return None;
359 }
360
361 self.buffer.push_back(SegmentItem::Block {
363 bytes: BlockBytes(bytes),
364 block_objects,
365 });
366
367 let mut archived_segments = Vec::new();
368 let mut object_mapping = Vec::new();
369
370 while let Some(mut segment) = self.produce_segment() {
372 object_mapping.extend(Self::produce_object_mappings(segment.items.iter_mut()));
374 archived_segments.push(self.produce_archived_segment(segment));
375 }
376
377 object_mapping.extend(self.produce_next_segment_mappings());
379
380 Some(ArchiveBlockOutcome {
381 archived_segments,
382 global_objects: object_mapping,
383 })
384 }
385
/// Tries to assemble the next segment out of the buffered items.
///
/// Items are taken from the front of the buffer until fewer than 6 bytes of the segment remain
/// free. An item that crosses the segment boundary is split: the part that fits stays in the
/// segment and the rest is pushed back to the front of the buffer as a block continuation.
///
/// Returns `None` if the buffer runs out before the segment is full; in that case all taken
/// items are pushed back in their original order and `self.last_archived_block` is not
/// modified. On success `self.last_archived_block` is updated to describe the last block
/// (fully or partially) included in the returned segment.
fn produce_segment(&mut self) -> Option<Segment> {
    let mut segment = Segment {
        items: Vec::with_capacity(self.buffer.len()),
    };

    // Work on a copy, it is only committed to `self` once the segment is known to be complete
    let mut last_archived_block = self.last_archived_block;

    let mut segment_size = segment.encoded_size();

    // 6 bytes is the smallest encoding of a block item with non-empty contents: 1 byte for the
    // enum variant, 4 bytes for the `u32` length and 1 byte of data
    while RecordedHistorySegment::SIZE.saturating_sub(segment_size) >= 6 {
        let segment_item = match self.buffer.pop_front() {
            Some(segment_item) => segment_item,
            None => {
                // Not enough data for a full segment yet, return everything into the buffer
                // (reverse iteration + `push_front` preserves the original order)
                for segment_item in segment.items.into_iter().rev() {
                    self.buffer.push_front(segment_item);
                }

                return None;
            }
        };

        let segment_item_encoded_size = segment_item.encoded_size();
        segment_size += segment_item_encoded_size;

        // Number of bytes of this item that do not fit into the segment (zero if it fits)
        let spill_over = segment_size.saturating_sub(RecordedHistorySegment::SIZE);

        let segment_item = match segment_item {
            SegmentItem::Padding => {
                unreachable!("Buffer never contains SegmentItem::Padding; qed");
            }
            SegmentItem::Block {
                mut bytes,
                mut block_objects,
            } => {
                let last_archived_block =
                    if let Some(last_archived_block) = &mut last_archived_block {
                        // A new block starts: advance the block number and assume it is
                        // archived completely (corrected below in case of spill over)
                        last_archived_block
                            .number
                            .replace(last_archived_block.number() + BlockNumber::ONE);
                        last_archived_block.set_complete();
                        last_archived_block
                    } else {
                        // Nothing was archived before, this is the very first block
                        last_archived_block.insert(LastArchivedBlock {
                            number: BlockNumber::ZERO.into(),
                            archived_progress: ArchivedBlockProgress::new_complete(),
                        })
                    };

                if spill_over == 0 {
                    SegmentItem::Block {
                        bytes,
                        block_objects,
                    }
                } else {
                    // Everything before `split_point` stays in this segment
                    let split_point = bytes.len() - spill_over;

                    {
                        let continuation_bytes = bytes[split_point..].to_vec();
                        // Objects at or past the split point move to the continuation, with
                        // offsets rebased onto the continuation bytes
                        let continuation_block_objects = block_objects
                            .extract_if(.., |block_object: &mut BlockObject| {
                                if block_object.offset >= split_point as u32 {
                                    block_object.offset -= split_point as u32;
                                    true
                                } else {
                                    false
                                }
                            })
                            .collect();

                        // The continuation takes the place of the removed item at the front of
                        // the buffer
                        self.buffer.push_front(SegmentItem::BlockContinuation {
                            bytes: BlockBytes(continuation_bytes),
                            block_objects: continuation_block_objects,
                        });
                    }

                    bytes.truncate(split_point);
                    // Record how many bytes of the block made it into this segment
                    let archived_bytes = u32::try_from(split_point)
                        .ok()
                        .and_then(NonZeroU32::new)
                        .expect(
                            "`::add_block()` method ensures block is not empty and doesn't \
                            exceed `u32::MAX`; qed",
                        );
                    last_archived_block.set_partial_archived(archived_bytes);

                    SegmentItem::BlockStart {
                        bytes,
                        block_objects,
                    }
                }
            }
            SegmentItem::BlockStart { .. } => {
                unreachable!("Buffer never contains SegmentItem::BlockStart; qed");
            }
            SegmentItem::BlockContinuation {
                mut bytes,
                mut block_objects,
            } => {
                let last_archived_block = last_archived_block.as_mut().expect(
                    "Block continuation implies that there are some bytes archived \
                    already; qed",
                );

                let previously_archived_bytes = last_archived_block.partial_archived().expect(
                    "Block continuation implies that there are some bytes archived \
                    already; qed",
                );

                if spill_over == 0 {
                    // The rest of the block fits, so the block is now archived completely
                    last_archived_block.set_complete();

                    SegmentItem::BlockContinuation {
                        bytes,
                        block_objects,
                    }
                } else {
                    // Everything before `split_point` stays in this segment
                    let split_point = bytes.len() - spill_over;

                    {
                        let continuation_bytes = bytes[split_point..].to_vec();
                        // Objects at or past the split point move to the next continuation,
                        // with offsets rebased onto its bytes
                        let continuation_block_objects = block_objects
                            .extract_if(.., |block_object: &mut BlockObject| {
                                if block_object.offset >= split_point as u32 {
                                    block_object.offset -= split_point as u32;
                                    true
                                } else {
                                    false
                                }
                            })
                            .collect();
                        // The new continuation takes the place of the removed item at the front
                        // of the buffer
                        self.buffer.push_front(SegmentItem::BlockContinuation {
                            bytes: BlockBytes(continuation_bytes),
                            block_objects: continuation_block_objects,
                        });
                    }

                    bytes.truncate(split_point);
                    // Archived progress accumulates over all parts of the block archived so far
                    let archived_bytes = previously_archived_bytes.get()
                        + u32::try_from(split_point).expect(
                            "`::add_block()` method ensures block length doesn't \
                            exceed `u32::MAX`; qed",
                        );
                    let archived_bytes = NonZeroU32::new(archived_bytes).expect(
                        "Spillover means non-zero length of the block was archived; qed",
                    );
                    last_archived_block.set_partial_archived(archived_bytes);

                    SegmentItem::BlockContinuation {
                        bytes,
                        block_objects,
                    }
                }
            }
            SegmentItem::ParentSegmentHeader(parent_segment_header) => {
                // Included as is, it doesn't affect the last archived block
                SegmentItem::ParentSegmentHeader(parent_segment_header)
            }
        };

        segment.items.push(segment_item);
    }

    // The segment is complete, commit the updated archiving progress
    self.last_archived_block = last_archived_block;

    Some(segment)
}
569
570 fn produce_next_segment_mappings(&mut self) -> Vec<GlobalObject> {
576 Self::produce_object_mappings(self.buffer.iter_mut())
577 }
578
579 fn produce_object_mappings<'a>(
584 items: impl Iterator<Item = &'a mut SegmentItem>,
585 ) -> Vec<GlobalObject> {
586 let mut corrected_object_mapping = Vec::new();
587 let mut base_offset_in_segment = Segment::default().encoded_size();
588 for segment_item in items {
589 match segment_item {
590 SegmentItem::Padding => {
591 unreachable!(
592 "Segment during archiving never contains SegmentItem::Padding; qed"
593 );
594 }
595 SegmentItem::Block {
596 bytes: _,
597 block_objects,
598 }
599 | SegmentItem::BlockStart {
600 bytes: _,
601 block_objects,
602 }
603 | SegmentItem::BlockContinuation {
604 bytes: _,
605 block_objects,
606 } => {
607 for block_object in block_objects.drain(..) {
608 let offset_in_segment = base_offset_in_segment
610 + 1
611 + u32::encoded_fixed_size().expect("Fixed size; qed")
612 + block_object.offset as usize;
613 let raw_piece_offset = (offset_in_segment % Record::SIZE)
614 .try_into()
615 .expect("Offset within piece should always fit in 32-bit integer; qed");
616 corrected_object_mapping.push(GlobalObject {
617 hash: block_object.hash,
618 piece_position: PiecePosition::from(
619 (offset_in_segment / Record::SIZE) as u8,
620 ),
621 offset: raw_piece_offset,
622 });
623 }
624 }
625 SegmentItem::ParentSegmentHeader(_) => {
626 }
628 }
629
630 base_offset_in_segment += segment_item.encoded_size();
631 }
632
633 corrected_object_mapping
634 }
635
636 fn produce_archived_segment(&mut self, segment: Segment) -> NewArchivedSegment {
638 let mut pieces = {
639 let mut pieces = ArchivedHistorySegment::default();
640
641 segment.encode_to(&mut ArchivedHistorySegmentOutput {
642 segment: &mut pieces,
643 offset: 0,
644 });
645 drop(segment);
647
648 let (source_shards, parity_shards) =
649 pieces.split_at_mut(RecordedHistorySegment::NUM_RAW_RECORDS);
650
651 self.erasure_coding
652 .extend(
653 source_shards.iter().map(|shard| shard.record()),
654 parity_shards.iter_mut().map(|shard| shard.record_mut()),
655 )
656 .expect("Statically correct parameters; qed");
657
658 pieces
659 };
660
661 let record_roots = {
663 #[cfg(not(feature = "parallel"))]
664 let source_pieces = pieces.iter_mut();
665 #[cfg(feature = "parallel")]
666 let source_pieces = pieces.par_iter_mut();
667
668 let iter = source_pieces.map(|piece| {
674 let [source_chunks_root, parity_chunks_root] = {
675 let mut parity_chunks = Record::new_boxed();
676
677 self.erasure_coding
678 .extend(piece.record().iter(), parity_chunks.iter_mut())
679 .expect(
680 "Erasure coding instance is deliberately configured to support this \
681 input; qed",
682 );
683
684 let source_chunks_root = BalancedMerkleTree::compute_root_only(piece.record());
685 let parity_chunks_root = BalancedMerkleTree::compute_root_only(&parity_chunks);
686
687 [source_chunks_root, parity_chunks_root]
688 };
689
690 let record_root = BalancedMerkleTree::compute_root_only(&[
691 source_chunks_root,
692 parity_chunks_root,
693 ]);
694
695 piece.root_mut().copy_from_slice(&record_root);
696 piece
697 .parity_chunks_root_mut()
698 .copy_from_slice(&parity_chunks_root);
699
700 record_root
701 });
702
703 iter.collect::<Vec<_>>()
704 };
705
706 let segment_merkle_tree =
707 BalancedMerkleTree::<{ ArchivedHistorySegment::NUM_PIECES }>::new_boxed(
708 record_roots
709 .as_slice()
710 .try_into()
711 .expect("Statically guaranteed to have correct length; qed"),
712 );
713
714 let segment_root = SegmentRoot::from(segment_merkle_tree.root());
715
716 pieces
718 .iter_mut()
719 .zip(segment_merkle_tree.all_proofs())
720 .for_each(|(piece, record_proof)| {
721 piece.proof_mut().copy_from_slice(&record_proof);
722 });
723
724 let segment_header = SegmentHeader {
726 segment_index: self.segment_index.into(),
727 segment_root,
728 prev_segment_header_hash: self.prev_segment_header_hash,
729 last_archived_block: self
730 .last_archived_block
731 .expect("Never empty by the time segment is produced; qed"),
732 };
733
734 self.segment_index += LocalSegmentIndex::ONE;
736 self.prev_segment_header_hash = segment_header.hash();
737
738 self.buffer
741 .push_front(SegmentItem::ParentSegmentHeader(segment_header));
742
743 NewArchivedSegment {
744 segment_header,
745 pieces: pieces.to_shared(),
746 }
747 }
748}