ab_aligned_buffer/lib.rs

#![feature(box_vec_non_null, pointer_is_aligned_to, ptr_as_ref_unchecked)]
#![no_std]

#[cfg(test)]
mod tests;

extern crate alloc;

use ab_io_type::MAX_ALIGNMENT;
use alloc::alloc::realloc;
use alloc::boxed::Box;
use core::alloc::Layout;
use core::mem::MaybeUninit;
use core::ptr::NonNull;
use core::slice;
use core::sync::atomic::{AtomicU32, Ordering};

const _: () = {
    assert!(
        align_of::<u128>() == size_of::<u128>(),
        "Size and alignment are both 16 bytes"
    );
    assert!(
        align_of::<u128>() == MAX_ALIGNMENT as usize,
        "Alignment of u128 is max alignment"
    );
    assert!(size_of::<u128>() >= size_of::<AtomicU32>());
    assert!(align_of::<u128>() >= align_of::<AtomicU32>());
};

#[repr(C, align(16))]
struct ConstInnerBuffer {
    strong_count: AtomicU32,
}

const _: () = {
    assert!(align_of::<ConstInnerBuffer>() == align_of::<u128>());
    assert!(size_of::<ConstInnerBuffer>() == size_of::<u128>());
};

static EMPTY_SHARED_ALIGNED_BUFFER: SharedAlignedBuffer = SharedAlignedBuffer {
    inner: InnerBuffer {
        buffer: NonNull::from_ref({
            static BUFFER: MaybeUninit<ConstInnerBuffer> = MaybeUninit::new(ConstInnerBuffer {
                strong_count: AtomicU32::new(1),
            });

            &BUFFER
        })
        .cast::<MaybeUninit<u128>>(),
        capacity: 0,
        len: 0,
    },
};

#[derive(Debug)]
struct InnerBuffer {
    // The first bytes are allocated for `strong_count`
    buffer: NonNull<MaybeUninit<u128>>,
    capacity: u32,
    len: u32,
}

// SAFETY: Heap-allocated memory buffer can be used from any thread
unsafe impl Send for InnerBuffer {}
// SAFETY: Heap-allocated memory buffer can be used from any thread
unsafe impl Sync for InnerBuffer {}

impl Default for InnerBuffer {
    #[inline(always)]
    fn default() -> Self {
        EMPTY_SHARED_ALIGNED_BUFFER.inner.clone()
    }
}

impl Clone for InnerBuffer {
    #[inline(always)]
    fn clone(&self) -> Self {
        self.strong_count_ref().fetch_add(1, Ordering::AcqRel);

        Self {
            buffer: self.buffer,
            capacity: self.capacity,
            len: self.len,
        }
    }
}

impl Drop for InnerBuffer {
    #[inline(always)]
    fn drop(&mut self) {
        if self.strong_count_ref().fetch_sub(1, Ordering::AcqRel) == 1 {
            // SAFETY: Created from `Box` in constructor
            let _ = unsafe {
                Box::from_non_null(NonNull::slice_from_raw_parts(
                    self.buffer,
                    1 + (self.capacity as usize).div_ceil(size_of::<u128>()),
                ))
            };
        }
    }
}

impl InnerBuffer {
    /// Allocates a new buffer + one `u128` worth of memory at the beginning for
    /// `strong_count` in case it is later converted to [`SharedAlignedBuffer`].
    ///
    /// `strong_count` field is automatically initialized as `1`.
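    ///
    /// The resulting allocation layout looks like this (one `u128` header slot followed by the
    /// data itself):
    ///
    /// ```text
    /// [ strong_count: AtomicU32 (padded to 16 bytes) | capacity bytes (rounded up to 16) ]
    /// ```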
    #[inline(always)]
    fn allocate(capacity: u32) -> Self {
        let buffer = Box::into_non_null(Box::<[u128]>::new_uninit_slice(
            1 + (capacity as usize).div_ceil(size_of::<u128>()),
        ));
        // SAFETY: The first bytes are allocated for `strong_count`, which is a correctly aligned
        // copy type
        unsafe { buffer.cast::<AtomicU32>().write(AtomicU32::new(1)) };
        Self {
            buffer: buffer.cast::<MaybeUninit<u128>>(),
            capacity,
            len: 0,
        }
    }

    #[inline(always)]
    fn resize(&mut self, capacity: u32) {
        // SAFETY: Non-null correctly aligned pointer, correct size
        let layout = Layout::for_value(unsafe {
            slice::from_raw_parts(
                self.buffer.as_ptr(),
                1 + (self.capacity as usize).div_ceil(size_of::<u128>()),
            )
        });

        // `size_of::<u128>()` is added because the first bytes are allocated for `strong_count`
        let new_size = size_of::<u128>() + (capacity as usize).next_multiple_of(layout.align());

        // SAFETY: Allocated with global allocator, correct layout, non-zero size that is a
        // multiple of alignment
        let new_ptr = unsafe {
            realloc(self.buffer.as_ptr().cast::<u8>(), layout, new_size).cast::<MaybeUninit<u128>>()
        };
        let Some(new_ptr) = NonNull::new(new_ptr) else {
            panic!("Realloc from {} to {new_size} has failed", self.capacity());
        };

        self.buffer = new_ptr;
        self.capacity = capacity;
    }

    #[inline(always)]
    fn len(&self) -> u32 {
        self.len
    }

    /// `len` bytes must be initialized
    #[inline(always)]
    unsafe fn set_len(&mut self, len: u32) {
        self.len = len;
    }

    #[inline(always)]
    fn capacity(&self) -> u32 {
        self.capacity
    }

    #[inline(always)]
    fn strong_count_ref(&self) -> &AtomicU32 {
        // SAFETY: The first bytes are allocated for `strong_count`, which is a correctly aligned
        // copy type initialized in the constructor
        unsafe { self.buffer.as_ptr().cast::<AtomicU32>().as_ref_unchecked() }
    }

    #[inline(always)]
    fn as_slice(&self) -> &[u8] {
        let len = self.len() as usize;
        // SAFETY: Not null and length is a protected invariant of the implementation
        unsafe { slice::from_raw_parts(self.as_ptr(), len) }
    }

    #[inline(always)]
    fn as_mut_slice(&mut self) -> &mut [u8] {
        let len = self.len() as usize;
        // SAFETY: Not null and length is a protected invariant of the implementation
        unsafe { slice::from_raw_parts_mut(self.as_mut_ptr(), len) }
    }

    #[inline(always)]
    fn as_ptr(&self) -> *const u8 {
        // SAFETY: Constructor allocates the first element for `strong_count`
        unsafe { self.buffer.as_ptr().cast_const().add(1).cast::<u8>() }
    }

    #[inline(always)]
    fn as_mut_ptr(&mut self) -> *mut u8 {
        // SAFETY: Constructor allocates the first element for `strong_count`
        unsafe { self.buffer.as_ptr().add(1).cast::<u8>() }
    }
}

/// Owned aligned buffer for executor purposes.
///
/// See [`SharedAlignedBuffer`] for a version that can be cheaply cloned, while reusing the original
/// allocation.
///
/// Data is aligned to 16 bytes (128 bits), which is the largest alignment required by primitive
/// types and by extension any type that implements `TrivialType`/`IoType`.
#[derive(Debug)]
pub struct OwnedAlignedBuffer {
    inner: InnerBuffer,
}

impl Clone for OwnedAlignedBuffer {
    #[inline(always)]
    fn clone(&self) -> Self {
        let mut new_instance = Self::with_capacity(self.capacity());
        new_instance.copy_from_slice(self.as_slice());
        new_instance
    }
}

impl OwnedAlignedBuffer {
    /// Create a new instance with at least the specified capacity.
    ///
    /// NOTE: Actual capacity might be larger due to alignment requirements.
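    ///
    /// A small sketch of the expected behavior (illustrative):
    ///
    /// ```
    /// use ab_aligned_buffer::OwnedAlignedBuffer;
    ///
    /// let buffer = OwnedAlignedBuffer::with_capacity(10);
    /// // The requested capacity is a lower bound; the buffer starts out empty
    /// assert!(buffer.capacity() >= 10);
    /// assert!(buffer.is_empty());
    /// ```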
    #[inline(always)]
    pub fn with_capacity(capacity: u32) -> Self {
        Self {
            inner: InnerBuffer::allocate(capacity),
        }
    }

    /// Create a new instance from provided bytes.
    ///
    /// # Panics
    /// If `bytes.len()` doesn't fit into `u32`
    #[inline(always)]
    pub fn from_bytes(bytes: &[u8]) -> Self {
        let mut instance = Self::with_capacity(0);
        instance.copy_from_slice(bytes);
        instance
    }

    #[inline(always)]
    pub fn as_slice(&self) -> &[u8] {
        self.inner.as_slice()
    }

    #[inline(always)]
    pub fn as_mut_slice(&mut self) -> &mut [u8] {
        self.inner.as_mut_slice()
    }

    #[inline(always)]
    pub fn as_ptr(&self) -> *const u8 {
        self.inner.as_ptr()
    }

    #[inline(always)]
    pub fn as_mut_ptr(&mut self) -> *mut u8 {
        self.inner.as_mut_ptr()
    }

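    /// Convert into [`SharedAlignedBuffer`], reusing the allocation without copying.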
    #[inline(always)]
    pub fn into_shared(self) -> SharedAlignedBuffer {
        SharedAlignedBuffer { inner: self.inner }
    }

    /// Ensure capacity of the buffer is at least `capacity`.
    ///
    /// Will re-allocate if necessary.
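    ///
    /// Illustrative sketch:
    ///
    /// ```
    /// use ab_aligned_buffer::OwnedAlignedBuffer;
    ///
    /// let mut buffer = OwnedAlignedBuffer::from_bytes(b"data");
    /// buffer.ensure_capacity(100);
    /// // Existing contents are preserved across the re-allocation
    /// assert!(buffer.capacity() >= 100);
    /// assert_eq!(buffer.as_slice(), b"data");
    /// ```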
    #[inline(always)]
    pub fn ensure_capacity(&mut self, capacity: u32) {
        if capacity > self.capacity() {
            self.inner.resize(capacity)
        }
    }

    /// Will re-allocate if capacity is not enough to store provided bytes.
    ///
    /// # Panics
    /// If `bytes.len()` doesn't fit into `u32`
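    ///
    /// Illustrative sketch:
    ///
    /// ```
    /// use ab_aligned_buffer::OwnedAlignedBuffer;
    ///
    /// let mut buffer = OwnedAlignedBuffer::with_capacity(0);
    /// buffer.copy_from_slice(b"data");
    /// assert_eq!(buffer.as_slice(), b"data");
    /// // A subsequent call replaces the previous contents entirely
    /// buffer.copy_from_slice(b"xy");
    /// assert_eq!(buffer.as_slice(), b"xy");
    /// ```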
    #[inline(always)]
    pub fn copy_from_slice(&mut self, bytes: &[u8]) {
        let Ok(len) = u32::try_from(bytes.len()) else {
            panic!("Too many bytes {}", bytes.len());
        };

        if len > self.capacity() {
            self.inner
                .resize(len.max(self.capacity().saturating_mul(2)));
        }

        // SAFETY: Sufficient capacity guaranteed above, natural alignment of bytes is 1 for input
        // and output, non-overlapping allocations guaranteed by type system
        unsafe {
            self.as_mut_ptr()
                .copy_from_nonoverlapping(bytes.as_ptr(), bytes.len());

            self.inner.set_len(len);
        }
    }

    /// Will re-allocate if capacity is not enough to store provided bytes.
    ///
    /// Returns `false` if `self.len() + bytes.len()` doesn't fit into `u32`.
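    ///
    /// Illustrative sketch:
    ///
    /// ```
    /// use ab_aligned_buffer::OwnedAlignedBuffer;
    ///
    /// let mut buffer = OwnedAlignedBuffer::from_bytes(b"ab");
    /// // Appending extends the buffer, re-allocating as needed
    /// assert!(buffer.append(b"cd"));
    /// assert_eq!(buffer.as_slice(), b"abcd");
    /// assert_eq!(buffer.len(), 4);
    /// ```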
    #[inline(always)]
    #[must_use]
    pub fn append(&mut self, bytes: &[u8]) -> bool {
        let Ok(len) = u32::try_from(bytes.len()) else {
            return false;
        };

        let Some(new_len) = self.len().checked_add(len) else {
            return false;
        };

        if new_len > self.capacity() {
            self.inner
                .resize(new_len.max(self.capacity().saturating_mul(2)));
        }

        // SAFETY: Sufficient capacity guaranteed above, natural alignment of bytes is 1 for input
        // and output, non-overlapping allocations guaranteed by type system
        unsafe {
            self.as_mut_ptr()
                .add(self.len() as usize)
                .copy_from_nonoverlapping(bytes.as_ptr(), bytes.len());

            self.inner.set_len(new_len);
        }

        true
    }

    #[inline(always)]
    pub fn is_empty(&self) -> bool {
        self.inner.len() == 0
    }

    #[inline(always)]
    pub fn len(&self) -> u32 {
        self.inner.len()
    }

    #[inline(always)]
    pub fn capacity(&self) -> u32 {
        self.inner.capacity()
    }

    /// Set the length of the useful data to the specified value.
    ///
    /// # Safety
    /// There must be `new_len` bytes initialized in the buffer.
    ///
    /// # Panics
    /// In debug builds, if `new_len` exceeds the buffer's capacity
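    ///
    /// Illustrative sketch of writing through the raw pointer and then committing the length:
    ///
    /// ```
    /// use ab_aligned_buffer::OwnedAlignedBuffer;
    ///
    /// let mut buffer = OwnedAlignedBuffer::with_capacity(4);
    /// // SAFETY: Exactly 4 bytes are initialized within capacity before the length is set
    /// unsafe {
    ///     buffer.as_mut_ptr().copy_from_nonoverlapping(b"abcd".as_ptr(), 4);
    ///     buffer.set_len(4);
    /// }
    /// assert_eq!(buffer.as_slice(), b"abcd");
    /// ```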
    #[inline(always)]
    pub unsafe fn set_len(&mut self, new_len: u32) {
        debug_assert!(
            new_len <= self.capacity(),
            "Too many bytes {} > {}",
            new_len,
            self.capacity()
        );
        // SAFETY: Guaranteed by method contract
        unsafe {
            self.inner.set_len(new_len);
        }
    }
}

/// Shared aligned buffer for executor purposes.
///
/// See [`OwnedAlignedBuffer`] for a version that can be mutated.
///
/// Data is aligned to 16 bytes (128 bits), which is the largest alignment required by primitive
/// types and by extension any type that implements `TrivialType`/`IoType`.
///
379/// NOTE: Counter for number of shared instances is `u32` and will wrap around if exceeded breaking
380/// internal invariants (which is extremely unlikely, but still).
#[derive(Debug, Default, Clone)]
pub struct SharedAlignedBuffer {
    inner: InnerBuffer,
}

// SAFETY: Heap-allocated memory buffer and atomic can be used from any thread
unsafe impl Send for SharedAlignedBuffer {}
// SAFETY: Heap-allocated memory buffer and atomic can be used from any thread
unsafe impl Sync for SharedAlignedBuffer {}

impl SharedAlignedBuffer {
    /// Static reference to an empty buffer
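    ///
    /// Illustrative sketch:
    ///
    /// ```
    /// use ab_aligned_buffer::SharedAlignedBuffer;
    ///
    /// assert!(SharedAlignedBuffer::empty_ref().is_empty());
    /// ```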
    #[inline(always)]
    pub fn empty_ref() -> &'static Self {
        &EMPTY_SHARED_ALIGNED_BUFFER
    }

    /// Create a new instance from provided bytes.
    ///
    /// # Panics
    /// If `bytes.len()` doesn't fit into `u32`
    #[inline(always)]
    pub fn from_bytes(bytes: &[u8]) -> Self {
        OwnedAlignedBuffer::from_bytes(bytes).into_shared()
    }

    /// Convert into an owned buffer.
    ///
    /// If this is the last shared instance, the allocation is reused; otherwise, the contents are
    /// copied into a new allocation.
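    ///
    /// Illustrative sketch:
    ///
    /// ```
    /// use ab_aligned_buffer::SharedAlignedBuffer;
    ///
    /// let shared = SharedAlignedBuffer::from_bytes(b"data");
    /// // The only instance, so the allocation is reused rather than copied
    /// let owned = shared.into_owned();
    /// assert_eq!(owned.as_slice(), b"data");
    /// ```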
    #[inline(always)]
    pub fn into_owned(self) -> OwnedAlignedBuffer {
        if self.inner.strong_count_ref().load(Ordering::Acquire) == 1 {
            OwnedAlignedBuffer { inner: self.inner }
        } else {
            OwnedAlignedBuffer::from_bytes(self.as_slice())
        }
    }

    #[inline(always)]
    pub fn as_slice(&self) -> &[u8] {
        self.inner.as_slice()
    }

    #[inline(always)]
    pub fn as_ptr(&self) -> *const u8 {
        self.inner.as_ptr()
    }

    #[inline(always)]
    pub fn is_empty(&self) -> bool {
        self.inner.len() == 0
    }

    #[inline(always)]
    pub fn len(&self) -> u32 {
        self.inner.len()
    }
}