Skip to main content

ab_aligned_buffer/
lib.rs

1//! Efficient abstraction for memory buffers aligned to 16 bytes (`u128`) with both owned and shared
2//! variants.
3//!
4//! [`OwnedAlignedBuffer`] represents a memory location aligned to 16 bytes that can be modified.
5//!
6//! [`SharedAlignedBuffer`] can't be modified but supports cheap reference-counting clones (like
7//! `Arc`, but much more efficient).
8//!
9//! Does not require a standard library (`no_std`) but does require allocator and atomics.
10
11#![feature(const_block_items, box_vec_non_null)]
12#![cfg_attr(test, feature(pointer_is_aligned_to))]
13#![no_std]
14
15#[cfg(test)]
16mod tests;
17
18extern crate alloc;
19
20use alloc::alloc::realloc;
21use alloc::boxed::Box;
22use core::alloc::Layout;
23use core::mem::MaybeUninit;
24use core::ops::{Deref, DerefMut};
25use core::ptr::NonNull;
26use core::slice;
27use core::sync::atomic::{AtomicU32, Ordering};
28use stable_deref_trait::{CloneStableDeref, StableDeref};
29use yoke::CloneableCart;
30
// Compile-time checks of the layout assumptions used throughout this file: the alignment of `u128`
// equals its size, and a single `u128` slot is big and aligned enough to hold an `AtomicU32`
const {
    assert!(
        align_of::<u128>() == size_of::<u128>(),
        "Size and alignment are both 16 bytes"
    );
    // The reference counter is written into the first `u128` slot of every buffer
    assert!(size_of::<u128>() >= size_of::<AtomicU32>());
    assert!(align_of::<u128>() >= align_of::<AtomicU32>());
}
39
/// Header slot that can be placed in a `static`: contains only the reference counter and is
/// aligned to 16 bytes so that it can stand in for the first `u128` slot of a buffer.
#[repr(C, align(16))]
struct ConstInnerBuffer {
    /// Reference counter, the only field and therefore located at offset zero
    strong_count: AtomicU32,
}
44
// Compile-time check that `ConstInnerBuffer` has exactly the size and alignment of one `u128`
// slot, which is what a pointer to it is later cast to
const {
    assert!(align_of::<ConstInnerBuffer>() == align_of::<u128>());
    assert!(size_of::<ConstInnerBuffer>() == size_of::<u128>());
}
49
/// Empty buffer shared by [`SharedAlignedBuffer::empty_ref()`] and the `Default` implementation.
///
/// Unlike regular buffers, it points at a static header slot instead of a heap allocation. The
/// counter starts at `1` on behalf of this static, which is never dropped, so (barring counter
/// wrap-around) dropping clones is not expected to bring the counter to zero and free the static
/// memory.
static EMPTY_SHARED_ALIGNED_BUFFER: SharedAlignedBuffer = SharedAlignedBuffer {
    inner: InnerBuffer {
        buffer: NonNull::from_ref({
            // Header-only storage: just the reference counter, no payload slots follow
            static BUFFER: MaybeUninit<ConstInnerBuffer> = MaybeUninit::new(ConstInnerBuffer {
                strong_count: AtomicU32::new(1),
            });

            &BUFFER
        })
        // Same size and alignment as one `u128` slot, see compile-time assertions above
        .cast::<MaybeUninit<u128>>(),
        capacity: 0,
        len: 0,
    },
};
64
/// Low-level handle shared by the owned and shared buffer types: a pointer to the allocation plus
/// the payload capacity and length in bytes.
#[derive(Debug)]
struct InnerBuffer {
    // The first bytes are allocated for `strong_count`
    /// Pointer to the first `u128` slot; payload bytes start at the second slot
    buffer: NonNull<MaybeUninit<u128>>,
    /// Payload capacity in bytes as requested by the caller, not counting the slot reserved for
    /// `strong_count`
    capacity: u32,
    /// Number of payload bytes that are initialized
    len: u32,
}
72
// SAFETY: Heap-allocated memory buffer can be used from any thread, nothing in it is tied to the
// thread that created it
unsafe impl Send for InnerBuffer {}
// SAFETY: Heap-allocated memory buffer can be used from any thread, the only state this type
// modifies through a shared reference is the reference counter, which is atomic
unsafe impl Sync for InnerBuffer {}
77
78impl Default for InnerBuffer {
79    #[inline(always)]
80    fn default() -> Self {
81        EMPTY_SHARED_ALIGNED_BUFFER.inner.clone()
82    }
83}
84
85impl Clone for InnerBuffer {
86    #[inline(always)]
87    fn clone(&self) -> Self {
88        self.strong_count_ref().fetch_add(1, Ordering::AcqRel);
89
90        Self {
91            buffer: self.buffer,
92            capacity: self.capacity,
93            len: self.len,
94        }
95    }
96}
97
98impl Drop for InnerBuffer {
99    #[inline(always)]
100    fn drop(&mut self) {
101        if self.strong_count_ref().fetch_sub(1, Ordering::AcqRel) == 1 {
102            // SAFETY: Created from `Box` in constructor
103            let _: Box<_> = unsafe {
104                Box::from_non_null(NonNull::slice_from_raw_parts(
105                    self.buffer,
106                    1 + (self.capacity as usize).div_ceil(size_of::<u128>()),
107                ))
108            };
109        }
110    }
111}
112
113impl InnerBuffer {
114    /// Allocates a new buffer + one `u128` worth of memory at the beginning for
115    /// `strong_count` in case it is later converted to [`SharedAlignedBuffer`].
116    ///
117    /// `strong_count` field is automatically initialized as `1`.
118    #[inline(always)]
119    fn allocate(capacity: u32) -> Self {
120        let buffer = Box::into_non_null(Box::<[u128]>::new_uninit_slice(
121            1 + (capacity as usize).div_ceil(size_of::<u128>()),
122        ));
123        // SAFETY: The first bytes are allocated for `strong_count`, which is a correctly aligned
124        // copy type
125        unsafe {
126            buffer.cast::<AtomicU32>().write(AtomicU32::new(1));
127        }
128        Self {
129            buffer: buffer.cast::<MaybeUninit<u128>>(),
130            capacity,
131            len: 0,
132        }
133    }
134
135    #[inline(always)]
136    fn resize(&mut self, capacity: u32) {
137        // SAFETY: Non-null correctly aligned pointer, correct size
138        let layout = Layout::for_value(unsafe {
139            slice::from_raw_parts(
140                self.buffer.as_ptr(),
141                1 + (self.capacity as usize).div_ceil(size_of::<u128>()),
142            )
143        });
144
145        // `size_of::<u128>()` is added because the first bytes are allocated for `strong_count`
146        let new_size = size_of::<u128>() + (capacity as usize).next_multiple_of(layout.align());
147
148        #[expect(
149            clippy::cast_ptr_alignment,
150            reason = "Cast from correct alignment to bytes and back due to API requirements"
151        )]
152        // SAFETY: Allocated with global allocator, correct layout, non-zero size that is a
153        // multiple of alignment
154        let new_ptr = unsafe {
155            realloc(self.buffer.as_ptr().cast::<u8>(), layout, new_size).cast::<MaybeUninit<u128>>()
156        };
157        let Some(new_ptr) = NonNull::new(new_ptr) else {
158            panic!("Realloc from {} to {new_size} has failed", self.capacity());
159        };
160
161        self.buffer = new_ptr;
162        self.capacity = capacity;
163    }
164
165    #[inline(always)]
166    const fn len(&self) -> u32 {
167        self.len
168    }
169
170    /// `len` bytes must be initialized
171    #[inline(always)]
172    unsafe fn set_len(&mut self, len: u32) {
173        debug_assert!(
174            len <= self.capacity(),
175            "Too many bytes {} > {}",
176            len,
177            self.capacity()
178        );
179        self.len = len;
180    }
181
182    #[inline(always)]
183    const fn capacity(&self) -> u32 {
184        self.capacity
185    }
186
187    #[inline(always)]
188    const fn strong_count_ref(&self) -> &AtomicU32 {
189        // SAFETY: The first bytes are allocated for `strong_count`, which is a correctly aligned
190        // copy type initialized in the constructor
191        unsafe { self.buffer.as_ptr().cast::<AtomicU32>().as_ref_unchecked() }
192    }
193
194    #[inline(always)]
195    const fn as_slice(&self) -> &[u8] {
196        let len = self.len() as usize;
197        // SAFETY: Not null and length is a protected invariant of the implementation
198        unsafe { slice::from_raw_parts(self.as_ptr(), len) }
199    }
200
201    #[inline(always)]
202    const fn as_mut_slice(&mut self) -> &mut [u8] {
203        let len = self.len() as usize;
204        // SAFETY: Not null and length is a protected invariant of the implementation
205        unsafe { slice::from_raw_parts_mut(self.as_mut_ptr(), len) }
206    }
207
208    #[inline(always)]
209    const fn as_ptr(&self) -> *const u8 {
210        // SAFETY: Constructor allocates the first element for `strong_count`
211        unsafe { self.buffer.as_ptr().cast_const().add(1).cast::<u8>() }
212    }
213
214    #[inline(always)]
215    const fn as_mut_ptr(&mut self) -> *mut u8 {
216        // SAFETY: Constructor allocates the first element for `strong_count`
217        unsafe { self.buffer.as_ptr().add(1).cast::<u8>() }
218    }
219}
220
/// Owned aligned buffer for executor purposes.
///
/// See [`SharedAlignedBuffer`] for a version that can be cheaply cloned while reusing the original
/// allocation.
///
/// Data is aligned to 16 bytes (128 bits), which is the largest alignment required by primitive
/// types and by extension any type that implements `TrivialType`/`IoType`.
#[derive(Debug)]
pub struct OwnedAlignedBuffer {
    /// Underlying buffer handle; moved as is into [`SharedAlignedBuffer`] on conversion
    inner: InnerBuffer,
}
232
233impl Deref for OwnedAlignedBuffer {
234    type Target = [u8];
235
236    #[inline(always)]
237    fn deref(&self) -> &Self::Target {
238        self.as_slice()
239    }
240}
241
242impl DerefMut for OwnedAlignedBuffer {
243    #[inline(always)]
244    fn deref_mut(&mut self) -> &mut Self::Target {
245        self.as_mut_slice()
246    }
247}
248
// SAFETY: Heap-allocated data structure, points to the same memory if moved (the struct only holds
// a pointer to the bytes, the bytes themselves are not stored inline)
unsafe impl StableDeref for OwnedAlignedBuffer {}
251
252impl Clone for OwnedAlignedBuffer {
253    #[inline(always)]
254    fn clone(&self) -> Self {
255        let mut new_instance = Self::with_capacity(self.capacity());
256        new_instance.copy_from_slice(self.as_slice());
257        new_instance
258    }
259}
260
261impl OwnedAlignedBuffer {
262    /// Create a new instance with at least specified capacity.
263    ///
264    /// NOTE: Actual capacity might be larger due to alignment requirements.
265    #[inline(always)]
266    pub fn with_capacity(capacity: u32) -> Self {
267        Self {
268            inner: InnerBuffer::allocate(capacity),
269        }
270    }
271
272    /// Create a new instance from provided bytes.
273    ///
274    /// # Panics
275    /// If `bytes.len()` doesn't fit into `u32`
276    #[inline(always)]
277    pub fn from_bytes(bytes: &[u8]) -> Self {
278        let mut instance = Self::with_capacity(0);
279        instance.copy_from_slice(bytes);
280        instance
281    }
282
283    #[inline(always)]
284    pub const fn as_slice(&self) -> &[u8] {
285        self.inner.as_slice()
286    }
287
288    #[inline(always)]
289    pub const fn as_mut_slice(&mut self) -> &mut [u8] {
290        self.inner.as_mut_slice()
291    }
292
293    #[inline(always)]
294    pub const fn as_ptr(&self) -> *const u8 {
295        self.inner.as_ptr()
296    }
297
298    #[inline(always)]
299    pub const fn as_mut_ptr(&mut self) -> *mut u8 {
300        self.inner.as_mut_ptr()
301    }
302
303    #[inline(always)]
304    pub fn into_shared(self) -> SharedAlignedBuffer {
305        SharedAlignedBuffer { inner: self.inner }
306    }
307
308    /// Ensure capacity of the buffer is at least `capacity`.
309    ///
310    /// Will re-allocate if necessary.
311    #[inline(always)]
312    pub fn ensure_capacity(&mut self, capacity: u32) {
313        if capacity > self.capacity() {
314            self.inner.resize(capacity);
315        }
316    }
317
318    /// Will re-allocate if capacity is not enough to store provided bytes.
319    ///
320    /// # Panics
321    /// If `bytes.len()` doesn't fit into `u32`
322    #[inline(always)]
323    pub fn copy_from_slice(&mut self, bytes: &[u8]) {
324        let Ok(len) = u32::try_from(bytes.len()) else {
325            panic!("Too many bytes {}", bytes.len());
326        };
327
328        if len > self.capacity() {
329            self.inner
330                .resize(len.max(self.capacity().saturating_mul(2)));
331        }
332
333        // SAFETY: Sufficient capacity guaranteed above, natural alignment of bytes is 1 for input
334        // and output, non-overlapping allocations guaranteed by the type system
335        unsafe {
336            self.as_mut_ptr()
337                .copy_from_nonoverlapping(bytes.as_ptr(), bytes.len());
338
339            self.inner.set_len(len);
340        }
341    }
342
343    /// Will re-allocate if capacity is not enough to store provided bytes.
344    ///
345    /// Returns `false` if `self.len() + bytes.len()` doesn't fit into `u32`.
346    #[inline(always)]
347    #[must_use]
348    pub fn append(&mut self, bytes: &[u8]) -> bool {
349        let Ok(len) = u32::try_from(bytes.len()) else {
350            return false;
351        };
352
353        let Some(new_len) = self.len().checked_add(len) else {
354            return false;
355        };
356
357        if new_len > self.capacity() {
358            self.inner
359                .resize(new_len.max(self.capacity().saturating_mul(2)));
360        }
361
362        // SAFETY: Sufficient capacity guaranteed above, natural alignment of bytes is 1 for input
363        // and output, non-overlapping allocations guaranteed by the type system
364        unsafe {
365            self.as_mut_ptr()
366                .add(self.len() as usize)
367                .copy_from_nonoverlapping(bytes.as_ptr(), bytes.len());
368
369            self.inner.set_len(new_len);
370        }
371
372        true
373    }
374
375    #[inline(always)]
376    pub const fn is_empty(&self) -> bool {
377        self.inner.len() == 0
378    }
379
380    #[inline(always)]
381    pub const fn len(&self) -> u32 {
382        self.inner.len()
383    }
384
385    #[inline(always)]
386    pub const fn capacity(&self) -> u32 {
387        self.inner.capacity()
388    }
389
390    /// Set the length of the useful data to a specified value.
391    ///
392    /// # Safety
393    /// There must be `new_len` bytes initialized in the buffer.
394    ///
395    /// # Panics
396    /// If `bytes.len()` doesn't fit into `u32`
397    #[inline(always)]
398    pub unsafe fn set_len(&mut self, new_len: u32) {
399        // SAFETY: Guaranteed by method contract
400        unsafe {
401            self.inner.set_len(new_len);
402        }
403    }
404}
405
/// Shared aligned buffer for executor purposes.
///
/// See [`OwnedAlignedBuffer`] for a version that can be mutated.
///
/// Data is aligned to 16 bytes (128 bits), which is the largest alignment required by primitive
/// types and by extension any type that implements `TrivialType`/`IoType`.
///
/// NOTE: Counter for the number of shared instances is `u32` and will wrap around if exceeded
/// breaking internal invariants (which is extremely unlikely, but still).
#[derive(Debug, Default, Clone)]
pub struct SharedAlignedBuffer {
    /// Underlying buffer handle; cloning it increments the reference counter stored in the buffer
    inner: InnerBuffer,
}
419
420impl Deref for SharedAlignedBuffer {
421    type Target = [u8];
422
423    #[inline(always)]
424    fn deref(&self) -> &Self::Target {
425        self.as_slice()
426    }
427}
428
// SAFETY: Heap-allocated data structure, points to the same memory if moved (the struct only holds
// a pointer to the bytes, the bytes themselves are not stored inline)
unsafe impl StableDeref for SharedAlignedBuffer {}
// SAFETY: Inner buffer is exactly the same and points to the same memory after clone (cloning
// copies the pointer and increments the reference counter)
unsafe impl CloneStableDeref for SharedAlignedBuffer {}
// SAFETY: Inner buffer is exactly the same and points to the same memory after clone (cloning
// copies the pointer and increments the reference counter)
unsafe impl CloneableCart for SharedAlignedBuffer {}
435
436impl SharedAlignedBuffer {
437    /// Static reference to an empty buffer
438    #[inline(always)]
439    pub const fn empty_ref() -> &'static Self {
440        &EMPTY_SHARED_ALIGNED_BUFFER
441    }
442
443    /// Create a new instance from provided bytes.
444    ///
445    /// # Panics
446    /// If `bytes.len()` doesn't fit into `u32`
447    #[inline(always)]
448    pub fn from_bytes(bytes: &[u8]) -> Self {
449        OwnedAlignedBuffer::from_bytes(bytes).into_shared()
450    }
451
452    /// Convert into owned buffer.
453    ///
454    /// If this is the last shared instance, then allocation will be reused, otherwise a new
455    /// allocation will be created.
456    ///
457    /// Returns `None` if there exit other shared instances.
458    #[inline(always)]
459    pub fn into_owned(self) -> OwnedAlignedBuffer {
460        if self.inner.strong_count_ref().load(Ordering::Acquire) == 1 {
461            OwnedAlignedBuffer { inner: self.inner }
462        } else {
463            OwnedAlignedBuffer::from_bytes(self.as_slice())
464        }
465    }
466
467    #[inline(always)]
468    pub const fn as_slice(&self) -> &[u8] {
469        self.inner.as_slice()
470    }
471
472    #[inline(always)]
473    pub const fn as_ptr(&self) -> *const u8 {
474        self.inner.as_ptr()
475    }
476
477    #[inline(always)]
478    pub const fn is_empty(&self) -> bool {
479        self.inner.len() == 0
480    }
481
482    #[inline(always)]
483    pub const fn len(&self) -> u32 {
484        self.inner.len()
485    }
486}