// ab_aligned_buffer/lib.rs
1//! Efficient abstraction for memory buffers aligned to 16 bytes (`u128`) with both owned and shared
2//! variants.
3//!
4//! [`OwnedAlignedBuffer`] represents a memory location aligned to 16 bytes that can be modified.
5//!
6//! [`SharedAlignedBuffer`] can't be modified but supports cheap reference-counting clones (like
7//! `Arc`, but much more efficient).
8//!
9//! Does not require a standard library (`no_std`) but does require allocator and atomics.
10
11#![feature(const_block_items, box_vec_non_null)]
12#![cfg_attr(test, feature(pointer_is_aligned_to))]
13#![no_std]
14
15#[cfg(test)]
16mod tests;
17
18extern crate alloc;
19
20use alloc::alloc::realloc;
21use alloc::boxed::Box;
22use core::alloc::Layout;
23use core::mem::MaybeUninit;
24use core::ops::{Deref, DerefMut};
25use core::ptr::NonNull;
26use core::slice;
27use core::sync::atomic::{AtomicU32, Ordering};
28use stable_deref_trait::{CloneStableDeref, StableDeref};
29use yoke::CloneableCart;
30
// Compile-time guarantees about the layout assumptions this crate relies on: the inline
// `strong_count` header occupies one `u128` slot, so `AtomicU32` must fit into that slot and
// require no stronger alignment than it provides.
//
// An anonymous constant is used instead of an unstable `const { .. }` item
// (`const_block_items` feature) — `const _: () = { .. };` is stable and equivalent.
const _: () = {
    assert!(
        align_of::<u128>() == size_of::<u128>(),
        "Size and alignment are both 16 bytes"
    );
    assert!(size_of::<u128>() >= size_of::<AtomicU32>());
    assert!(align_of::<u128>() >= align_of::<AtomicU32>());
};
39
/// Header of the statically-allocated empty buffer: a single 16-byte-aligned slot that holds
/// only the strong count, mirroring the first `u128` header slot of heap-allocated buffers
#[repr(C, align(16))]
struct ConstInnerBuffer {
    // Reference count embedded in the buffer itself; starts at 1 for the static instance
    strong_count: AtomicU32,
}
44
45const {
46    assert!(align_of::<ConstInnerBuffer>() == align_of::<u128>());
47    assert!(size_of::<ConstInnerBuffer>() == size_of::<u128>());
48}
49
/// Statically-allocated backing storage for the empty [`SharedAlignedBuffer`]: a single
/// 16-byte header slot and zero data bytes, so `Default`/empty buffers need no heap allocation
static EMPTY_SHARED_ALIGNED_BUFFER: SharedAlignedBuffer = SharedAlignedBuffer {
    inner: InnerBuffer {
        buffer: NonNull::from_ref({
            // The strong count starts at 1 so the static itself always counts as an owner;
            // clones increment/decrement around it and can never bring the count back to the
            // value that triggers deallocation in `InnerBuffer::drop`
            static BUFFER: MaybeUninit<ConstInnerBuffer> = MaybeUninit::new(ConstInnerBuffer {
                strong_count: AtomicU32::new(1),
            });

            &BUFFER
        })
        // Reinterpret as the header-slot type used by heap-allocated buffers; the layouts match
        // (see the `ConstInnerBuffer` size/alignment assertions)
        .cast::<MaybeUninit<u128>>(),
        capacity: 0,
        len: 0,
    },
};
64
/// Reference-counted buffer shared by [`OwnedAlignedBuffer`] and [`SharedAlignedBuffer`].
///
/// `buffer` points at the allocation's first 16-byte slot, which stores the `strong_count`
/// header; the data bytes start at the following slot.
#[derive(Debug)]
struct InnerBuffer {
    // The first bytes are allocated for `strong_count`
    buffer: NonNull<MaybeUninit<u128>>,
    // Capacity of the data area in bytes (the header slot is not counted)
    capacity: u32,
    // Number of initialized data bytes, always `<= capacity`
    len: u32,
}
72
// SAFETY: Heap-allocated memory buffer can be used from any thread; the only mutation
// reachable through a shared `&InnerBuffer` is the atomic `strong_count`
unsafe impl Send for InnerBuffer {}
// SAFETY: Heap-allocated memory buffer can be used from any thread; the only mutation
// reachable through a shared `&InnerBuffer` is the atomic `strong_count`
unsafe impl Sync for InnerBuffer {}
77
impl Default for InnerBuffer {
    // Returns a handle to the statically-allocated empty buffer: no heap allocation, just a
    // strong count increment performed by `clone`
    #[inline(always)]
    fn default() -> Self {
        EMPTY_SHARED_ALIGNED_BUFFER.inner.clone()
    }
}
84
impl Clone for InnerBuffer {
    #[inline(always)]
    fn clone(&self) -> Self {
        // Register one more owner of the allocation; `Drop` performs the matching decrement.
        // NOTE(review): `AcqRel` is stronger than strictly necessary for an increment (`Arc`
        // uses `Relaxed` here) — not incorrect, but confirm whether it was intentional
        self.strong_count_ref().fetch_add(1, Ordering::AcqRel);

        // The new handle aliases the same allocation with identical metadata
        Self {
            buffer: self.buffer,
            capacity: self.capacity,
            len: self.len,
        }
    }
}
97
impl Drop for InnerBuffer {
    #[inline(always)]
    fn drop(&mut self) {
        // `fetch_sub` returns the previous value, so `== 1` means this was the last owner.
        // `AcqRel` makes prior accesses by other owners visible before the deallocation below.
        // The static empty buffer is never freed here: its embedded count starts at 1 for the
        // static itself, so clones of it always observe a previous value of at least 2
        if self.strong_count_ref().fetch_sub(1, Ordering::AcqRel) == 1 {
            // SAFETY: Created from `Box` in constructor
            let _ = unsafe {
                Box::from_non_null(NonNull::slice_from_raw_parts(
                    self.buffer,
                    // Header slot + enough `u128`s to cover `capacity` bytes; must match the
                    // allocation size computed in `allocate`/`resize`
                    1 + (self.capacity as usize).div_ceil(size_of::<u128>()),
                ))
            };
        }
    }
}
112
impl InnerBuffer {
    /// Allocates a new buffer + one `u128` worth of memory at the beginning for
    /// `strong_count` in case it is later converted to [`SharedAlignedBuffer`].
    ///
    /// `strong_count` field is automatically initialized as `1`.
    #[inline(always)]
    fn allocate(capacity: u32) -> Self {
        // One leading `u128` slot for the header + enough slots to cover `capacity` bytes
        let buffer = Box::into_non_null(Box::<[u128]>::new_uninit_slice(
            1 + (capacity as usize).div_ceil(size_of::<u128>()),
        ));
        // SAFETY: The first bytes are allocated for `strong_count`, which is a correctly aligned
        // copy type
        unsafe { buffer.cast::<AtomicU32>().write(AtomicU32::new(1)) };
        Self {
            buffer: buffer.cast::<MaybeUninit<u128>>(),
            capacity,
            len: 0,
        }
    }

    /// Reallocates the buffer so its data area can hold `capacity` bytes.
    ///
    /// `len` is left unchanged; callers are responsible for keeping it `<= capacity`.
    ///
    /// NOTE(review): assumes the allocation came from the global allocator — true for buffers
    /// created via `allocate`; `OwnedAlignedBuffer` never wraps the static empty buffer.
    ///
    /// # Panics
    /// If reallocation fails
    #[inline(always)]
    fn resize(&mut self, capacity: u32) {
        // Reconstruct the layout of the *current* allocation: header slot + data slots
        // SAFETY: Non-null correctly aligned pointer, correct size
        let layout = Layout::for_value(unsafe {
            slice::from_raw_parts(
                self.buffer.as_ptr(),
                1 + (self.capacity as usize).div_ceil(size_of::<u128>()),
            )
        });

        // `size_of::<u128>()` is added because the first bytes are allocated for `strong_count`
        let new_size = size_of::<u128>() + (capacity as usize).next_multiple_of(layout.align());

        // SAFETY: Allocated with global allocator, correct layout, non-zero size that is a
        // multiple of alignment
        let new_ptr = unsafe {
            realloc(self.buffer.as_ptr().cast::<u8>(), layout, new_size).cast::<MaybeUninit<u128>>()
        };
        let Some(new_ptr) = NonNull::new(new_ptr) else {
            panic!("Realloc from {} to {new_size} has failed", self.capacity());
        };

        self.buffer = new_ptr;
        self.capacity = capacity;
    }

    /// Number of initialized data bytes
    #[inline(always)]
    const fn len(&self) -> u32 {
        self.len
    }

    /// `len` bytes must be initialized
    ///
    /// # Safety
    /// The first `len` data bytes of the buffer must have been initialized
    #[inline(always)]
    unsafe fn set_len(&mut self, len: u32) {
        debug_assert!(
            len <= self.capacity(),
            "Too many bytes {} > {}",
            len,
            self.capacity()
        );
        self.len = len;
    }

    /// Capacity of the data area in bytes (header slot excluded)
    #[inline(always)]
    const fn capacity(&self) -> u32 {
        self.capacity
    }

    /// Reference to the atomic strong count stored in the first header slot of the allocation
    #[inline(always)]
    const fn strong_count_ref(&self) -> &AtomicU32 {
        // SAFETY: The first bytes are allocated for `strong_count`, which is a correctly aligned
        // copy type initialized in the constructor
        unsafe { self.buffer.as_ptr().cast::<AtomicU32>().as_ref_unchecked() }
    }

    /// Initialized data bytes as a shared slice
    #[inline(always)]
    const fn as_slice(&self) -> &[u8] {
        let len = self.len() as usize;
        // SAFETY: Not null and length is a protected invariant of the implementation
        unsafe { slice::from_raw_parts(self.as_ptr(), len) }
    }

    /// Initialized data bytes as a mutable slice
    #[inline(always)]
    const fn as_mut_slice(&mut self) -> &mut [u8] {
        let len = self.len() as usize;
        // SAFETY: Not null and length is a protected invariant of the implementation
        unsafe { slice::from_raw_parts_mut(self.as_mut_ptr(), len) }
    }

    /// Pointer to the first data byte (one `u128` slot past the start of the allocation)
    #[inline(always)]
    const fn as_ptr(&self) -> *const u8 {
        // SAFETY: Constructor allocates the first element for `strong_count`
        unsafe { self.buffer.as_ptr().cast_const().add(1).cast::<u8>() }
    }

    /// Mutable pointer to the first data byte (one `u128` slot past the start of the allocation)
    #[inline(always)]
    const fn as_mut_ptr(&mut self) -> *mut u8 {
        // SAFETY: Constructor allocates the first element for `strong_count`
        unsafe { self.buffer.as_ptr().add(1).cast::<u8>() }
    }
}
214
/// Owned aligned buffer for executor purposes.
///
/// See [`SharedAlignedBuffer`] for a version that can be cheaply cloned while reusing the original
/// allocation.
///
/// Data is aligned to 16 bytes (128 bits), which is the largest alignment required by primitive
/// types and by extension any type that implements `TrivialType`/`IoType`.
#[derive(Debug)]
pub struct OwnedAlignedBuffer {
    // Invariant: this handle is the sole owner of `inner` (strong count 1), which is what makes
    // the mutable accessors sound
    inner: InnerBuffer,
}
226
227impl Deref for OwnedAlignedBuffer {
228    type Target = [u8];
229
230    #[inline(always)]
231    fn deref(&self) -> &Self::Target {
232        self.as_slice()
233    }
234}
235
236impl DerefMut for OwnedAlignedBuffer {
237    #[inline(always)]
238    fn deref_mut(&mut self) -> &mut Self::Target {
239        self.as_mut_slice()
240    }
241}
242
// SAFETY: Heap-allocated data structure, points to the same memory if moved (`deref` goes
// through a `NonNull` into the allocation, not into `self`)
unsafe impl StableDeref for OwnedAlignedBuffer {}
245
246impl Clone for OwnedAlignedBuffer {
247    #[inline(always)]
248    fn clone(&self) -> Self {
249        let mut new_instance = Self::with_capacity(self.capacity());
250        new_instance.copy_from_slice(self.as_slice());
251        new_instance
252    }
253}
254
255impl OwnedAlignedBuffer {
256    /// Create a new instance with at least specified capacity.
257    ///
258    /// NOTE: Actual capacity might be larger due to alignment requirements.
259    #[inline(always)]
260    pub fn with_capacity(capacity: u32) -> Self {
261        Self {
262            inner: InnerBuffer::allocate(capacity),
263        }
264    }
265
266    /// Create a new instance from provided bytes.
267    ///
268    /// # Panics
269    /// If `bytes.len()` doesn't fit into `u32`
270    #[inline(always)]
271    pub fn from_bytes(bytes: &[u8]) -> Self {
272        let mut instance = Self::with_capacity(0);
273        instance.copy_from_slice(bytes);
274        instance
275    }
276
277    #[inline(always)]
278    pub const fn as_slice(&self) -> &[u8] {
279        self.inner.as_slice()
280    }
281
282    #[inline(always)]
283    pub const fn as_mut_slice(&mut self) -> &mut [u8] {
284        self.inner.as_mut_slice()
285    }
286
287    #[inline(always)]
288    pub const fn as_ptr(&self) -> *const u8 {
289        self.inner.as_ptr()
290    }
291
292    #[inline(always)]
293    pub const fn as_mut_ptr(&mut self) -> *mut u8 {
294        self.inner.as_mut_ptr()
295    }
296
297    #[inline(always)]
298    pub fn into_shared(self) -> SharedAlignedBuffer {
299        SharedAlignedBuffer { inner: self.inner }
300    }
301
302    /// Ensure capacity of the buffer is at least `capacity`.
303    ///
304    /// Will re-allocate if necessary.
305    #[inline(always)]
306    pub fn ensure_capacity(&mut self, capacity: u32) {
307        if capacity > self.capacity() {
308            self.inner.resize(capacity)
309        }
310    }
311
312    /// Will re-allocate if capacity is not enough to store provided bytes.
313    ///
314    /// # Panics
315    /// If `bytes.len()` doesn't fit into `u32`
316    #[inline(always)]
317    pub fn copy_from_slice(&mut self, bytes: &[u8]) {
318        let Ok(len) = u32::try_from(bytes.len()) else {
319            panic!("Too many bytes {}", bytes.len());
320        };
321
322        if len > self.capacity() {
323            self.inner
324                .resize(len.max(self.capacity().saturating_mul(2)));
325        }
326
327        // SAFETY: Sufficient capacity guaranteed above, natural alignment of bytes is 1 for input
328        // and output, non-overlapping allocations guaranteed by the type system
329        unsafe {
330            self.as_mut_ptr()
331                .copy_from_nonoverlapping(bytes.as_ptr(), bytes.len());
332
333            self.inner.set_len(len);
334        }
335    }
336
337    /// Will re-allocate if capacity is not enough to store provided bytes.
338    ///
339    /// Returns `false` if `self.len() + bytes.len()` doesn't fit into `u32`.
340    #[inline(always)]
341    #[must_use]
342    pub fn append(&mut self, bytes: &[u8]) -> bool {
343        let Ok(len) = u32::try_from(bytes.len()) else {
344            return false;
345        };
346
347        let Some(new_len) = self.len().checked_add(len) else {
348            return false;
349        };
350
351        if new_len > self.capacity() {
352            self.inner
353                .resize(new_len.max(self.capacity().saturating_mul(2)));
354        }
355
356        // SAFETY: Sufficient capacity guaranteed above, natural alignment of bytes is 1 for input
357        // and output, non-overlapping allocations guaranteed by the type system
358        unsafe {
359            self.as_mut_ptr()
360                .add(self.len() as usize)
361                .copy_from_nonoverlapping(bytes.as_ptr(), bytes.len());
362
363            self.inner.set_len(new_len);
364        }
365
366        true
367    }
368
369    #[inline(always)]
370    pub const fn is_empty(&self) -> bool {
371        self.inner.len() == 0
372    }
373
374    #[inline(always)]
375    pub const fn len(&self) -> u32 {
376        self.inner.len()
377    }
378
379    #[inline(always)]
380    pub const fn capacity(&self) -> u32 {
381        self.inner.capacity()
382    }
383
384    /// Set the length of the useful data to a specified value.
385    ///
386    /// # Safety
387    /// There must be `new_len` bytes initialized in the buffer.
388    ///
389    /// # Panics
390    /// If `bytes.len()` doesn't fit into `u32`
391    #[inline(always)]
392    pub unsafe fn set_len(&mut self, new_len: u32) {
393        // SAFETY: Guaranteed by method contract
394        unsafe {
395            self.inner.set_len(new_len);
396        }
397    }
398}
399
/// Shared aligned buffer for executor purposes.
///
/// See [`OwnedAlignedBuffer`] for a version that can be mutated.
///
/// Data is aligned to 16 bytes (128 bits), which is the largest alignment required by primitive
/// types and by extension any type that implements `TrivialType`/`IoType`.
///
/// NOTE: Counter for the number of shared instances is `u32` and will wrap around if exceeded
/// breaking internal invariants (which is extremely unlikely, but still).
#[derive(Debug, Default, Clone)]
pub struct SharedAlignedBuffer {
    // Cloning bumps the strong count embedded in `inner`'s allocation; no mutable access is
    // exposed through this wrapper
    inner: InnerBuffer,
}
413
414impl Deref for SharedAlignedBuffer {
415    type Target = [u8];
416
417    #[inline(always)]
418    fn deref(&self) -> &Self::Target {
419        self.as_slice()
420    }
421}
422
// SAFETY: Heap-allocated data structure, points to the same memory if moved (`deref` goes
// through a `NonNull` into the allocation, not into `self`)
unsafe impl StableDeref for SharedAlignedBuffer {}
// SAFETY: Inner buffer is exactly the same and points to the same memory after clone
unsafe impl CloneStableDeref for SharedAlignedBuffer {}
// SAFETY: Inner buffer is exactly the same and points to the same memory after clone
unsafe impl CloneableCart for SharedAlignedBuffer {}
429
impl SharedAlignedBuffer {
    /// Static reference to an empty buffer
    #[inline(always)]
    pub const fn empty_ref() -> &'static Self {
        &EMPTY_SHARED_ALIGNED_BUFFER
    }

    /// Create a new instance from provided bytes.
    ///
    /// # Panics
    /// If `bytes.len()` doesn't fit into `u32`
    #[inline(always)]
    pub fn from_bytes(bytes: &[u8]) -> Self {
        OwnedAlignedBuffer::from_bytes(bytes).into_shared()
    }

    /// Convert into owned buffer.
    ///
    /// If this is the last shared instance, then allocation will be reused, otherwise the
    /// contents will be copied into a fresh allocation.
    #[inline(always)]
    pub fn into_owned(self) -> OwnedAlignedBuffer {
        // `Acquire` synchronizes with the `AcqRel` decrement in `InnerBuffer::drop`: a count
        // of 1 means this handle is the sole owner, so the allocation can be handed over for
        // mutation without copying
        if self.inner.strong_count_ref().load(Ordering::Acquire) == 1 {
            OwnedAlignedBuffer { inner: self.inner }
        } else {
            OwnedAlignedBuffer::from_bytes(self.as_slice())
        }
    }

    /// Initialized bytes of the buffer
    #[inline(always)]
    pub const fn as_slice(&self) -> &[u8] {
        self.inner.as_slice()
    }

    /// Pointer to the first data byte (16-byte aligned)
    #[inline(always)]
    pub const fn as_ptr(&self) -> *const u8 {
        self.inner.as_ptr()
    }

    /// Returns `true` if the buffer contains no initialized bytes
    #[inline(always)]
    pub const fn is_empty(&self) -> bool {
        self.inner.len() == 0
    }

    /// Number of initialized bytes
    #[inline(always)]
    pub const fn len(&self) -> u32 {
        self.inner.len()
    }
}