From c2f664c43128a1d20c64d55938459ff346b77f33 Mon Sep 17 00:00:00 2001
From: Avril
Date: Fri, 26 Mar 2021 03:07:01 +0000
Subject: [PATCH] added alloca_zeroed; benchmarked against vec with runtime
 value; started growable vector type impl for stackalloc'd memory

---
 Cargo.toml   |   4 +-
 src/avec.rs  | 112 ++++++++++++++++++++++++++++++++++
 src/lib.rs   |  99 ++++++-----------------------
 src/tests.rs | 146 +++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 277 insertions(+), 84 deletions(-)
 create mode 100644 src/avec.rs
 create mode 100644 src/tests.rs

diff --git a/Cargo.toml b/Cargo.toml
index b247eb0..b34cd6a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,8 +7,8 @@ edition = "2018"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
-[dependencies]
-
+[dev-dependencies]
+lazy_static = "1.4.0"
 
 [build-dependencies]
 cc = "1.0"

diff --git a/src/avec.rs b/src/avec.rs
new file mode 100644
index 0000000..f1a58c3
--- /dev/null
+++ b/src/avec.rs
@@ -0,0 +1,112 @@
+//! A `Vec`-like wrapper type that only allocates if a provided buffer is first exhausted.
+use std::mem::{
+    MaybeUninit,
+    ManuallyDrop,
+};
+use std::marker::{Send, Sync, PhantomData};
+use std::ops::Drop;
+
+#[repr(C)]
+#[derive(Debug)]
+struct StackBuffer<T>
+{
+    fill_ptr: usize,
+    buf_ptr: *mut MaybeUninit<T>,
+}
+impl<T> Clone for StackBuffer<T>
+{
+    fn clone(&self) -> Self {
+        Self {
+            fill_ptr: self.fill_ptr,
+            buf_ptr: self.buf_ptr,
+        }
+    }
+}
+impl<T> Copy for StackBuffer<T>{}
+
+#[repr(C)]
+#[derive(Debug, Clone)]
+struct HeapBuffer<T>
+{
+    _fill_ptr: usize, // vec.len()
+    buf: Vec<T>,
+}
+
+#[repr(C)]
+union Internal<T>
+{
+    stack: StackBuffer<T>,
+    heap: ManuallyDrop<HeapBuffer<T>>,
+}
+
+/// A growable vector with a backing slice that will move its elements to the heap if the slice space is exhausted.
+pub struct AVec<'a, T>
+{
+    /// Max size of `inner.stack` before it's moved to `inner.heap`.
+    stack_sz: usize,
+    inner: Internal<T>,
+
+    _stack: PhantomData<&'a mut [MaybeUninit<T>]>,
+}
+unsafe impl<'a, T> Send for AVec<'a, T>{}
+unsafe impl<'a, T> Sync for AVec<'a, T>{}
+
+impl<'a, T> Drop for AVec<'a, T>
+{
+    fn drop(&mut self) {
+        if self.is_allocated() {
+            // All stack elements have been moved to the heap. Drop the heap buffer.
+            unsafe {
+                ManuallyDrop::drop(&mut self.inner.heap);
+            }
+        } else if std::mem::needs_drop::<T>() {
+            // Drop the filled-in stack elements in place. `drop_in_place` on a
+            // slice pointer drops each element, so no manual loop is needed.
+            unsafe {
+                std::ptr::drop_in_place(std::ptr::slice_from_raw_parts_mut(self.inner.stack.buf_ptr as *mut T, self.fill_ptr()));
+            }
+        }
+    }
+}
+
+impl<'a, T> AVec<'a, T>
+{
+    /// The current fill_ptr of this stack buffer.
+    fn fill_ptr(&self) -> usize
+    {
+        // SAFETY: Both union fields are repr(C) with this element first.
+        unsafe {
+            self.inner.stack.fill_ptr
+        }
+    }
+
+    /// Have the elements been moved to the heap?
+    pub fn is_allocated(&self) -> bool
+    {
+        self.fill_ptr() > self.stack_sz
+    }
+
+    /// Create a new `AVec` with this backing buffer.
+    pub fn new(stack: &'a mut [MaybeUninit<T>]) -> Self
+    {
+        let (buf_ptr, stack_sz) = (stack.as_mut_ptr(), stack.len());
+
+        Self {
+            stack_sz,
+            inner: Internal {
+                stack: StackBuffer {
+                    fill_ptr: 0,
+                    buf_ptr,
+                }
+            },
+            _stack: PhantomData
+        }
+    }
+}
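A note on the layout trick above: `fill_ptr()` is only sound because `StackBuffer<T>` and `HeapBuffer<T>` are both `repr(C)` structs whose first field is a `usize`, so the first word of `Internal<T>` can be read through either union variant no matter which one is live. Below is a minimal standalone sketch of that invariant; the `Stack`/`Heap`/`Either` names are illustrative only, not part of this patch:

    use std::mem::ManuallyDrop;

    #[repr(C)]
    #[derive(Clone, Copy)]
    struct Stack { len: usize, ptr: *mut u8 }

    #[repr(C)]
    struct Heap { len: usize, buf: Vec<u8> }

    #[repr(C)]
    union Either {
        stack: Stack,
        heap: ManuallyDrop<Heap>,
    }

    fn len(e: &Either) -> usize {
        // SAFETY: both variants are repr(C) and lead with a usize, so the
        // first word is valid to read through `stack` in either state.
        unsafe { e.stack.len }
    }

    fn main() {
        let s = Either { stack: Stack { len: 5, ptr: std::ptr::null_mut() } };
        assert_eq!(len(&s), 5);

        let mut h = Either { heap: ManuallyDrop::new(Heap { len: 3, buf: vec![1, 2, 3] }) };
        assert_eq!(len(&h), 3);
        // Unions never drop their fields; release the Vec by hand,
        // just as AVec's Drop impl does for its heap variant.
        unsafe { ManuallyDrop::drop(&mut h.heap) };
    }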
diff --git a/src/lib.rs b/src/lib.rs
index fcd756d..a87fac8 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -19,8 +19,10 @@ use std::{
     },
     slice,
     ffi::c_void,
+    ptr,
 };
 
+//TODO: pub mod avec; pub use avec::AVec;
 mod ffi;
 
 /// Allocate a runtime length uninitialised byte buffer on the stack, call `callback` with this buffer, and then deallocate the buffer.
@@ -100,87 +102,20 @@ where F: FnOnce(&mut [MaybeUninit<u8>]) -> T
 }
 
-#[cfg(test)]
-mod tests {
-    #[test]
-    #[should_panic]
-    fn unwinding_over_boundary()
-    {
-        super::alloca(120, |_buf| panic!());
-    }
-    #[test]
-    fn with_alloca()
-    {
-        use std::mem::MaybeUninit;
-
-        const SIZE: usize = 128;
-        let sum = super::alloca(SIZE, |buf| {
-            println!("Buffer size is {}", buf.len());
-            for (i, x) in (1..).zip(buf.iter_mut()) {
-                *x = MaybeUninit::new(i as u8);
-            }
-            eprintln!("Buffer is now {:?}", unsafe { std::mem::transmute::<_, & &mut [u8]>(&buf) });
-
-            buf.iter().map(|x| unsafe { x.assume_init() } as u64).sum::<u64>()
-        });
-
-        assert_eq!(sum, (1..=SIZE).sum::<usize>() as u64);
-    }
-    #[test]
-    fn raw_trampoline()
-    {
-        use std::ffi::c_void;
-
-        let size: usize = 100;
-        let output = {
-            let mut size: usize = size;
-            extern "C" fn callback(ptr: *mut c_void, data: *mut c_void)
-            {
-                let size = unsafe { &mut *(data as *mut usize) };
-                let slice = unsafe {
-                    std::ptr::write_bytes(ptr, 0, *size);
-                    std::slice::from_raw_parts_mut(ptr as *mut u8, *size)
-                };
-                println!("From callback! Size is {}", slice.len());
-
-                for (i, x) in (0..).zip(slice.iter_mut())
-                {
-                    *x = i as u8;
-                }
-
-                *size = slice.iter().map(|&x| x as usize).sum::<usize>();
-            }
-
-            unsafe {
-                super::ffi::alloca_trampoline(size, callback, &mut size as *mut usize as *mut _);
-            }
-            size
-        };
-
-        assert_eq!(output, (0..size).sum::<usize>());
-    }
-
-    #[cfg(nightly)]
-    mod bench
-    {
-        const SIZE: usize = 1024;
-        use test::{black_box, Bencher};
-        use std::mem::MaybeUninit;
-
-        #[bench]
-        fn vec_of_uninit_bytes_known(b: &mut Bencher)
-        {
-            b.iter(|| {
-                black_box(vec![MaybeUninit::<u8>::uninit(); SIZE]);
-            })
-        }
-        #[bench]
-        fn stackalloc_of_uninit_bytes_known(b: &mut Bencher)
-        {
-            b.iter(|| {
-                black_box(crate::alloca(SIZE, |b| { black_box(b); }));
-            })
-        }
-    }
-}
+/// Allocate a runtime length zeroed byte buffer on the stack, call `callback` with this buffer, and then deallocate the buffer.
+///
+/// See `alloca()`.
+#[inline] pub fn alloca_zeroed<T, F>(size: usize, callback: F) -> T
+where F: FnOnce(&mut [u8]) -> T
+{
+    alloca(size, move |buf| {
+        // SAFETY: We zero-initialise the backing slice before re-typing it as `&mut [u8]`.
+        callback(unsafe {
+            ptr::write_bytes(buf.as_mut_ptr(), 0, buf.len()); // buf.fill(MaybeUninit::zeroed());
+            &mut *(buf as *mut [MaybeUninit<u8>] as *mut [u8]) // MaybeUninit::slice_assume_init_mut()
+        })
+    })
+}
+
+#[cfg(test)]
+mod tests;
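For reference, a minimal caller of the new `alloca_zeroed`. This sketch assumes the crate is consumed under the `stackalloc` name, as the commit message suggests; the length is derived at runtime so the buffer cannot be folded away by the optimiser:

    fn main() {
        // A length the compiler cannot constant-fold, mirroring the "unknown" benchmarks.
        let len = 64 + std::env::args().count();
        let total = stackalloc::alloca_zeroed(len, |buf| {
            // The callback sees a fully zeroed byte slice of exactly `len` bytes.
            assert!(buf.iter().all(|&b| b == 0));
            buf[0] = 1;
            buf.iter().map(|&b| b as usize).sum::<usize>()
        });
        assert_eq!(total, 1);
    }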
diff --git a/src/tests.rs b/src/tests.rs
new file mode 100644
index 0000000..311c005
--- /dev/null
+++ b/src/tests.rs
@@ -0,0 +1,146 @@
+//! Contains tests and benchmarks
+
+#[test]
+#[should_panic]
+fn unwinding_over_boundary()
+{
+    super::alloca(120, |_buf| panic!());
+}
+#[test]
+fn with_alloca()
+{
+    use std::mem::MaybeUninit;
+
+    const SIZE: usize = 128;
+    let sum = super::alloca(SIZE, |buf| {
+        println!("Buffer size is {}", buf.len());
+        for (i, x) in (1..).zip(buf.iter_mut()) {
+            *x = MaybeUninit::new(i as u8);
+        }
+        eprintln!("Buffer is now {:?}", unsafe { std::mem::transmute::<_, & &mut [u8]>(&buf) });
+
+        buf.iter().map(|x| unsafe { x.assume_init() } as u64).sum::<u64>()
+    });
+
+    assert_eq!(sum, (1..=SIZE).sum::<usize>() as u64);
+}
+#[test]
+fn raw_trampoline()
+{
+    use std::ffi::c_void;
+
+    let size: usize = 100;
+    let output = {
+        let mut size: usize = size;
+        extern "C" fn callback(ptr: *mut c_void, data: *mut c_void)
+        {
+            let size = unsafe { &mut *(data as *mut usize) };
+            let slice = unsafe {
+                std::ptr::write_bytes(ptr, 0, *size);
+                std::slice::from_raw_parts_mut(ptr as *mut u8, *size)
+            };
+            println!("From callback! Size is {}", slice.len());
+
+            for (i, x) in (0..).zip(slice.iter_mut())
+            {
+                *x = i as u8;
+            }
+
+            *size = slice.iter().map(|&x| x as usize).sum::<usize>();
+        }
+
+        unsafe {
+            super::ffi::alloca_trampoline(size, callback, &mut size as *mut usize as *mut _);
+        }
+        size
+    };
+
+    assert_eq!(output, (0..size).sum::<usize>());
+}
+
+#[cfg(nightly)]
+mod bench
+{
+    const SIZE: usize = 1024;
+    use test::{black_box, Bencher};
+    use std::mem::MaybeUninit;
+    use lazy_static::lazy_static;
+
+    lazy_static! {
+        // A buffer size the optimiser cannot constant-fold, derived from the clock at first use.
+        static ref SIZE_RANDOM: usize = {
+            use std::time;
+
+            let base = time::SystemTime::now().duration_since(time::UNIX_EPOCH).unwrap().as_millis() as u64;
+
+            ((base & 300) + 1024) as usize
+        };
+    }
+
+    #[bench]
+    fn vec_of_uninit_bytes_unknown(b: &mut Bencher)
+    {
+        let size = *SIZE_RANDOM;
+        b.iter(|| {
+            black_box(vec![MaybeUninit::<u8>::uninit(); size]);
+        })
+    }
+    #[bench]
+    fn stackalloc_of_uninit_bytes_unknown(b: &mut Bencher)
+    {
+        let size = *SIZE_RANDOM;
+
+        b.iter(|| {
+            black_box(crate::alloca(size, |b| { black_box(b); }));
+        })
+    }
+
+    #[bench]
+    fn stackalloc_of_zeroed_bytes_unknown(b: &mut Bencher)
+    {
+        let size = *SIZE_RANDOM;
+
+        b.iter(|| {
+            black_box(crate::alloca_zeroed(size, |b| { black_box(b); }));
+        })
+    }
+
+    #[bench]
+    fn vec_of_zeroed_bytes_unknown(b: &mut Bencher)
+    {
+        let size = *SIZE_RANDOM;
+
+        b.iter(|| {
+            black_box(vec![0u8; size]);
+        })
+    }
+    #[bench]
+    fn vec_of_zeroed_bytes_known(b: &mut Bencher)
+    {
+        b.iter(|| {
+            black_box(vec![0u8; SIZE]);
+        })
+    }
+    #[bench]
+    fn vec_of_uninit_bytes_known(b: &mut Bencher)
+    {
+        b.iter(|| {
+            black_box(vec![MaybeUninit::<u8>::uninit(); SIZE]);
+        })
+    }
+    #[bench]
+    fn stackalloc_of_uninit_bytes_known(b: &mut Bencher)
+    {
+        b.iter(|| {
+            black_box(crate::alloca(SIZE, |b| { black_box(b); }));
+        })
+    }
+
+    #[bench]
+    fn stackalloc_of_zeroed_bytes_known(b: &mut Bencher)
+    {
+        b.iter(|| {
+            black_box(crate::alloca_zeroed(SIZE, |b| { black_box(b); }));
+        })
+    }
+}
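Finally, a hypothetical sketch of constructing the work-in-progress `AVec` over a stack buffer, assuming the `//TODO` module declaration in `lib.rs` is eventually uncommented so that `stackalloc::AVec` is exported (the type currently exposes only `new` and `is_allocated`):

    use std::mem::MaybeUninit;
    use stackalloc::AVec;

    fn main() {
        // Backing storage that the AVec borrows for its lifetime parameter.
        let mut store = [MaybeUninit::<i32>::uninit(); 8];
        let v = AVec::new(&mut store);
        // fill_ptr starts at 0, so nothing has spilled to the heap yet, and
        // Drop is a no-op here: no heap buffer, and needs_drop::<i32>() is false.
        assert!(!v.is_allocated());
    }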