added alloca_zeroed

benchmarked against vec with a runtime-determined size

started growable vector type impl for stackalloc'd memory
avec
Avril 3 years ago
parent b0a52b992b
commit c2f664c431
Signed by: flanchan
GPG Key ID: 284488987C31F630

@@ -7,8 +7,8 @@ edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
[dev-dependencies]
lazy_static = "1.4.0"
[build-dependencies]
cc = "1.0"

@@ -0,0 +1,112 @@
//! A `Vec`-like wrapper type that only allocates if a provided buffer is first exhausted.
use std::mem::{
MaybeUninit,
ManuallyDrop,
};
use std::marker::{Send, Sync, PhantomData};
use std::ops::Drop;
use std::slice;
#[repr(C)]
#[derive(Debug)]
struct StackBuffer<T>
{
fill_ptr: usize,
buf_ptr: *mut MaybeUninit<T>,
}
impl<T> Clone for StackBuffer<T>
{
fn clone(&self) -> Self {
Self{
fill_ptr: self.fill_ptr,
buf_ptr: self.buf_ptr,
}
}
}
impl<T> Copy for StackBuffer<T>{}
#[repr(C)]
#[derive(Debug, Clone)]
struct HeapBuffer<T>
{
_fill_ptr: usize, // mirrors `buf.len()`, so the fill pointer can be read through either union variant
buf: Vec<T>,
}
#[repr(C)]
union Internal<T>
{
stack: StackBuffer<T>,
heap: ManuallyDrop<HeapBuffer<T>>,
}
/// A growable vector with a backing slice that will move its elements to the heap if the slice space is exhausted.
pub struct AVec<'a, T>
{
/// Maximum number of elements `inner.stack` can hold before the contents are moved to `inner.heap`.
stack_sz: usize,
inner: Internal<T>,
_stack: PhantomData<&'a mut [MaybeUninit<T>]>,
}
// SAFETY: `AVec` owns its elements, so it is Send/Sync exactly when `T` is. (Unbounded impls would be unsound for `!Send`/`!Sync` element types.)
unsafe impl<'a, T: Send> Send for AVec<'a, T>{}
unsafe impl<'a, T: Sync> Sync for AVec<'a, T>{}
impl<'a, T> Drop for AVec<'a, T>
{
fn drop(&mut self) {
if self.is_allocated() {
// All stack elements have been moved to the heap. Drop the heap buffer.
unsafe {
ManuallyDrop::drop(&mut self.inner.heap);
}
} else if std::mem::needs_drop::<T>() {
// Drop the initialised stack elements in place.
unsafe {
// `drop_in_place` on a slice pointer drops every element; no per-element loop is needed.
std::ptr::drop_in_place(std::ptr::slice_from_raw_parts_mut(self.inner.stack.buf_ptr as *mut T, self.fill_ptr()));
}
}
}
}
impl<'a, T> AVec<'a, T>
{
/// The current fill pointer: the number of initialised elements, readable whether the data is on the stack or the heap.
fn fill_ptr(&self) -> usize
{
// SAFETY: Both union variants are repr(C) with a usize fill pointer as their first field, so it can be read through either one.
unsafe {
self.inner.stack.fill_ptr
}
}
/// Have the elements been moved to the heap?
pub fn is_allocated(&self) -> bool
{
self.fill_ptr() > self.stack_sz
}
/// Create a new `AVec` with this backing buffer.
pub fn new(stack: &'a mut [MaybeUninit<T>]) -> Self
{
let (buf_ptr, stack_sz) = (stack.as_mut_ptr(), stack.len());
Self {
stack_sz,
inner: Internal {
stack: StackBuffer {
fill_ptr: 0,
buf_ptr,
}
},
_stack: PhantomData
}
}
}
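For orientation, a hypothetical caller of the type as it stands in this commit (only `new()` and `is_allocated()` exist so far; the push/grow logic is still to come):

use std::mem::MaybeUninit;

fn demo()
{
    // The backing storage lives in the caller's stack frame.
    let mut backing = [MaybeUninit::<u32>::uninit(); 8];
    let v = AVec::new(&mut backing[..]);
    // Nothing has spilled to the heap yet: fill_ptr (0) <= stack_sz (8).
    assert!(!v.is_allocated());
}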

@@ -19,8 +19,10 @@ use std::{
},
slice,
ffi::c_void,
ptr,
};
//TODO: pub mod avec; pub use avec::AVec;
mod ffi;
/// Allocate a runtime length uninitialised byte buffer on the stack, call `callback` with this buffer, and then deallocate the buffer.
@@ -100,87 +102,20 @@ where F: FnOnce(&mut [MaybeUninit<u8>]) -> T
}
#[cfg(test)]
mod tests {
#[test]
#[should_panic]
fn unwinding_over_boundary()
{
super::alloca(120, |_buf| panic!());
}
#[test]
fn with_alloca()
{
use std::mem::MaybeUninit;
const SIZE: usize = 128;
let sum = super::alloca(SIZE, |buf| {
println!("Buffer size is {}", buf.len());
for (i, x) in (1..).zip(buf.iter_mut()) {
*x = MaybeUninit::new(i as u8);
}
eprintln!("Buffer is now {:?}", unsafe { std::mem::transmute::<_, & &mut [u8]>(&buf) });
buf.iter().map(|x| unsafe { x.assume_init() } as u64).sum::<u64>()
});
assert_eq!(sum, (1..=SIZE).sum::<usize>() as u64);
}
#[test]
fn raw_trampoline()
{
use std::ffi::c_void;
let size: usize = 100;
let output = {
let mut size: usize = size;
extern "C" fn callback(ptr: *mut c_void, data: *mut c_void)
{
let size = unsafe {&mut *(data as *mut usize)};
let slice = unsafe {
std::ptr::write_bytes(ptr, 0, *size);
std::slice::from_raw_parts_mut(ptr as *mut u8, *size)
};
println!("From callback! Size is {}", slice.len());
for (i, x) in (0..).zip(slice.iter_mut())
{
*x = i as u8;
}
*size = slice.iter().map(|&x| x as usize).sum::<usize>();
}
unsafe {
super::ffi::alloca_trampoline(size, callback, &mut size as *mut usize as *mut _);
}
size
};
assert_eq!(output, (0..size).sum::<usize>());
}
#[cfg(nightly)]
mod bench
{
const SIZE: usize = 1024;
use test::{black_box, Bencher};
use std::mem::MaybeUninit;
#[bench]
fn vec_of_uninit_bytes_known(b: &mut Bencher)
{
b.iter(|| {
black_box(vec![MaybeUninit::<u8>::uninit(); SIZE]);
})
}
#[bench]
fn stackalloc_of_uninit_bytes_known(b: &mut Bencher)
{
b.iter(|| {
black_box(crate::alloca(SIZE, |b| {black_box(b);}));
/// Allocate a runtime length zeroed byte buffer on the stack, call `callback` with this buffer, and then deallocate the buffer.
///
/// See `alloca()`.
#[inline] pub fn alloca_zeroed<T, F>(size: usize, callback: F) -> T
where F: FnOnce(&mut [u8]) -> T
{
alloca(size, move |buf| {
// SAFETY: We zero-initialise the backing slice
callback(unsafe {
ptr::write_bytes(buf.as_mut_ptr(), 0, buf.len()); // buf.fill(MaybeUninit::zeroed());
&mut *(buf as *mut [MaybeUninit<u8>] as *mut [u8]) // MaybeUninit::slice_assume_init_mut()
})
})
}
})
}
#[cfg(test)]
mod tests;
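A minimal sketch of exercising the new `alloca_zeroed` (a hypothetical test, mirroring the style of the existing ones):

#[test]
fn alloca_zeroed_is_zeroed()
{
    crate::alloca_zeroed(64, |buf| {
        assert_eq!(buf.len(), 64);
        // Unlike `alloca`, the closure receives a fully initialised `&mut [u8]`.
        assert!(buf.iter().all(|&b| b == 0));
    });
}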

@@ -0,0 +1,146 @@
//! Contains tests and benchmarks
#[test]
#[should_panic]
fn unwinding_over_boundary()
{
super::alloca(120, |_buf| panic!());
}
#[test]
fn with_alloca()
{
use std::mem::MaybeUninit;
const SIZE: usize = 128;
let sum = super::alloca(SIZE, |buf| {
println!("Buffer size is {}", buf.len());
for (i, x) in (1..).zip(buf.iter_mut()) {
*x = MaybeUninit::new(i as u8);
}
eprintln!("Buffer is now {:?}", unsafe { std::mem::transmute::<_, & &mut [u8]>(&buf) });
buf.iter().map(|x| unsafe { x.assume_init() } as u64).sum::<u64>()
});
assert_eq!(sum, (1..=SIZE).sum::<usize>() as u64);
}
#[test]
fn raw_trampoline()
{
use std::ffi::c_void;
let size: usize = 100;
let output = {
let mut size: usize = size;
extern "C" fn callback(ptr: *mut c_void, data: *mut c_void)
{
let size = unsafe {&mut *(data as *mut usize)};
let slice = unsafe {
std::ptr::write_bytes(ptr, 0, *size);
std::slice::from_raw_parts_mut(ptr as *mut u8, *size)
};
println!("From callback! Size is {}", slice.len());
for (i, x) in (0..).zip(slice.iter_mut())
{
*x = i as u8;
}
*size = slice.iter().map(|&x| x as usize).sum::<usize>();
}
unsafe {
super::ffi::alloca_trampoline(size, callback, &mut size as *mut usize as *mut _);
}
size
};
assert_eq!(output, (0..size).sum::<usize>());
}
#[cfg(nightly)]
mod bench
{
const SIZE: usize = 1024;
use test::{black_box, Bencher};
use std::mem::MaybeUninit;
use lazy_static::lazy_static;
lazy_static! {
static ref SIZE_RANDOM: usize = {
// Derive the size from the clock so the optimiser cannot treat the buffer length as a compile-time constant.
use std::time;
let base = time::SystemTime::now().duration_since(time::UNIX_EPOCH).unwrap().as_millis() as u64;
((base & 300) + 1024) as usize
};
}
#[bench]
fn vec_of_uninit_bytes_unknown(b: &mut Bencher)
{
let size = *SIZE_RANDOM;
b.iter(|| {
black_box(vec![MaybeUninit::<u8>::uninit(); size]);
})
}
#[bench]
fn stackalloc_of_uninit_bytes_unknown(b: &mut Bencher)
{
let size = *SIZE_RANDOM;
b.iter(|| {
black_box(crate::alloca(size, |b| {black_box(b);}));
})
}
#[bench]
fn stackalloc_of_zeroed_bytes_unknown(b: &mut Bencher)
{
let size = *SIZE_RANDOM;
b.iter(|| {
black_box(crate::alloca_zeroed(size, |b| {black_box(b);}));
})
}
#[bench]
fn vec_of_zeroed_bytes_unknown(b: &mut Bencher)
{
let size = *SIZE_RANDOM;
b.iter(|| {
black_box(vec![0u8; size]);
})
}
#[bench]
fn vec_of_zeroed_bytes_known(b: &mut Bencher)
{
b.iter(|| {
black_box(vec![0u8; SIZE]);
})
}
#[bench]
fn vec_of_uninit_bytes_known(b: &mut Bencher)
{
b.iter(|| {
black_box(vec![MaybeUninit::<u8>::uninit(); SIZE]);
})
}
#[bench]
fn stackalloc_of_uninit_bytes_known(b: &mut Bencher)
{
b.iter(|| {
black_box(crate::alloca(SIZE, |b| {black_box(b);}));
})
}
#[bench]
fn stackalloc_of_zeroed_bytes_known(b: &mut Bencher)
{
b.iter(|| {
black_box(crate::alloca_zeroed(SIZE, |b| {black_box(b);}));
})
}
}
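The `#[cfg(nightly)]` gate above depends on a custom cfg being emitted at build time; a sketch of one way a build script could set it (the toolchain probe and the C file name below are assumptions, not necessarily what this crate's build.rs does):

// build.rs (sketch)
fn main()
{
    // Compile the C trampoline backing `ffi::alloca_trampoline` (file name assumed).
    cc::Build::new().file("src/ffi/alloca.c").compile("alloca_trampoline");
    // Enable the benchmark module when building with a nightly toolchain.
    let nightly = std::env::var("RUSTUP_TOOLCHAIN").map(|t| t.contains("nightly")).unwrap_or(false);
    if nightly {
        println!("cargo:rustc-cfg=nightly");
    }
}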