From fb769265ca3b32a62310f12f4d9c2921ac961295 Mon Sep 17 00:00:00 2001 From: Avril Date: Fri, 15 Apr 2022 20:33:01 +0100 Subject: [PATCH] Added `SmallString`: Allocates onto heap if string is larger than `SIZE` (like smallvec) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fortune for stack-str's current commit: Middle blessing − 中吉 --- src/lib.rs | 92 ++++++++++++++- src/small.rs | 312 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 403 insertions(+), 1 deletion(-) create mode 100644 src/small.rs diff --git a/src/lib.rs b/src/lib.rs index 5cc47e3..7831868 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,17 +5,51 @@ use std::{ slice, str, ptr, + + fmt, error, + + borrow::Borrow, + ops, }; +/// A `str` that lives entirely on the stack. #[derive(Debug, Clone)] #[cfg_attr(feature="copy", derive(Copy))] +#[repr(C)] // Needed for SmallString pub struct StackString{ fill_ptr: usize, buffer: MaybeUninit<[u8; SIZE]>, } +#[derive(Debug)] +pub struct StrTooLargeError(usize); + +impl error::Error for StrTooLargeError{} +impl fmt::Display for StrTooLargeError +{ + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + write!(f, "string could not fit into {SIZE} bytes: was {} bytes long", self.0) + } +} + +mod small; +pub use small::SmallString; + +//TODO: Document impl StackString { + #[inline] + pub fn try_from_utf8_array(sz: [u8; SIZE]) -> Result + { + let s = std::str::from_utf8(&sz)?.len(); + Ok(Self { + fill_ptr: s, + buffer: MaybeUninit::new(sz), + }) + } + #[inline] pub const fn capacity(&self) -> usize { @@ -161,7 +195,7 @@ impl StackString pub fn try_append_whole_str<'i>(&'i mut self, s: &str) -> Option<&'i mut str> { let av = self.available(); - if s.len() >= av { + if s.len() <= av { let len = self.append_from_str(s); debug_assert_eq!(len, s.len(), "Bad append"); let _ = len; @@ -172,3 +206,59 @@ impl StackString } } } + +impl Borrow for StackString +{ + #[inline] + fn borrow(&self) -> &str { + self.as_str() + } +} + +impl ops::Deref for StackString +{ + type Target = str; + #[inline] + fn deref(&self) -> &Self::Target { + self.as_str() + } +} + +impl std::str::FromStr for StackString +{ + type Err = StrTooLargeError; + #[inline] + fn from_str(s: &str) -> Result { + if s.len() > SIZE { + Err(StrTooLargeError(s.len())) + } else { + let mut o = Self::new(); + if o.try_append_whole_str(s).is_none() { + Err(StrTooLargeError(s.len())) + } else { + Ok(o) + } + } + } +} +impl TryFrom for StackString +{ + type Error = StrTooLargeError; + + #[inline] + fn try_from(from: String) -> Result + { + from.parse() + } +} + +impl TryFrom<[u8; SIZE]> for StackString +{ + type Error = std::str::Utf8Error; + + #[inline] + fn try_from(from: [u8; SIZE]) -> Result + { + Self::try_from_utf8_array(from) + } +} diff --git a/src/small.rs b/src/small.rs new file mode 100644 index 0000000..d171e28 --- /dev/null +++ b/src/small.rs @@ -0,0 +1,312 @@ +//! Stack-allocate a string until a certain point, then, move it to the heap. +use super::*; +use mem::ManuallyDrop; +use std::{ + ptr::NonNull, + ops, +}; + +#[derive(Debug)] +#[repr(C)] +struct HeapString +{ + len: usize, + data: NonNull, // Box<[u8]> allocated. +} + +impl HeapString { + #[inline(always)] + pub unsafe fn new_from_bytes(data: Box<[u8]>) -> Self + { + let len = data.len(); + let data = { + let raw = Box::into_raw(data); + debug_assert!(!raw.is_null(), "Box::into_raw returned null"); + let raw = raw.as_mut().unwrap_unchecked().as_mut_ptr(); + debug_assert!(!raw.is_null(), "raw slice is null"); + NonNull::new_unchecked(raw) + }; + Self { + len, data + } + } + + #[inline(always)] + pub fn new(data: Box) -> Self + { + unsafe { + Self::new_from_bytes(data.into_boxed_bytes()) + } + } + + #[inline(always)] + pub unsafe fn as_bytes_mut(&mut self) -> &mut [u8] + { + slice::from_raw_parts_mut(self.data.as_ptr(), self.len) + } + + #[inline(always)] + pub fn as_bytes(&self) -> &[u8] + { + unsafe { + slice::from_raw_parts(self.data.as_ptr() as *const u8, self.len) + } + } + + #[inline(always)] + pub fn as_str(&self) -> &str + { + unsafe { + std::str::from_utf8_unchecked(self.as_bytes()) + } + } + + #[inline(always)] + pub fn into_boxed_str(self) -> Box + { + unsafe { + std::str::from_boxed_utf8_unchecked(self.into_boxed_bytes()) + } + } + + #[inline(always)] + pub fn into_boxed_bytes(self) -> Box<[u8]> + { + let bx = unsafe { + Box::from_raw(ptr::slice_from_raw_parts_mut(self.data.as_ptr(), self.len)) + }; + mem::forget(self); + bx + } +} + +impl From for String +{ + #[inline] + fn from(from: HeapString) -> Self + { + from.into_boxed_str().into() + } +} + + +impl From for HeapString +{ + #[inline] + fn from(from: String) -> Self + { + Self::new(from.into_boxed_str()) + } +} + + +impl ops::Drop for HeapString +{ + fn drop(&mut self) { + drop(unsafe { + Box::from_raw(ptr::slice_from_raw_parts_mut(self.data.as_ptr(), self.len)) + }); + } +} + +#[repr(C)] +union SmallStringInner +{ + fill_ptr: usize, + stack: ManuallyDrop>, + heap: ManuallyDrop, +} + +impl SmallStringInner +{ + #[inline(always)] + fn is_heap(&self) -> bool + { + (unsafe { self.fill_ptr }) > SIZE + } + #[inline(always)] + fn get_stack_mut(&mut self) -> Result<&'_ mut StackString, &'_ mut HeapString> + { + if self.is_heap() { + unsafe { + Err(&mut self.heap) + } + } else { + unsafe { + Ok(&mut self.stack) + } + } + } + #[inline(always)] + fn get_heap_mut(&mut self) -> Result<&'_ mut HeapString, &'_ mut StackString> + { + if self.is_heap() { + unsafe { + Ok(&mut self.heap) + } + } else { + unsafe { + Err(&mut self.stack) + } + } + } + #[inline(always)] + fn get_stack(&self) -> Result<&'_ StackString, &'_ HeapString> + { + if self.is_heap() { + unsafe { + Err(&self.heap) + } + } else { + unsafe { + Ok(&self.stack) + } + } + } + #[inline(always)] + fn get_heap(&self) -> Result<&'_ HeapString, &'_ StackString> + { + if self.is_heap() { + unsafe { + Ok(&self.heap) + } + } else { + unsafe { + Err(&self.stack) + } + } + } +} + +impl ops::Drop for SmallStringInner +{ + fn drop(&mut self) { + if self.is_heap() { + unsafe { + ManuallyDrop::drop(&mut self.heap); + } + } // StackString does not need dropping. + } +} + +/// A string that may or may not be allocated on the heap +//TODO: impl Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash; etc. +pub struct SmallString +{ + inner: SmallStringInner, +} +unsafe impl Send for SmallString{} +unsafe impl Sync for SmallString{} + +//TODO: Document +impl SmallString +{ + #[inline] + pub const fn new() -> Self + { + Self { + inner: SmallStringInner { + stack: ManuallyDrop::new(StackString::new()) + } + } + } + #[inline] + pub fn as_str(&self) -> &str + { + match self.inner.get_stack() { + Ok(st) => st.as_str(), + Err(he) => he.as_str(), + } + } + + #[inline] + pub fn len(&self) -> usize + { + unsafe { self.inner.fill_ptr } + } + + #[inline] + pub fn is_allocated(&self) -> bool + { + self.inner.is_heap() + } + + //TODO: Appending, etc. Moving to heap, etc. +} + +impl Borrow for SmallString +{ + #[inline] + fn borrow(&self) -> &str { + self.as_str() + } +} + +impl ops::Deref for SmallString +{ + type Target = str; + #[inline] + fn deref(&self) -> &Self::Target { + self.as_str() + } +} + +impl From for SmallString +{ + #[inline] + fn from(from: String) -> Self + { + Self{ inner: if from.len() > SIZE { + SmallStringInner { heap: ManuallyDrop::new(from.into()) } + } else { + SmallStringInner { + stack: ManuallyDrop::new({ + let res = StackString::try_from(from); + debug_assert!(res.is_ok(), "String conversion failed for stack sized string ({}) within bounds {SIZE}", res.unwrap_err().0); + // SAFETY: The precondition of StackString::try_from::() returning `Err` has already been checked. + unsafe { res.unwrap_unchecked() } + })} + }} + } +} + +impl From> for String +{ + #[inline] + fn from(mut from: SmallString) -> Self + { + let res = if from.is_allocated() { + unsafe { + ManuallyDrop::take(&mut from.inner.heap) + }.into() + } else { + unsafe { + from.inner.stack.as_str() + }.into() + }; + // If heap allocated, the memory has already been moved. If not, then drop isn't needed anyway + mem::forget(from.inner); + res + } +} + + +impl std::str::FromStr for SmallString +{ + type Err = std::convert::Infallible; + + #[inline] + fn from_str(s: &str) -> Result { + let inner = if s.len() > SIZE { + SmallStringInner { heap: ManuallyDrop::new(HeapString::new(s.into())) } + } else { + SmallStringInner { stack: ManuallyDrop::new({ + let res = StackString::from_str(s); + debug_assert!(res.is_ok(), "String conversion failed for stack sized string ({}) within bounds {SIZE}", s.len()); + // SAFETY: The precondition of StackString::from_str() returning `Err` has already been checked. + unsafe { res.unwrap_unchecked() } + })} + }; + Ok(Self{inner}) + } +}