//! Stack-allocate a string until a certain point, then, move it to the heap. use super::*; use mem::ManuallyDrop; use std::{ ptr::NonNull, ops, }; #[derive(Debug)] #[repr(C)] struct HeapString { len: usize, // fill_ptr // TODO: add this: cap: usize, // actual size of `data` slice data: NonNull, // Box<[u8]> allocated. } unsafe fn unwrap_boxed_slice(data: Box<[T]>) -> (usize, NonNull) { let len = data.len(); let data = { let raw = Box::into_raw(data); debug_assert!(!raw.is_null(), "Box::into_raw returned null"); let raw = raw.as_mut().unwrap_unchecked().as_mut_ptr(); debug_assert!(!raw.is_null(), "raw slice is null"); NonNull::new_unchecked(raw) }; (len, data) } impl HeapString { #[inline(always)] pub unsafe fn new_from_bytes(data: Box<[u8]>) -> Self { let (len, data) = unwrap_boxed_slice(data); Self { len, data } } #[inline(always)] pub fn new(data: Box) -> Self { unsafe { Self::new_from_bytes(data.into_boxed_bytes()) } } #[inline(always)] pub unsafe fn as_bytes_mut(&mut self) -> &mut [u8] { slice::from_raw_parts_mut(self.data.as_ptr(), self.len) } #[inline(always)] pub fn as_bytes(&self) -> &[u8] { unsafe { slice::from_raw_parts(self.data.as_ptr() as *const u8, self.len) } } #[inline(always)] pub fn as_mut_str(&mut self) -> &mut str { unsafe { std::str::from_utf8_unchecked_mut(self.as_bytes_mut()) } } #[inline(always)] pub fn as_str(&self) -> &str { unsafe { std::str::from_utf8_unchecked(self.as_bytes()) } } #[inline(always)] pub fn into_boxed_str(self) -> Box { unsafe { std::str::from_boxed_utf8_unchecked(self.into_boxed_bytes()) } } #[inline(always)] pub fn into_boxed_bytes(self) -> Box<[u8]> { let bx = unsafe { Box::from_raw(ptr::slice_from_raw_parts_mut(self.data.as_ptr(), self.len)) }; mem::forget(self); bx } /// Allocates `more` more bytes, and extends `len` by `more`. /// /// # Returns /// Pointer to the start of the uninitialised newly allocated memory. /// /// # Safety /// The caller must initialise the memory returned from this function call to `more` bytes. #[inline(always)] unsafe fn extend_allocate(&mut self, more: usize) -> NonNull { let mut bx: Vec = Box::from_raw(ptr::slice_from_raw_parts_mut(self.data.as_ptr(), self.len)).into(); bx.reserve_exact(more); debug_assert_eq!(self.len+more, bx.capacity(), "Bad reserve_exact()"); bx.set_len(bx.capacity()); bx.truncate(self.len+more); let (len, data) = unwrap_boxed_slice(bx.into_boxed_slice()); debug_assert_eq!(len, self.len+more, "Bad into_boxed_slice()"); self.len = len; self.data = data; let p = NonNull::new(self.data.as_ptr().sub(more)); debug_assert!(p.is_some(), "data - more == null"); p.unwrap_unchecked() } /// Allocates enough space to fit exactly `bytes` more data, updates `self.len`, then copies those bytes into the newly allocated memory. /// /// # Returns /// The newly extended slice of `self`'s data now containing `bytes`. /// /// # Safety /// The caller must guarantee that `bytes` is valid utf8. #[inline(always)] unsafe fn extend_from_bytes_unchecked<'a>(&'a mut self, bytes: &[u8]) -> &'a mut [u8] { let len = bytes.len(); let end = self.extend_allocate(len); ptr::copy_nonoverlapping(bytes.as_ptr(), end.as_ptr(), len); slice::from_raw_parts_mut(end.as_ptr(), len) } //TODO: extend_from_str() } impl From for String { #[inline(always)] fn from(from: HeapString) -> Self { from.into_boxed_str().into() } } impl From for HeapString { #[inline(always)] fn from(from: String) -> Self { Self::new(from.into_boxed_str()) } } impl From> for HeapString { #[inline(always)] fn from(from: Box) -> Self { Self::new(from) } } impl From for Box { #[inline(always)] fn from(from: HeapString) -> Self { from.into_boxed_str() } } impl From for Box<[u8]> { #[inline(always)] fn from(from: HeapString) -> Self { from.into_boxed_bytes() } } impl<'a> From<&'a str> for HeapString { #[inline(always)] fn from(from: &'a str) -> Self { Self::new(from.into()) } } impl ops::Drop for HeapString { fn drop(&mut self) { drop(unsafe { Box::from_raw(ptr::slice_from_raw_parts_mut(self.data.as_ptr(), self.len)) }); } } #[repr(C)] union SmallStringInner { fill_ptr: usize, stack: ManuallyDrop>, heap: ManuallyDrop, } impl SmallStringInner { #[inline(always)] fn is_heap(&self) -> bool { (unsafe { self.fill_ptr }) > SIZE } #[inline(always)] fn get_stack_mut(&mut self) -> Result<&'_ mut StackString, &'_ mut HeapString> { if self.is_heap() { unsafe { Err(&mut self.heap) } } else { unsafe { Ok(&mut self.stack) } } } #[inline(always)] fn get_heap_mut(&mut self) -> Result<&'_ mut HeapString, &'_ mut StackString> { if self.is_heap() { unsafe { Ok(&mut self.heap) } } else { unsafe { Err(&mut self.stack) } } } #[inline(always)] fn get_stack(&self) -> Result<&'_ StackString, &'_ HeapString> { if self.is_heap() { unsafe { Err(&self.heap) } } else { unsafe { Ok(&self.stack) } } } #[inline(always)] fn get_heap(&self) -> Result<&'_ HeapString, &'_ StackString> { if self.is_heap() { unsafe { Ok(&self.heap) } } else { unsafe { Err(&self.stack) } } } } impl ops::Drop for SmallStringInner { fn drop(&mut self) { if self.is_heap() { unsafe { ManuallyDrop::drop(&mut self.heap); } } // StackString does not need dropping. } } /// A string that may or may not be allocated on the heap //TODO: impl Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash; etc. pub struct SmallString { inner: SmallStringInner, } unsafe impl Send for SmallString{} unsafe impl Sync for SmallString{} //TODO: Document impl SmallString { #[inline] pub const fn new() -> Self { Self { inner: SmallStringInner { stack: ManuallyDrop::new(StackString::new()) } } } #[inline] pub fn as_str(&self) -> &str { match self.inner.get_stack() { Ok(st) => st.as_str(), Err(he) => he.as_str(), } } #[inline] pub fn len(&self) -> usize { unsafe { self.inner.fill_ptr } } #[inline] pub fn is_allocated(&self) -> bool { self.inner.is_heap() } //TODO: Appending, etc. Moving to heap, etc. /// Moves data from `inner.stack` to `inner.heap`, returns a reference to the newly allocated `HeapString` /// /// # Safety /// These conditions must be met or calling this is UB: /// * The active discriminant is `inner.stack`. /// * After this returns, the active discriminant is recognised as `inner.heap`. /// * `is_allocated()` must return `true` after the operation that calls this is completed. #[inline(always)] unsafe fn shunt_to_heap_unchecked(&mut self) -> &'_ mut HeapString { let allocated: Box = self.inner.stack.as_str().into(); let current = &mut self.inner.heap; *current = ManuallyDrop::new(HeapString::new(allocated)); current } /// Shunt the stack-allocated to heap along with 1 or more `strs` /// /// # Safety /// * The caller must guarantee that the current invariant is `inner.stack`. /// * the caller must recognise that the invariant after this function returns is `inner.heap`. /// * the `fill_ptr` will be updated automacitally. The caller must guarantee that is is `> SIZE` when this function returns. #[inline(always)] unsafe fn shunt_to_heap_with_unchecked<'i, I>(&'i mut self, strings: I) -> &'i mut HeapString where I: IntoIterator, { let string = { let mut string: String = self.inner.stack.as_str().into(); string.extend(strings); string.into_boxed_str() }; let heap = &mut self.inner.heap; *heap = ManuallyDrop::new(HeapString::new(string)); heap } /// Extends the memory inside `self` to fit more `bytes`. /// The data is moved to the heap first if `self.len() + bytes.len() > SIZE`. /// /// # Returns /// A mutable reference to the memory inside `self` now containing the data of `bytes`. /// /// # Safety /// The caller must ensure `bytes` is valid utf-8. #[inline(always)] unsafe fn extend_from_bytes_unchecked<'i>(&'i mut self, bytes: &[u8]) -> &'i mut [u8] { let len = self.len(); if !self.inner.is_heap() && bytes.len() + len > SIZE { return self.shunt_to_heap_unchecked().extend_from_bytes_unchecked(bytes); } match self.inner.get_stack_mut() { Ok(stack) => { let (end, fp) = (stack.buf_end(), &mut stack.fill_ptr); ptr::copy_nonoverlapping(bytes.as_ptr(), end, bytes.len()); let slice = slice::from_raw_parts_mut(end, *fp); *fp += bytes.len(); slice }, Err(heap) => { heap.extend_from_bytes_unchecked(bytes) } } } #[inline] pub fn extend_from_str<'i, 'a: 'i>(&'i mut self, s: &'a str) -> &'i mut str { if self.inner.is_heap() { // Append to heap. unsafe { std::str::from_utf8_unchecked_mut((&mut *(self.inner.heap)).extend_from_bytes_unchecked(s.as_bytes())) } } else { // Attempt to append to stack let appended_stack = (unsafe { &mut self.inner.stack }).append_from_str(s); if appended_stack != s.len() { // Shunt to heap, along with the rest of `s`. let s = &s[appended_stack..]; unsafe { self.shunt_to_heap_with_unchecked(std::iter::once(s)).as_mut_str() } } else { // Fits in stack, return that. (unsafe {&mut self.inner.stack}).as_mut_str() } } } } impl Borrow for SmallString { #[inline] fn borrow(&self) -> &str { self.as_str() } } impl ops::Deref for SmallString { type Target = str; #[inline] fn deref(&self) -> &Self::Target { self.as_str() } } impl<'a, const SIZE: usize> From<&'a str> for SmallString { #[inline] fn from(string: &'a str) -> Self { if string.len() <= SIZE { match StackString::::try_from(string) { Ok(ss) => return ss.into(), _ => (), } } // Too large, shunt to heap HeapString::from(string).into() } } impl From> for SmallString { #[inline] fn from(from: StackString) -> Self { Self { inner: SmallStringInner { stack: ManuallyDrop::new(from), } } } } impl From for SmallString { #[inline(always)] fn from(from: HeapString) -> Self { Self { inner: SmallStringInner { heap: ManuallyDrop::new(from), } } } } impl From> for HeapString { #[inline(always)] fn from(mut from: SmallString) -> Self { let h = if from.is_allocated() { unsafe { ManuallyDrop::take(&mut from.inner.heap) } } else { unsafe { from.inner.stack.as_str() }.into() }; std::mem::forget(from); h } } impl From for SmallString { #[inline] fn from(from: String) -> Self { Self{ inner: if from.len() > SIZE { SmallStringInner { heap: ManuallyDrop::new(from.into()) } } else { SmallStringInner { stack: ManuallyDrop::new({ let res = StackString::try_from(from); debug_assert!(res.is_ok(), "String conversion failed for stack sized string ({}) within bounds {SIZE}", res.unwrap_err().0); // SAFETY: The precondition of StackString::try_from::() returning `Err` has already been checked. unsafe { res.unwrap_unchecked() } })} }} } } impl From> for String { #[inline] fn from(mut from: SmallString) -> Self { let res = if from.is_allocated() { unsafe { ManuallyDrop::take(&mut from.inner.heap) }.into() } else { unsafe { from.inner.stack.as_str() }.into() }; // If heap allocated, the memory has already been moved. If not, then drop isn't needed anyway mem::forget(from.inner); res } } impl std::str::FromStr for SmallString { type Err = std::convert::Infallible; #[inline] fn from_str(s: &str) -> Result { let inner = if s.len() > SIZE { SmallStringInner { heap: ManuallyDrop::new(HeapString::new(s.into())) } } else { SmallStringInner { stack: ManuallyDrop::new({ let res = StackString::from_str(s); debug_assert!(res.is_ok(), "String conversion failed for stack sized string ({}) within bounds {SIZE}", s.len()); // SAFETY: The precondition of StackString::from_str() returning `Err` has already been checked. unsafe { res.unwrap_unchecked() } })} }; Ok(Self{inner}) } } impl fmt::Display for SmallString { #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.write_str(self.as_str()) } } impl> Extend for SmallString { #[inline] fn extend>(&mut self, iter: U) { for s in iter { //XXX: There's gotta be a more efficient way for this, like we have in `shunt_with_unchecked()`. self.extend_from_str(s.as_ref()); } } } impl fmt::Write for SmallString { #[inline] fn write_str(&mut self, s: &str) -> fmt::Result { (self.extend_from_str(s).len() == s.len()) .then(|| ()).ok_or_else(|| fmt::Error::default()) } #[inline] fn write_char(&mut self, c: char) -> fmt::Result { let l = c.len_utf8(); if self.is_allocated() { let heap = unsafe {&mut self.inner.heap}; let slice = unsafe { std::slice::from_raw_parts_mut(heap.extend_allocate(l).as_ptr(), l) }; let _ = c.encode_utf8(slice); } else if self.len() + l > SIZE { // Shunt to heap with `c` let mut buf = [0u8; mem::size_of::()]; unsafe { self.shunt_to_heap_with_unchecked( std::iter::once_with(|| c.encode_utf8(&mut buf[..])).map(|&mut ref x| x) ); } } else { // Room in stack for `c` let stack = unsafe{&mut self.inner.stack}; let res = stack.write_char(c); debug_assert!(res.is_ok(), "failed to append {c} to stack of len {}, when size is {SIZE} and char is only {l}", stack.len()); let _ = res; } Ok(()) } } const _:() = { use std::io::{ self, Write, }; impl Write for SmallString { #[inline] fn write(&mut self, buf: &[u8]) -> std::io::Result { let buf = std::str::from_utf8(buf).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; Ok(self.extend_from_str(buf).len()) } #[inline] fn flush(&mut self) -> io::Result<()> { Ok(()) } #[inline] fn write_all(&mut self, buf: &[u8]) -> io::Result<()> { let buf = std::str::from_utf8(buf).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; (self.extend_from_str(buf).len() == buf.len()) .then(|| ()).ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "failed to write entire buffer")) } } }; #[cfg(test)] mod tests { use super::*; #[test] fn extending() { let mut ss: SmallString<40> = "Hello world".into(); } }