//! Stack-allocate a string up to a fixed capacity, then move it to the heap once it grows past that point.
use super::*;
use mem::ManuallyDrop;
use std::{
    ptr::NonNull,
    ops,
};
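// Example (a sketch of the intended usage; assumes only the `SmallString` API
// defined below):
//
//     let mut s: SmallString<8> = "hi".into();
//     assert!(!s.is_allocated());          // still in the inline buffer
//     s.extend_from_str(" there, world");
//     assert!(s.is_allocated());           // spilled to the heap
//     assert_eq!(s.as_str(), "hi there, world");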
#[derive(Debug)]
#[repr(C)]
struct HeapString
{
    len: usize, // fill_ptr
    // TODO: add this: cap: usize, // actual size of `data` slice
    data: NonNull<u8>, // Box<[u8]> allocated.
}
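/// Decomposes a boxed slice into its length and a non-null pointer to its first
/// element, without dropping the allocation.
///
/// # Safety
/// The caller takes ownership of the allocation and must eventually rebuild and
/// drop it (e.g. via `Box::from_raw` on a reconstructed slice pointer), or the
/// memory will leak.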
unsafe fn unwrap_boxed_slice<T>(data: Box<[T]>) -> (usize, NonNull<T>)
{
    let len = data.len();
    let data = {
        let raw = Box::into_raw(data);
        debug_assert!(!raw.is_null(), "Box::into_raw returned null");
        let raw = raw.as_mut().unwrap_unchecked().as_mut_ptr();
        debug_assert!(!raw.is_null(), "raw slice is null");
        NonNull::new_unchecked(raw)
    };
    (len, data)
}
impl HeapString {
    #[inline(always)]
    pub unsafe fn new_from_bytes(data: Box<[u8]>) -> Self
    {
        let (len, data) = unwrap_boxed_slice(data);
        Self {
            len, data
        }
    }
    #[inline(always)]
    pub fn new(data: Box<str>) -> Self
    {
        unsafe {
            Self::new_from_bytes(data.into_boxed_bytes())
        }
    }
    #[inline(always)]
    pub unsafe fn as_bytes_mut(&mut self) -> &mut [u8]
    {
        slice::from_raw_parts_mut(self.data.as_ptr(), self.len)
    }
    #[inline(always)]
    pub fn as_bytes(&self) -> &[u8]
    {
        unsafe {
            slice::from_raw_parts(self.data.as_ptr() as *const u8, self.len)
        }
    }
    #[inline(always)]
    pub fn as_mut_str(&mut self) -> &mut str
    {
        unsafe {
            std::str::from_utf8_unchecked_mut(self.as_bytes_mut())
        }
    }
    #[inline(always)]
    pub fn as_str(&self) -> &str
    {
        unsafe {
            std::str::from_utf8_unchecked(self.as_bytes())
        }
    }
    #[inline(always)]
    pub fn into_boxed_str(self) -> Box<str>
    {
        unsafe {
            std::str::from_boxed_utf8_unchecked(self.into_boxed_bytes())
        }
    }
    #[inline(always)]
    pub fn into_boxed_bytes(self) -> Box<[u8]>
    {
        let bx = unsafe {
            Box::from_raw(ptr::slice_from_raw_parts_mut(self.data.as_ptr(), self.len))
        };
        mem::forget(self);
        bx
    }
    /// Grows the allocation by `more` bytes, and extends `len` by `more`.
    ///
    /// # Returns
    /// Pointer to the start of the uninitialised, newly allocated memory.
    ///
    /// # Safety
    /// The caller must initialise `more` bytes of memory starting at the returned pointer before the string is next read.
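    ///
    /// # Example
    /// A sketch of the intended call pattern (illustrative only, not compiled as a doctest):
    ///
    /// ```ignore
    /// let start = unsafe { hs.extend_allocate(3) };
    /// // `start` points at 3 uninitialised bytes that we must now fill:
    /// unsafe { ptr::copy_nonoverlapping(b"abc".as_ptr(), start.as_ptr(), 3) };
    /// ```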
    #[inline(always)]
    unsafe fn extend_allocate(&mut self, more: usize) -> NonNull<u8>
    {
        let mut bx: Vec<u8> = Box::from_raw(ptr::slice_from_raw_parts_mut(self.data.as_ptr(), self.len)).into();
        bx.reserve_exact(more);
        // NOTE: The allocator is allowed to over-allocate, so check `>=` rather than `==`.
        debug_assert!(bx.capacity() >= self.len + more, "Bad reserve_exact()");
        bx.set_len(self.len + more);
        let (len, data) = unwrap_boxed_slice(bx.into_boxed_slice());
        debug_assert_eq!(len, self.len + more, "Bad into_boxed_slice()");
        let old_len = self.len;
        self.len = len;
        self.data = data;
        // The uninitialised region starts right after the old contents.
        let p = NonNull::new(self.data.as_ptr().add(old_len));
        debug_assert!(p.is_some(), "data + old_len == null");
        p.unwrap_unchecked()
    }
    /// Allocates enough space to fit exactly `bytes.len()` more data, updates `self.len`, then copies those bytes into the newly allocated memory.
    ///
    /// # Returns
    /// The newly extended slice of `self`'s data, now containing a copy of `bytes`.
    ///
    /// # Safety
    /// The caller must guarantee that `bytes` is valid UTF-8.
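    ///
    /// # Example
    /// A sketch of the intended call pattern (illustrative only, not compiled as a doctest):
    ///
    /// ```ignore
    /// let appended = unsafe { hs.extend_from_bytes_unchecked("abc".as_bytes()) };
    /// assert_eq!(appended, b"abc");
    /// ```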
    #[inline(always)]
    unsafe fn extend_from_bytes_unchecked<'a>(&'a mut self, bytes: &[u8]) -> &'a mut [u8]
    {
        let len = bytes.len();
        let end = self.extend_allocate(len);
        ptr::copy_nonoverlapping(bytes.as_ptr(), end.as_ptr(), len);
        slice::from_raw_parts_mut(end.as_ptr(), len)
    }
    //TODO: extend_from_str()
}
impl From<HeapString> for String
{
    #[inline(always)]
    fn from(from: HeapString) -> Self
    {
        from.into_boxed_str().into()
    }
}
impl From<String> for HeapString
{
    #[inline(always)]
    fn from(from: String) -> Self
    {
        Self::new(from.into_boxed_str())
    }
}
impl From<Box<str>> for HeapString
{
    #[inline(always)]
    fn from(from: Box<str>) -> Self
    {
        Self::new(from)
    }
}
impl From<HeapString> for Box<str>
{
    #[inline(always)]
    fn from(from: HeapString) -> Self
    {
        from.into_boxed_str()
    }
}
impl From<HeapString> for Box<[u8]>
{
    #[inline(always)]
    fn from(from: HeapString) -> Self
    {
        from.into_boxed_bytes()
    }
}
impl<'a> From<&'a str> for HeapString
{
    #[inline(always)]
    fn from(from: &'a str) -> Self
    {
        Self::new(from.into())
    }
}
impl ops::Drop for HeapString
{
    fn drop(&mut self) {
        drop(unsafe {
            Box::from_raw(ptr::slice_from_raw_parts_mut(self.data.as_ptr(), self.len))
        });
    }
}
#[repr(C)]
union SmallStringInner<const SIZE: usize>
{
    fill_ptr: usize,
    stack: ManuallyDrop<StackString<SIZE>>,
    heap: ManuallyDrop<HeapString>,
}
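// Layout note (an assumption this union relies on): both `StackString<SIZE>`
// and `HeapString` are `#[repr(C)]` with a leading `usize` length/fill-pointer
// field, so reading `fill_ptr` is valid whichever variant is active. The
// discriminant is encoded in its value: the heap variant is only ever used once
// the string has outgrown the inline buffer, so `fill_ptr > SIZE` implies `heap`.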
impl<const SIZE: usize> SmallStringInner<SIZE>
{
    #[inline(always)]
    fn is_heap(&self) -> bool
    {
        (unsafe { self.fill_ptr }) > SIZE
    }
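    // The getters below use `Result` as a stand-in for an `Either` type: `Ok`
    // carries the variant that was asked for, `Err` the one that is actually
    // active, so callers can always make progress without a second check.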
    #[inline(always)]
    fn get_stack_mut(&mut self) -> Result<&'_ mut StackString<SIZE>, &'_ mut HeapString>
    {
        if self.is_heap() {
            unsafe {
                Err(&mut self.heap)
            }
        } else {
            unsafe {
                Ok(&mut self.stack)
            }
        }
    }
    #[inline(always)]
    fn get_heap_mut(&mut self) -> Result<&'_ mut HeapString, &'_ mut StackString<SIZE>>
    {
        if self.is_heap() {
            unsafe {
                Ok(&mut self.heap)
            }
        } else {
            unsafe {
                Err(&mut self.stack)
            }
        }
    }
    #[inline(always)]
    fn get_stack(&self) -> Result<&'_ StackString<SIZE>, &'_ HeapString>
    {
        if self.is_heap() {
            unsafe {
                Err(&self.heap)
            }
        } else {
            unsafe {
                Ok(&self.stack)
            }
        }
    }
    #[inline(always)]
    fn get_heap(&self) -> Result<&'_ HeapString, &'_ StackString<SIZE>>
    {
        if self.is_heap() {
            unsafe {
                Ok(&self.heap)
            }
        } else {
            unsafe {
                Err(&self.stack)
            }
        }
    }
}
impl<const SIZE: usize> ops::Drop for SmallStringInner<SIZE>
{
    fn drop(&mut self) {
        if self.is_heap() {
            unsafe {
                ManuallyDrop::drop(&mut self.heap);
            }
        } // StackString does not need dropping.
    }
}
/// A string that may or may not be allocated on the heap.
//TODO: impl Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash; etc.
pub struct SmallString<const SIZE: usize>
{
    inner: SmallStringInner<SIZE>,
}
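// SAFETY: `SmallString` exclusively owns its data; the raw `NonNull<u8>` in the
// heap variant is never shared or aliased, so sending or sharing the wrapper
// across threads is sound (the auto-impls are only suppressed by that pointer).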
unsafe impl<const S: usize> Send for SmallString<S> {}
unsafe impl<const S: usize> Sync for SmallString<S> {}
//TODO: Document
impl<const SIZE: usize> SmallString<SIZE>
{
    #[inline]
    pub const fn new() -> Self
    {
        Self {
            inner: SmallStringInner {
                stack: ManuallyDrop::new(StackString::new())
            }
        }
    }
    #[inline]
    pub fn as_str(&self) -> &str
    {
        match self.inner.get_stack() {
            Ok(st) => st.as_str(),
            Err(he) => he.as_str(),
        }
    }
    #[inline]
    pub fn len(&self) -> usize
    {
        unsafe { self.inner.fill_ptr }
    }
    #[inline]
    pub fn is_allocated(&self) -> bool
    {
        self.inner.is_heap()
    }
    //TODO: Appending, etc. Moving to heap, etc.
    /// Moves the data from `inner.stack` to `inner.heap`, and returns a reference to the newly allocated `HeapString`.
    ///
    /// # Safety
    /// These conditions must be met, or calling this is UB:
    /// * The active variant is `inner.stack` when this is called.
    /// * After this returns, the caller must treat the active variant as `inner.heap`.
    /// * `is_allocated()` must return `true` after the operation that calls this is completed (i.e. the caller must grow the string past `SIZE` before relinquishing control).
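    ///
    /// # Example
    /// The call pattern used by `extend_from_bytes_unchecked()` below (illustrative only, not compiled as a doctest):
    ///
    /// ```ignore
    /// if !self.inner.is_heap() && bytes.len() + len > SIZE {
    ///     // Growing past `SIZE` immediately afterwards upholds the `fill_ptr > SIZE` invariant.
    ///     return self.shunt_to_heap_unchecked().extend_from_bytes_unchecked(bytes);
    /// }
    /// ```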
    #[inline(always)]
    unsafe fn shunt_to_heap_unchecked(&mut self) -> &'_ mut HeapString
    {
        let allocated: Box<str> = self.inner.stack.as_str().into();
        let current = &mut self.inner.heap;
        *current = ManuallyDrop::new(HeapString::new(allocated));
        current
    }
    /// Shunts the stack-allocated string to the heap, along with 1 or more extra `str`s appended to it.
    ///
    /// # Safety
    /// * The caller must guarantee that the active variant is currently `inner.stack`.
    /// * The caller must treat the active variant as `inner.heap` after this function returns.
    /// * The `fill_ptr` is updated automatically, but the caller must guarantee that it is `> SIZE` when this function returns.
    #[inline(always)]
    unsafe fn shunt_to_heap_with_unchecked<'i, I>(&'i mut self, strings: I) -> &'i mut HeapString
    where I: IntoIterator<Item = &'i str>,
    {
        let string = {
            let mut string: String = self.inner.stack.as_str().into();
            string.extend(strings);
            string.into_boxed_str()
        };
        let heap = &mut self.inner.heap;
        *heap = ManuallyDrop::new(HeapString::new(string));
        heap
    }
    /// Extends the memory inside `self` to fit more `bytes`.
    /// The data is moved to the heap first if `self.len() + bytes.len() > SIZE`.
    ///
    /// # Returns
    /// A mutable reference to the memory inside `self` that now contains a copy of `bytes`.
    ///
    /// # Safety
    /// The caller must ensure `bytes` is valid UTF-8.
    #[inline(always)]
    unsafe fn extend_from_bytes_unchecked<'i>(&'i mut self, bytes: &[u8]) -> &'i mut [u8]
    {
        let len = self.len();
        if !self.inner.is_heap() && bytes.len() + len > SIZE {
            return self.shunt_to_heap_unchecked().extend_from_bytes_unchecked(bytes);
        }
        match self.inner.get_stack_mut() {
            Ok(stack) => {
                let (end, fp) = (stack.buf_end(), &mut stack.fill_ptr);
                ptr::copy_nonoverlapping(bytes.as_ptr(), end, bytes.len());
                // Return only the newly appended region, not the old fill-pointer's worth of data.
                let slice = slice::from_raw_parts_mut(end, bytes.len());
                *fp += bytes.len();
                slice
            },
            Err(heap) => {
                heap.extend_from_bytes_unchecked(bytes)
            }
        }
    }
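    /// Appends `s` to the string, spilling to the heap first if it no longer fits in the inline buffer.
    ///
    /// # Returns
    /// The newly appended region of the string (so its length always equals `s.len()`).
    ///
    /// # Example
    /// A sketch of the intended usage (assumes only the API in this file):
    ///
    /// ```ignore
    /// let mut ss: SmallString<16> = "Hello".into();
    /// assert_eq!(&*ss.extend_from_str(", world!"), ", world!");
    /// assert_eq!(ss.as_str(), "Hello, world!");
    /// ```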
    #[inline]
    pub fn extend_from_str<'i, 'a: 'i>(&'i mut self, s: &'a str) -> &'i mut str
    {
        if self.inner.is_heap() {
            // Append to heap.
            unsafe {
                std::str::from_utf8_unchecked_mut((&mut *(self.inner.heap)).extend_from_bytes_unchecked(s.as_bytes()))
            }
        } else {
            let len_before = self.len();
            // Attempt to append to stack.
            let appended_stack = (unsafe { &mut self.inner.stack }).append_from_str(s);
            if appended_stack != s.len() {
                // Shunt to heap, along with the rest of `s`.
                let rest = &s[appended_stack..];
                let heap = unsafe { self.shunt_to_heap_with_unchecked(std::iter::once(rest)) };
                // Return only the newly appended region (everything past the old length),
                // so the length of the returned `str` matches `s`.
                &mut heap.as_mut_str()[len_before..]
            } else {
                // Fits in stack; return the newly appended region.
                &mut (unsafe { &mut self.inner.stack }).as_mut_str()[len_before..]
            }
        }
    }
}
impl<const SIZE: usize> Borrow<str> for SmallString<SIZE>
{
    #[inline]
    fn borrow(&self) -> &str {
        self.as_str()
    }
}
impl<const SIZE: usize> ops::Deref for SmallString<SIZE>
{
    type Target = str;
    #[inline]
    fn deref(&self) -> &Self::Target {
        self.as_str()
    }
}
impl<'a, const SIZE: usize> From<&'a str> for SmallString<SIZE>
{
    #[inline]
    fn from(string: &'a str) -> Self {
        if string.len() <= SIZE {
            if let Ok(ss) = StackString::<SIZE>::try_from(string) {
                return ss.into();
            }
        }
        // Too large; shunt to heap.
        HeapString::from(string).into()
    }
}
impl<const SIZE: usize> From<StackString<SIZE>> for SmallString<SIZE>
{
    #[inline]
    fn from(from: StackString<SIZE>) -> Self
    {
        Self {
            inner: SmallStringInner {
                stack: ManuallyDrop::new(from),
            }
        }
    }
}
impl<const SIZE: usize> From<HeapString> for SmallString<SIZE>
{
    #[inline(always)]
    fn from(from: HeapString) -> Self
    {
        Self {
            inner: SmallStringInner {
                heap: ManuallyDrop::new(from),
            }
        }
    }
}
impl<const SIZE: usize> From<SmallString<SIZE>> for HeapString
{
    #[inline(always)]
    fn from(mut from: SmallString<SIZE>) -> Self
    {
        let h = if from.is_allocated() {
            unsafe {
                ManuallyDrop::take(&mut from.inner.heap)
            }
        } else {
            unsafe {
                from.inner.stack.as_str()
            }.into()
        };
        std::mem::forget(from);
        h
    }
}
impl<const SIZE: usize> From<String> for SmallString<SIZE>
{
    #[inline]
    fn from(from: String) -> Self
    {
        let inner = if from.len() > SIZE {
            SmallStringInner { heap: ManuallyDrop::new(from.into()) }
        } else {
            SmallStringInner {
                stack: ManuallyDrop::new({
                    let res = StackString::try_from(from);
                    debug_assert!(res.is_ok(), "String conversion failed for stack-sized string ({}) within bounds {SIZE}", res.unwrap_err().0);
                    // SAFETY: The precondition of StackString::try_from::<String>() returning `Err` has already been checked.
                    unsafe { res.unwrap_unchecked() }
                })
            }
        };
        Self { inner }
    }
}
impl<const SIZE: usize> From<SmallString<SIZE>> for String
{
    #[inline]
    fn from(mut from: SmallString<SIZE>) -> Self
    {
        let res = if from.is_allocated() {
            unsafe {
                ManuallyDrop::take(&mut from.inner.heap)
            }.into()
        } else {
            unsafe {
                from.inner.stack.as_str()
            }.into()
        };
        // If heap-allocated, the memory has already been moved out; if not, then drop isn't needed anyway.
        mem::forget(from.inner);
        res
    }
}
impl<const SIZE: usize> std::str::FromStr for SmallString<SIZE>
{
    type Err = std::convert::Infallible;
    #[inline]
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let inner = if s.len() > SIZE {
            SmallStringInner { heap: ManuallyDrop::new(HeapString::new(s.into())) }
        } else {
            SmallStringInner {
                stack: ManuallyDrop::new({
                    let res = StackString::from_str(s);
                    debug_assert!(res.is_ok(), "String conversion failed for stack-sized string ({}) within bounds {SIZE}", s.len());
                    // SAFETY: The precondition of StackString::from_str() returning `Err` has already been checked.
                    unsafe { res.unwrap_unchecked() }
                })
            }
        };
        Ok(Self { inner })
    }
}
impl<const SIZE: usize> fmt::Display for SmallString<SIZE>
{
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
    {
        f.write_str(self.as_str())
    }
}
impl<const SIZE: usize, T: AsRef<str>> Extend<T> for SmallString<SIZE>
{
    #[inline]
    fn extend<U: IntoIterator<Item = T>>(&mut self, iter: U) {
        for s in iter {
            //XXX: There's gotta be a more efficient way to do this, like we have in `shunt_to_heap_with_unchecked<I>()`.
            self.extend_from_str(s.as_ref());
        }
    }
}
impl<const SIZE: usize> fmt::Write for SmallString<SIZE>
{
    #[inline]
    fn write_str(&mut self, s: &str) -> fmt::Result {
        (self.extend_from_str(s).len() == s.len())
            .then(|| ()).ok_or_else(|| fmt::Error::default())
    }
    #[inline]
    fn write_char(&mut self, c: char) -> fmt::Result {
        let l = c.len_utf8();
        if self.is_allocated() {
            // Append directly to the heap allocation.
            let heap = unsafe { &mut self.inner.heap };
            let slice = unsafe {
                std::slice::from_raw_parts_mut(heap.extend_allocate(l).as_ptr(), l)
            };
            let _ = c.encode_utf8(slice);
        } else if self.len() + l > SIZE {
            // No room left in the inline buffer: shunt to heap along with `c`.
            let mut buf = [0u8; mem::size_of::<char>()];
            let encoded: &str = c.encode_utf8(&mut buf[..]);
            unsafe {
                self.shunt_to_heap_with_unchecked(std::iter::once(encoded));
            }
        } else {
            // Room in stack for `c`.
            let stack = unsafe { &mut self.inner.stack };
            let res = stack.write_char(c);
            debug_assert!(res.is_ok(), "failed to append {c} to stack of len {}, when size is {SIZE} and char is only {l}", stack.len());
            let _ = res;
        }
        Ok(())
    }
}
const _: () = {
    use std::io::{
        self,
        Write,
    };
    impl<const SIZE: usize> Write for SmallString<SIZE>
    {
        #[inline]
        fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
            let buf = std::str::from_utf8(buf).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
            Ok(self.extend_from_str(buf).len())
        }
        #[inline]
        fn flush(&mut self) -> io::Result<()> {
            Ok(())
        }
        #[inline]
        fn write_all(&mut self, buf: &[u8]) -> io::Result<()> {
            let buf = std::str::from_utf8(buf).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
            (self.extend_from_str(buf).len() == buf.len())
                .then(|| ()).ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "failed to write entire buffer"))
        }
    }
};
#[cfg(test)]
mod tests
{
    use super::*;
    #[test]
    fn extending()
    {
        let mut ss: SmallString<40> = "Hello world".into();
        assert!(!ss.is_allocated());
        // Grow past `SIZE`: this must spill the string to the heap.
        ss.extend_from_str(&"!".repeat(40));
        assert!(ss.is_allocated(), "should have spilled to the heap");
        assert_eq!(&ss[..11], "Hello world");
    }
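    // A few more smoke tests for the heap-spill paths, exercised through the
    // standard `fmt::Write`/`io::Write` impls above (these only use the API
    // defined in this file).
    #[test]
    fn writing_chars_spills_to_heap()
    {
        use std::fmt::Write;
        let mut ss: SmallString<4> = SmallString::new();
        for c in "héllo".chars() {
            ss.write_char(c).unwrap();
        }
        // "héllo" is 6 bytes, so it cannot fit in a 4-byte inline buffer.
        assert!(ss.is_allocated());
        assert_eq!(ss.as_str(), "héllo");
    }
    #[test]
    fn io_write_rejects_invalid_utf8()
    {
        use std::io::Write;
        let mut ss: SmallString<8> = SmallString::new();
        ss.write_all(b"ok").unwrap();
        assert_eq!(ss.as_str(), "ok");
        // Invalid UTF-8 must be reported as `InvalidData` rather than appended.
        assert!(ss.write_all(b"\xff\xfe").is_err());
    }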
}