//! Stack-allocate a string until a certain point, then, move it to the heap.
|
|
use super::*;
|
|
use mem::ManuallyDrop;
|
|
use std::{
|
|
ptr::NonNull,
|
|
ops,
|
|
};
|
|
|
|
/// Heap-allocated string storage: a byte length plus an exclusively-owned
/// allocation.
///
/// `#[repr(C)]` guarantees `len` is the leading field, so it overlays
/// `SmallStringInner::fill_ptr` when this is stored in the union below.
#[derive(Debug)]
#[repr(C)]
struct HeapString
{
    len: usize, // fill_ptr: number of initialised bytes (currently also the allocation size)
    // TODO: add this: cap: usize, // actual size of `data` slice
    data: NonNull<u8>, // Box<[u8]> allocated; freed by Drop / into_boxed_bytes.
}
|
|
|
|
/// Decompose a boxed slice into its raw `(length, data-pointer)` parts
/// without freeing the allocation.
///
/// # Safety
/// The caller takes over ownership of the allocation and must eventually
/// re-assemble it (e.g. via `Box::from_raw`) with exactly `len` elements.
unsafe fn unwrap_boxed_slice<T>(data: Box<[T]>) -> (usize, NonNull<T>)
{
    let len = data.len();
    let raw = Box::into_raw(data);
    debug_assert!(!raw.is_null(), "Box::into_raw returned null");
    // Casting the fat `*mut [T]` to a thin `*mut T` keeps the data address
    // and simply discards the length metadata.
    (len, NonNull::new_unchecked(raw.cast::<T>()))
}
|
|
|
|
impl HeapString {
|
|
#[inline(always)]
|
|
pub unsafe fn new_from_bytes(data: Box<[u8]>) -> Self
|
|
{
|
|
let (len, data) = unwrap_boxed_slice(data);
|
|
Self {
|
|
len, data
|
|
}
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn new(data: Box<str>) -> Self
|
|
{
|
|
unsafe {
|
|
Self::new_from_bytes(data.into_boxed_bytes())
|
|
}
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub unsafe fn as_bytes_mut(&mut self) -> &mut [u8]
|
|
{
|
|
slice::from_raw_parts_mut(self.data.as_ptr(), self.len)
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn as_bytes(&self) -> &[u8]
|
|
{
|
|
unsafe {
|
|
slice::from_raw_parts(self.data.as_ptr() as *const u8, self.len)
|
|
}
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn as_mut_str(&mut self) -> &mut str
|
|
{
|
|
unsafe {
|
|
std::str::from_utf8_unchecked_mut(self.as_bytes_mut())
|
|
}
|
|
}
|
|
#[inline(always)]
|
|
pub fn as_str(&self) -> &str
|
|
{
|
|
unsafe {
|
|
std::str::from_utf8_unchecked(self.as_bytes())
|
|
}
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn into_boxed_str(self) -> Box<str>
|
|
{
|
|
unsafe {
|
|
std::str::from_boxed_utf8_unchecked(self.into_boxed_bytes())
|
|
}
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn into_boxed_bytes(self) -> Box<[u8]>
|
|
{
|
|
let bx = unsafe {
|
|
Box::from_raw(ptr::slice_from_raw_parts_mut(self.data.as_ptr(), self.len))
|
|
};
|
|
mem::forget(self);
|
|
bx
|
|
}
|
|
|
|
/// Allocates `more` more bytes, and extends `len` by `more`.
|
|
///
|
|
/// # Returns
|
|
/// Pointer to the start of the uninitialised newly allocated memory.
|
|
///
|
|
/// # Safety
|
|
/// The caller must initialise the memory returned from this function call to `more` bytes.
|
|
#[inline(always)]
|
|
unsafe fn extend_allocate(&mut self, more: usize) -> NonNull<u8>
|
|
{
|
|
let mut bx: Vec<u8> = Box::from_raw(ptr::slice_from_raw_parts_mut(self.data.as_ptr(), self.len)).into();
|
|
bx.reserve_exact(more);
|
|
debug_assert_eq!(self.len+more, bx.capacity(), "Bad reserve_exact()");
|
|
bx.set_len(bx.capacity());
|
|
bx.truncate(self.len+more);
|
|
|
|
let (len, data) = unwrap_boxed_slice(bx.into_boxed_slice());
|
|
debug_assert_eq!(len, self.len+more, "Bad into_boxed_slice()");
|
|
self.len = len;
|
|
self.data = data;
|
|
let p = NonNull::new(self.data.as_ptr().sub(more));
|
|
debug_assert!(p.is_some(), "data - more == null");
|
|
p.unwrap_unchecked()
|
|
}
|
|
|
|
/// Allocates enough space to fit exactly `bytes` more data, updates `self.len`, then copies those bytes into the newly allocated memory.
|
|
///
|
|
/// # Returns
|
|
/// The newly extended slice of `self`'s data now containing `bytes`.
|
|
///
|
|
/// # Safety
|
|
/// The caller must guarantee that `bytes` is valid utf8.
|
|
#[inline(always)]
|
|
unsafe fn extend_from_bytes_unchecked<'a>(&'a mut self, bytes: &[u8]) -> &'a mut [u8]
|
|
{
|
|
let len = bytes.len();
|
|
let end = self.extend_allocate(len);
|
|
ptr::copy_nonoverlapping(bytes.as_ptr(), end.as_ptr(), len);
|
|
slice::from_raw_parts_mut(end.as_ptr(), len)
|
|
}
|
|
//TODO: extend_from_str()
|
|
}
|
|
|
|
impl From<HeapString> for String
|
|
{
|
|
#[inline(always)]
|
|
fn from(from: HeapString) -> Self
|
|
{
|
|
from.into_boxed_str().into()
|
|
}
|
|
}
|
|
|
|
|
|
impl From<String> for HeapString
|
|
{
|
|
#[inline(always)]
|
|
fn from(from: String) -> Self
|
|
{
|
|
Self::new(from.into_boxed_str())
|
|
}
|
|
}
|
|
|
|
impl From<Box<str>> for HeapString
|
|
{
|
|
#[inline(always)]
|
|
fn from(from: Box<str>) -> Self
|
|
{
|
|
Self::new(from)
|
|
}
|
|
}
|
|
|
|
impl From<HeapString> for Box<str>
|
|
{
|
|
#[inline(always)]
|
|
fn from(from: HeapString) -> Self
|
|
{
|
|
from.into_boxed_str()
|
|
}
|
|
}
|
|
|
|
|
|
impl From<HeapString> for Box<[u8]>
|
|
{
|
|
#[inline(always)]
|
|
fn from(from: HeapString) -> Self
|
|
{
|
|
from.into_boxed_bytes()
|
|
}
|
|
}
|
|
|
|
impl<'a> From<&'a str> for HeapString
|
|
{
|
|
#[inline(always)]
|
|
fn from(from: &'a str) -> Self
|
|
{
|
|
Self::new(from.into())
|
|
}
|
|
}
|
|
|
|
|
|
|
|
impl ops::Drop for HeapString
|
|
{
|
|
fn drop(&mut self) {
|
|
drop(unsafe {
|
|
Box::from_raw(ptr::slice_from_raw_parts_mut(self.data.as_ptr(), self.len))
|
|
});
|
|
}
|
|
}
|
|
|
|
// Discriminant-less storage for `SmallString`: both variants are expected to
// begin with a `usize` length (`HeapString::len` is guaranteed by its
// #[repr(C)]; NOTE(review): assumes `StackString` also puts `fill_ptr` first —
// confirm in its definition). Reading `fill_ptr` is therefore always valid,
// and `fill_ptr > SIZE` identifies the heap variant (see `is_heap()`).
#[repr(C)]
union SmallStringInner<const SIZE: usize>
{
    fill_ptr: usize, // shared length prefix of both variants
    stack: ManuallyDrop<StackString<SIZE>>,
    heap: ManuallyDrop<HeapString>,
}
|
|
|
|
impl<const SIZE: usize> SmallStringInner<SIZE>
|
|
{
|
|
#[inline(always)]
|
|
fn is_heap(&self) -> bool
|
|
{
|
|
(unsafe { self.fill_ptr }) > SIZE
|
|
}
|
|
#[inline(always)]
|
|
fn get_stack_mut(&mut self) -> Result<&'_ mut StackString<SIZE>, &'_ mut HeapString>
|
|
{
|
|
if self.is_heap() {
|
|
unsafe {
|
|
Err(&mut self.heap)
|
|
}
|
|
} else {
|
|
unsafe {
|
|
Ok(&mut self.stack)
|
|
}
|
|
}
|
|
}
|
|
#[inline(always)]
|
|
fn get_heap_mut(&mut self) -> Result<&'_ mut HeapString, &'_ mut StackString<SIZE>>
|
|
{
|
|
if self.is_heap() {
|
|
unsafe {
|
|
Ok(&mut self.heap)
|
|
}
|
|
} else {
|
|
unsafe {
|
|
Err(&mut self.stack)
|
|
}
|
|
}
|
|
}
|
|
#[inline(always)]
|
|
fn get_stack(&self) -> Result<&'_ StackString<SIZE>, &'_ HeapString>
|
|
{
|
|
if self.is_heap() {
|
|
unsafe {
|
|
Err(&self.heap)
|
|
}
|
|
} else {
|
|
unsafe {
|
|
Ok(&self.stack)
|
|
}
|
|
}
|
|
}
|
|
#[inline(always)]
|
|
fn get_heap(&self) -> Result<&'_ HeapString, &'_ StackString<SIZE>>
|
|
{
|
|
if self.is_heap() {
|
|
unsafe {
|
|
Ok(&self.heap)
|
|
}
|
|
} else {
|
|
unsafe {
|
|
Err(&self.stack)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<const SIZE: usize> ops::Drop for SmallStringInner<SIZE>
|
|
{
|
|
fn drop(&mut self) {
|
|
if self.is_heap() {
|
|
unsafe {
|
|
ManuallyDrop::drop(&mut self.heap);
|
|
}
|
|
} // StackString does not need dropping.
|
|
}
|
|
}
|
|
|
|
/// A string that may or may not be allocated on the heap
///
/// Contents up to `SIZE` bytes live inline; longer contents are moved to a
/// heap allocation.
//TODO: impl Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash; etc.
pub struct SmallString<const SIZE: usize>
{
    // Tagless union; the active variant is derived from the shared length
    // prefix (see `SmallStringInner::is_heap`).
    inner: SmallStringInner<SIZE>,
}
|
|
// SAFETY: the heap variant exclusively owns its allocation (like String), so
// moving across threads is sound. NOTE(review): assumes StackString<S> holds
// only plain owned bytes — confirm in its definition.
unsafe impl<const S: usize> Send for SmallString<S>{}
// SAFETY: no interior mutability; `&SmallString` permits only reads.
unsafe impl<const S: usize> Sync for SmallString<S>{}
|
|
|
|
//TODO: Document
|
|
impl<const SIZE: usize> SmallString<SIZE>
|
|
{
|
|
#[inline]
|
|
pub const fn new() -> Self
|
|
{
|
|
Self {
|
|
inner: SmallStringInner {
|
|
stack: ManuallyDrop::new(StackString::new())
|
|
}
|
|
}
|
|
}
|
|
#[inline]
|
|
pub fn as_str(&self) -> &str
|
|
{
|
|
match self.inner.get_stack() {
|
|
Ok(st) => st.as_str(),
|
|
Err(he) => he.as_str(),
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
pub fn len(&self) -> usize
|
|
{
|
|
unsafe { self.inner.fill_ptr }
|
|
}
|
|
|
|
#[inline]
|
|
pub fn is_allocated(&self) -> bool
|
|
{
|
|
self.inner.is_heap()
|
|
}
|
|
|
|
//TODO: Appending, etc. Moving to heap, etc.
|
|
/// Moves data from `inner.stack` to `inner.heap`, returns a reference to the newly allocated `HeapString`
|
|
///
|
|
/// # Safety
|
|
/// These conditions must be met or calling this is UB:
|
|
/// * The active discriminant is `inner.stack`.
|
|
/// * After this returns, the active discriminant is recognised as `inner.heap`.
|
|
/// * `is_allocated()` must return `true` after the operation that calls this is completed.
|
|
#[inline(always)]
|
|
unsafe fn shunt_to_heap_unchecked(&mut self) -> &'_ mut HeapString
|
|
{
|
|
let allocated: Box<str> = self.inner.stack.as_str().into();
|
|
let current = &mut self.inner.heap;
|
|
*current = ManuallyDrop::new(HeapString::new(allocated));
|
|
current
|
|
}
|
|
|
|
/// Shunt the stack-allocated to heap along with 1 or more `strs`
|
|
///
|
|
/// # Safety
|
|
/// * The caller must guarantee that the current invariant is `inner.stack`.
|
|
/// * the caller must recognise that the invariant after this function returns is `inner.heap`.
|
|
/// * the `fill_ptr` will be updated automacitally. The caller must guarantee that is is `> SIZE` when this function returns.
|
|
#[inline(always)]
|
|
unsafe fn shunt_to_heap_with_unchecked<'i, I>(&'i mut self, strings: I) -> &'i mut HeapString
|
|
where I: IntoIterator<Item = &'i str>,
|
|
{
|
|
let string = {
|
|
let mut string: String = self.inner.stack.as_str().into();
|
|
string.extend(strings);
|
|
string.into_boxed_str()
|
|
};
|
|
|
|
let heap = &mut self.inner.heap;
|
|
*heap = ManuallyDrop::new(HeapString::new(string));
|
|
heap
|
|
}
|
|
|
|
/// Extends the memory inside `self` to fit more `bytes`.
|
|
/// The data is moved to the heap first if `self.len() + bytes.len() > SIZE`.
|
|
///
|
|
/// # Returns
|
|
/// A mutable reference to the memory inside `self` now containing the data of `bytes`.
|
|
///
|
|
/// # Safety
|
|
/// The caller must ensure `bytes` is valid utf-8.
|
|
#[inline(always)]
|
|
unsafe fn extend_from_bytes_unchecked<'i>(&'i mut self, bytes: &[u8]) -> &'i mut [u8]
|
|
{
|
|
let len = self.len();
|
|
if !self.inner.is_heap() && bytes.len() + len > SIZE {
|
|
return self.shunt_to_heap_unchecked().extend_from_bytes_unchecked(bytes);
|
|
}
|
|
match self.inner.get_stack_mut() {
|
|
Ok(stack) => {
|
|
let (end, fp) = (stack.buf_end(), &mut stack.fill_ptr);
|
|
ptr::copy_nonoverlapping(bytes.as_ptr(), end, bytes.len());
|
|
let slice = slice::from_raw_parts_mut(end, *fp);
|
|
*fp += bytes.len();
|
|
slice
|
|
},
|
|
Err(heap) => {
|
|
heap.extend_from_bytes_unchecked(bytes)
|
|
}
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
pub fn extend_from_str<'i, 'a: 'i>(&'i mut self, s: &'a str) -> &'i mut str
|
|
{
|
|
if self.inner.is_heap() {
|
|
// Append to heap.
|
|
unsafe {
|
|
std::str::from_utf8_unchecked_mut((&mut *(self.inner.heap)).extend_from_bytes_unchecked(s.as_bytes()))
|
|
}
|
|
} else {
|
|
// Attempt to append to stack
|
|
let appended_stack = (unsafe { &mut self.inner.stack }).append_from_str(s);
|
|
if appended_stack != s.len() {
|
|
// Shunt to heap, along with the rest of `s`.
|
|
let s = &s[appended_stack..];
|
|
unsafe {
|
|
self.shunt_to_heap_with_unchecked(std::iter::once(s)).as_mut_str()
|
|
}
|
|
} else {
|
|
// Fits in stack, return that.
|
|
(unsafe {&mut self.inner.stack}).as_mut_str()
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<const SIZE: usize> Borrow<str> for SmallString<SIZE>
|
|
{
|
|
#[inline]
|
|
fn borrow(&self) -> &str {
|
|
self.as_str()
|
|
}
|
|
}
|
|
|
|
impl<const SIZE: usize> ops::Deref for SmallString<SIZE>
|
|
{
|
|
type Target = str;
|
|
#[inline]
|
|
fn deref(&self) -> &Self::Target {
|
|
self.as_str()
|
|
}
|
|
}
|
|
|
|
impl<'a, const SIZE: usize> From<&'a str> for SmallString<SIZE>
|
|
{
|
|
#[inline]
|
|
fn from(string: &'a str) -> Self {
|
|
if string.len() <= SIZE {
|
|
match StackString::<SIZE>::try_from(string) {
|
|
Ok(ss) => return ss.into(),
|
|
_ => (),
|
|
}
|
|
}
|
|
// Too large, shunt to heap
|
|
HeapString::from(string).into()
|
|
}
|
|
}
|
|
|
|
impl<const SIZE: usize> From<StackString<SIZE>> for SmallString<SIZE>
|
|
{
|
|
#[inline]
|
|
fn from(from: StackString<SIZE>) -> Self
|
|
{
|
|
Self {
|
|
inner: SmallStringInner {
|
|
stack: ManuallyDrop::new(from),
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<const SIZE: usize> From<HeapString> for SmallString<SIZE>
|
|
{
|
|
#[inline(always)]
|
|
fn from(from: HeapString) -> Self
|
|
{
|
|
Self {
|
|
inner: SmallStringInner {
|
|
heap: ManuallyDrop::new(from),
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<const SIZE: usize> From<SmallString<SIZE>> for HeapString
|
|
{
|
|
#[inline(always)]
|
|
fn from(mut from: SmallString<SIZE>) -> Self
|
|
{
|
|
let h = if from.is_allocated() {
|
|
unsafe {
|
|
ManuallyDrop::take(&mut from.inner.heap)
|
|
}
|
|
} else {
|
|
unsafe {
|
|
from.inner.stack.as_str()
|
|
}.into()
|
|
};
|
|
std::mem::forget(from);
|
|
h
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
impl<const SIZE: usize> From<String> for SmallString<SIZE>
|
|
{
|
|
#[inline]
|
|
fn from(from: String) -> Self
|
|
{
|
|
Self{ inner: if from.len() > SIZE {
|
|
SmallStringInner { heap: ManuallyDrop::new(from.into()) }
|
|
} else {
|
|
SmallStringInner {
|
|
stack: ManuallyDrop::new({
|
|
let res = StackString::try_from(from);
|
|
debug_assert!(res.is_ok(), "String conversion failed for stack sized string ({}) within bounds {SIZE}", res.unwrap_err().0);
|
|
// SAFETY: The precondition of StackString::try_from::<String>() returning `Err` has already been checked.
|
|
unsafe { res.unwrap_unchecked() }
|
|
})}
|
|
}}
|
|
}
|
|
}
|
|
|
|
impl<const SIZE: usize> From<SmallString<SIZE>> for String
|
|
{
|
|
#[inline]
|
|
fn from(mut from: SmallString<SIZE>) -> Self
|
|
{
|
|
let res = if from.is_allocated() {
|
|
unsafe {
|
|
ManuallyDrop::take(&mut from.inner.heap)
|
|
}.into()
|
|
} else {
|
|
unsafe {
|
|
from.inner.stack.as_str()
|
|
}.into()
|
|
};
|
|
// If heap allocated, the memory has already been moved. If not, then drop isn't needed anyway
|
|
mem::forget(from.inner);
|
|
res
|
|
}
|
|
}
|
|
|
|
|
|
impl<const SIZE: usize> std::str::FromStr for SmallString<SIZE>
|
|
{
|
|
type Err = std::convert::Infallible;
|
|
|
|
#[inline]
|
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
|
let inner = if s.len() > SIZE {
|
|
SmallStringInner { heap: ManuallyDrop::new(HeapString::new(s.into())) }
|
|
} else {
|
|
SmallStringInner { stack: ManuallyDrop::new({
|
|
let res = StackString::from_str(s);
|
|
debug_assert!(res.is_ok(), "String conversion failed for stack sized string ({}) within bounds {SIZE}", s.len());
|
|
// SAFETY: The precondition of StackString::from_str() returning `Err` has already been checked.
|
|
unsafe { res.unwrap_unchecked() }
|
|
})}
|
|
};
|
|
Ok(Self{inner})
|
|
}
|
|
}
|
|
|
|
impl<const SIZE: usize> fmt::Display for SmallString<SIZE>
|
|
{
|
|
#[inline]
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
|
|
{
|
|
f.write_str(self.as_str())
|
|
}
|
|
}
|
|
|
|
impl<const SIZE: usize, T: AsRef<str>> Extend<T> for SmallString<SIZE>
|
|
{
|
|
#[inline]
|
|
fn extend<U: IntoIterator<Item = T>>(&mut self, iter: U) {
|
|
for s in iter {
|
|
//XXX: There's gotta be a more efficient way for this, like we have in `shunt_with_unchecked<I>()`.
|
|
self.extend_from_str(s.as_ref());
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<const SIZE: usize> fmt::Write for SmallString<SIZE>
{
    /// Append `s`; errors if fewer than `s.len()` bytes ended up appended.
    // NOTE(review): this check relies on `extend_from_str()` returning only the
    // newly-appended portion — confirm every branch of that method does so.
    #[inline]
    fn write_str(&mut self, s: &str) -> fmt::Result {
        (self.extend_from_str(s).len() == s.len())
            .then(|| ()).ok_or_else(|| fmt::Error::default())
    }
    /// Append a single char, choosing the cheapest path for the current variant.
    #[inline]
    fn write_char(&mut self, c: char) -> fmt::Result {
        let l = c.len_utf8();
        if self.is_allocated() {
            // Already on the heap: grow by exactly `l` bytes and encode into them.
            let heap = unsafe {&mut self.inner.heap};
            let slice = unsafe {
                std::slice::from_raw_parts_mut(heap.extend_allocate(l).as_ptr(), l)
            };
            let _ = c.encode_utf8(slice);
        } else if self.len() + l > SIZE {
            // Shunt to heap with `c`
            // Scratch buffer large enough for any utf-8 char (4 bytes).
            let mut buf = [0u8; mem::size_of::<char>()];

            unsafe { self.shunt_to_heap_with_unchecked(
                // `once_with` defers the encode until the iterator is consumed;
                // the `map` pattern reborrows the `&mut str` as `&str`.
                std::iter::once_with(|| c.encode_utf8(&mut buf[..])).map(|&mut ref x| x)
            );
            }
        } else {
            // Room in stack for `c`
            let stack = unsafe{&mut self.inner.stack};
            let res = stack.write_char(c);
            debug_assert!(res.is_ok(), "failed to append {c} to stack of len {}, when size is {SIZE} and char is only {l}", stack.len());
            let _ = res;
        }
        Ok(())
    }
}
|
|
|
|
// Scoped `const _` block so the io imports do not leak into the module.
const _:() = {
    use std::io::{
        self,
        Write,
    };

    impl<const SIZE: usize> Write for SmallString<SIZE>
    {
        /// Append `buf`; fails with `InvalidData` when `buf` is not valid utf-8.
        // NOTE(review): the io::Write contract requires the returned count to be
        // <= buf.len(); this relies on `extend_from_str()` returning only the
        // newly-appended portion — confirm that contract holds on every branch.
        #[inline]
        fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
            let buf = std::str::from_utf8(buf).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
            Ok(self.extend_from_str(buf).len())
        }
        /// Nothing is buffered; flushing is a no-op.
        #[inline]
        fn flush(&mut self) -> io::Result<()> {
            Ok(())
        }
        /// Append all of `buf`, erroring if the append came up short.
        #[inline]
        fn write_all(&mut self, buf: &[u8]) -> io::Result<()> {
            let buf = std::str::from_utf8(buf).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
            (self.extend_from_str(buf).len() == buf.len())
                .then(|| ()).ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "failed to write entire buffer"))
        }
    }
};
|
|
|
|
#[cfg(test)]
mod tests
{
    use super::*;

    /// Exercise inline construction, appending, and the stack-to-heap spill.
    /// (The previous test asserted nothing — it only created an unused binding.)
    #[test]
    fn extending()
    {
        let mut ss: SmallString<40> = "Hello world".into();
        assert_eq!(ss.as_str(), "Hello world");
        assert!(!ss.is_allocated(), "11 bytes should fit inline in 40");

        // Still fits inline after appending.
        ss.extend_from_str(", and more");
        assert_eq!(ss.as_str(), "Hello world, and more");
        assert!(!ss.is_allocated());

        // Push past SIZE: the contents must spill to the heap intact.
        ss.extend_from_str(" plus enough extra text to exceed forty bytes");
        assert!(ss.is_allocated());
        assert!(ss.as_str().starts_with("Hello world, and more"));
        assert_eq!(ss.len(), ss.as_str().len());
    }
}
|