From 1714fea6b8676980b8552222f65c4b43f588fba9 Mon Sep 17 00:00:00 2001 From: Avril Date: Sat, 16 Apr 2022 15:48:50 +0100 Subject: [PATCH] Added `SmallString`: `extend_from_bytes_unchecked()`: append a slice of bytes to the memory, handling re/allocation if needed. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fortune for stack-str's current commit: Blessing − 吉 --- src/ext.rs | 13 ++++++ src/small.rs | 112 ++++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 116 insertions(+), 9 deletions(-) diff --git a/src/ext.rs b/src/ext.rs index d3fbceb..dfcaa59 100644 --- a/src/ext.rs +++ b/src/ext.rs @@ -1,5 +1,18 @@ //! Extensions +pub trait Tuple2Ext +{ + fn swap(self) -> (U, T); +} + +impl Tuple2Ext for (T, U) +{ + #[inline(always)] + fn swap(self) -> (U, T) { + (self.1, self.0) + } +} + #[inline(always)] pub fn is_utf8_char_boundary(byte: u8) -> bool { diff --git a/src/small.rs b/src/small.rs index b058b34..b102d98 100644 --- a/src/small.rs +++ b/src/small.rs @@ -10,22 +10,29 @@ use std::{ #[repr(C)] struct HeapString { - len: usize, + len: usize, // fill_ptr + // TODO: add this: cap: usize, // actual size of `data` slice data: NonNull, // Box<[u8]> allocated. } +unsafe fn unwrap_boxed_slice(data: Box<[T]>) -> (usize, NonNull) +{ + let len = data.len(); + let data = { + let raw = Box::into_raw(data); + debug_assert!(!raw.is_null(), "Box::into_raw returned null"); + let raw = raw.as_mut().unwrap_unchecked().as_mut_ptr(); + debug_assert!(!raw.is_null(), "raw slice is null"); + NonNull::new_unchecked(raw) + }; + (len, data) +} + impl HeapString { #[inline(always)] pub unsafe fn new_from_bytes(data: Box<[u8]>) -> Self { - let len = data.len(); - let data = { - let raw = Box::into_raw(data); - debug_assert!(!raw.is_null(), "Box::into_raw returned null"); - let raw = raw.as_mut().unwrap_unchecked().as_mut_ptr(); - debug_assert!(!raw.is_null(), "raw slice is null"); - NonNull::new_unchecked(raw) - }; + let (len, data) = unwrap_boxed_slice(data); Self { len, data } @@ -78,6 +85,48 @@ impl HeapString { mem::forget(self); bx } + + /// Allocates `more` more bytes, and extends `len` by `more`. + /// + /// # Returns + /// Pointer to the start of the uninitialised newly allocated memory. + /// + /// # Safety + /// The caller must initialise the memory returned from this function call to `more` bytes. + #[inline(always)] + unsafe fn extend_allocate(&mut self, more: usize) -> NonNull + { + let mut bx: Vec = Box::from_raw(ptr::slice_from_raw_parts_mut(self.data.as_ptr(), self.len)).into(); + bx.reserve_exact(more); + debug_assert_eq!(self.len+more, bx.capacity(), "Bad reserve_exact()"); + bx.set_len(bx.capacity()); + bx.truncate(self.len+more); + + let (len, data) = unwrap_boxed_slice(bx.into_boxed_slice()); + debug_assert_eq!(len, self.len+more, "Bad into_boxed_slice()"); + self.len = len; + self.data = data; + let p = NonNull::new(self.data.as_ptr().sub(more)); + debug_assert!(p.is_some(), "data - more == null"); + p.unwrap_unchecked() + } + + /// Allocates enough space to fit exactly `bytes` more data, updates `self.len`, then copies those bytes into the newly allocated memory. + /// + /// # Returns + /// The newly extended slice of `self`'s data now containing `bytes`. + /// + /// # Safety + /// The caller must guarantee that `bytes` is valid utf8. + #[inline(always)] + unsafe fn extend_from_bytes_unchecked<'a>(&'a mut self, bytes: &[u8]) -> &'a mut [u8] + { + let len = bytes.len(); + let end = self.extend_allocate(len); + ptr::copy_nonoverlapping(bytes.as_ptr(), end.as_ptr(), len); + slice::from_raw_parts_mut(end.as_ptr(), len) + } + //TODO: extend_from_str() } impl From for String @@ -232,6 +281,51 @@ impl SmallString } //TODO: Appending, etc. Moving to heap, etc. + /// Moves data from `inner.stack` to `inner.heap`, returns a reference to the newly allocated `HeapString` + /// + /// # Safety + /// These conditions must be met or calling this is UB: + /// * The active discriminant is `inner.stack`. + /// * After this returns, the active discriminant is recognised as `inner.heap`. + /// * `is_allocated()` must return `true` after the operation that calls this is completed. + #[inline(always)] + unsafe fn shunt_to_heap_unchecked(&mut self) -> &'_ mut HeapString + { + let allocated: Box = self.inner.stack.as_str().into(); + let current = &mut self.inner.heap; + *current = ManuallyDrop::new(HeapString::new(allocated)); + current + } + + /// Extends the memory inside `self` to fit more `bytes`. + /// The data is moved to the heap first if `self.len() + bytes.len() > SIZE`. + /// + /// # Returns + /// A mutable reference to the memory inside `self` now containing the data of `bytes`. + /// + /// # Safety + /// The caller must ensure `bytes` is valid utf-8. + #[inline(always)] + unsafe fn extend_from_bytes_unchecked<'i>(&'i mut self, bytes: &[u8]) -> &'i mut [u8] + { + let len = self.len(); + if bytes.len() + len > SIZE { + return self.shunt_to_heap_unchecked().extend_from_bytes_unchecked(bytes); + } + match self.inner.get_stack_mut() { + Ok(stack) => { + let (end, fp) = (stack.buf_end(), &mut stack.fill_ptr); + ptr::copy_nonoverlapping(bytes.as_ptr(), end, bytes.len()); + let slice = slice::from_raw_parts_mut(end, *fp); + *fp += bytes.len(); + slice + }, + Err(heap) => { + heap.extend_from_bytes_unchecked(bytes) + } + } + } + //TODO: extend_from_str() } impl Borrow for SmallString