From 1a40fcb569cfbe0454b113ee9f8e8829a3d7315e Mon Sep 17 00:00:00 2001
From: Avril
Date: Thu, 14 Apr 2022 00:55:32 +0100
Subject: [PATCH] initial commit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Added type `StackString`, accessors and some basic UTF-8 string appending.

TODO: remove the call to `str::floor_char_boundary()` and reimplement it here: it is an unstable feature.

Fortune for stack-str's current commit: Future small blessing − 末小吉
---
 .gitignore |   2 +
 Cargo.toml |   8 +++
 src/lib.rs | 174 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 184 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Cargo.toml
 create mode 100644 src/lib.rs

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4fffb2f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/target
+/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..4447373
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name = "stack-str"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..5cc47e3
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,174 @@
+#![feature(round_char_boundary)]
+
+use std::{
+    mem::{self, MaybeUninit,},
+    slice,
+    str,
+    ptr,
+};
+
+#[derive(Debug, Clone)]
+#[cfg_attr(feature="copy", derive(Copy))]
+pub struct StackString<const SIZE: usize>{
+    fill_ptr: usize,
+    buffer: MaybeUninit<[u8; SIZE]>,
+}
+
+impl<const SIZE: usize> StackString<SIZE>
+{
+    #[inline]
+    pub const fn capacity(&self) -> usize
+    {
+        SIZE
+    }
+    #[inline]
+    pub const fn len(&self) -> usize
+    {
+        self.fill_ptr
+    }
+    #[inline]
+    pub const fn available(&self) -> usize
+    {
+        SIZE - self.fill_ptr
+    }
+
+    #[inline]
+    pub const fn new() -> Self
+    {
+        Self {
+            fill_ptr: 0,
+            buffer: MaybeUninit::uninit(),
+        }
+    }
+
+    #[inline(always)]
+    fn buf_end(&mut self) -> *mut u8
+    {
+        unsafe {
+            (self.buffer.as_mut_ptr() as *mut u8).add(self.fill_ptr)
+        }
+    }
+
+    #[inline(always)]
+    fn as_raw_buf_mut(&mut self) -> &mut [u8]
+    {
+        unsafe {
+            slice::from_raw_parts_mut(self.buffer.as_mut_ptr() as *mut u8, self.fill_ptr)
+        }
+    }
+    #[inline(always)]
+    fn as_raw_buf(&self) -> &[u8]
+    {
+        unsafe {
+            slice::from_raw_parts(self.buffer.as_ptr() as *const u8, self.fill_ptr)
+        }
+    }
+
+    #[inline]
+    pub fn as_mut_str(&mut self) -> &mut str
+    {
+        unsafe {
+            std::str::from_utf8_unchecked_mut(self.as_raw_buf_mut())
+        }
+    }
+    #[inline]
+    pub fn as_str(&self) -> &str
+    {
+        unsafe {
+            std::str::from_utf8_unchecked(self.as_raw_buf())
+        }
+    }
+
+    /// Append as much of `s` into `self` as possible, returning the number of bytes appended.
+    ///
+    /// This function guarantees:
+    /// * if `s` cannot fit wholly into `self`, the longest prefix of `s` that ends on a UTF-8 character boundary and fits into `self` is appended instead
+    /// # Returns
+    /// The number of bytes copied from `s`.
+    #[inline(always)]
+    pub fn append_from_str(&mut self, s: &str) -> usize
+    {
+        let sl = s.len();
+
+        if self.fill_ptr >= SIZE || sl == 0 {
+            return 0;
+        }
+
+        let av = self.available();
+        let sl = if sl <= av {
+            // Can fit whole `str` in
+            unsafe {
+                ptr::copy_nonoverlapping(s.as_bytes().as_ptr(), self.buf_end(), sl);
+            }
+            sl
+        } else if s.is_char_boundary(av) {
+            // Can only fit part in, check if on codepoint boundary
+            unsafe {
+                ptr::copy_nonoverlapping(s.as_bytes().as_ptr(), self.buf_end(), av);
+            }
+            av
+        } else {
+            // Can only fit part in, find the char boundary below `av` and append that.
+            return self.append_from_str(&s[..s.floor_char_boundary(av)]); //TODO: implement floor_char_boundary() ourselves, and we probably don't need this recursive call.
+        };
+        self.fill_ptr += sl;
+        sl
+    }
+
+    /// Append as much of `s` into `self` as possible.
+    ///
+    /// This function has the same guarantees as `append_from_str()`.
+    ///
+    /// # Returns
+    /// * `Ok(s)` - if the entire string was appended: The part of `self` that now contains `s`.
+    /// * `Err(s)` - if the entire string was **not** appended: The part of `s` that was not copied into `self`. The difference between the returned string and the parameter `s` is how much was copied into `self`.
+    #[inline]
+    pub fn try_append_from_str<'inner, 'outer>(&'inner mut self, s: &'outer str) -> Result<&'inner mut str, &'outer str>
+    {
+        match self.append_from_str(s) {
+            whole if whole == s.len() => {
+                let substr = self.fill_ptr - whole;
+                Ok(&mut self.as_mut_str()[substr..])
+            },
+            copied /*if copied < s.len()*/ => Err(&s[copied..]),
+        }
+    }
+
+    /// Append as much of `s` into `self` as possible.
+    ///
+    /// This function has the same guarantees as `append_from_str()`.
+    ///
+    /// # Returns
+    /// A tuple containing the copied part of `s`, and the part of `s` that `self` did not have space for.
+    #[inline]
+    pub fn append_from_str_split<'i, 'o>(&'i mut self, s: &'o str) -> (&'i mut str, &'o str)
+    {
+        let end = self.fill_ptr;
+        let written = self.append_from_str(s);
+        if written == s.len() {
+            (&mut self.as_mut_str()[end..], "")
+        } else {
+            (&mut self.as_mut_str()[end..end + written], &s[written..])
+        }
+    }
+
+    /// Attempt to append the whole string `s` into `self`.
+    ///
+    /// # Returns
+    /// * `Some(substr)` - If all of `s` fits into `self`: the substring of `self` that now contains `s`.
+    /// * `None` - If `s` was too large to fit entirely into `self`.
+    #[inline]
+    pub fn try_append_whole_str<'i>(&'i mut self, s: &str) -> Option<&'i mut str>
+    {
+        let start = self.fill_ptr;
+        if s.len() <= self.available() {
+            let len = self.append_from_str(s);
+            debug_assert_eq!(len, s.len(), "Bad append");
+            let _ = len;
+
+            Some(&mut self.as_mut_str()[start..])
+        } else {
+            None
+        }
+    }
+}
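
Note on the TODO in the commit message: `str::floor_char_boundary()` is gated behind the unstable `round_char_boundary` feature, so this crate currently requires a nightly toolchain. A minimal sketch of a stable replacement, assuming it would live as a private helper in src/lib.rs (the free-function form and the name `floor_char_boundary` are illustrative, not part of the patch):

    /// Largest index <= `index` that lies on a UTF-8 character boundary of `s`,
    /// clamped to `s.len()`. Stable stand-in for `str::floor_char_boundary()`.
    fn floor_char_boundary(s: &str, index: usize) -> usize {
        if index >= s.len() {
            s.len()
        } else {
            // Walk downwards until a boundary is hit; at most 3 steps are needed
            // because UTF-8 encodes a code point in at most 4 bytes, and index 0
            // is always a boundary.
            (0..=index).rev().find(|&i| s.is_char_boundary(i)).unwrap_or(0)
        }
    }

With such a helper, the recursive call in `append_from_str()` could become `return self.append_from_str(&s[..floor_char_boundary(s, av)]);` and the `#![feature(round_char_boundary)]` attribute could be dropped.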
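
For reference, a small usage sketch of the API added by this patch; this is hypothetical test code, not part of the commit, and the byte counts assume the UTF-8 lengths noted in the comments:

    #[test]
    fn append_truncates_on_char_boundary() {
        let mut s: StackString<8> = StackString::new();
        // "héllo" is 6 bytes of UTF-8 ('é' takes 2 bytes), so it fits whole.
        assert_eq!(s.append_from_str("héllo"), 6);
        // Only 2 bytes remain; 'ö' (2 bytes) cannot straddle the end of the
        // buffer, so the copy stops after the 1-byte 'w'.
        let (copied, rest) = s.append_from_str_split("wörld");
        assert_eq!(&*copied, "w");
        assert_eq!(rest, "örld");
        assert_eq!(s.as_str(), "héllow");
        // "ld" (2 bytes) does not fit in the 1 remaining byte.
        assert!(s.try_append_whole_str("ld").is_none());
    }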