//! Extensions use std::{ fmt, error, pin::Pin, task::{Poll,Context,}, ops::{ Range, }, marker::{ PhantomData, }, }; use tokio::{ io::AsyncRead, prelude::*, }; use futures::future::Future; pub trait FindSliceBounds { type SliceType: ?Sized; fn slice_bounds(&self, from: &Self::SliceType) -> Range; } pub trait SliceInPlace { #[deprecated = "Slow. Use `drain_inverse`"] #[inline] fn slice_in_place>(&mut self, slice: R) { self.drain_inverse(slice); } fn drain_inverse>(&mut self, slice: R); } impl SliceInPlace for String { fn slice_in_place>(&mut self, slice: R) { let mut i=0; self.retain(|_| (slice.contains(&i), i+=1).0); } fn drain_inverse>(&mut self, slice: R) { use std::ops::Bound; match slice.end_bound() { Bound::Excluded(&ex) => drop(self.drain(ex..)), Bound::Included(&inc) => drop(self.drain(inc+1..)), _ => (), }; match slice.start_bound() { Bound::Included(&ex) => drop(self.drain(..ex)), Bound::Excluded(&ex) => drop(..ex+1), _ => () }; } } impl SliceInPlace for Vec { fn slice_in_place>(&mut self, slice: R) { let mut i=0; self.retain(|_| (slice.contains(&i), i+=1).0); } fn drain_inverse>(&mut self, slice: R) { use std::ops::Bound; match slice.end_bound() { Bound::Excluded(&ex) => drop(self.drain(ex..)), Bound::Included(&inc) => drop(self.drain(inc+1..)), _ => (), }; match slice.start_bound() { Bound::Included(&ex) => drop(self.drain(..ex)), Bound::Excluded(&ex) => drop(..ex+1), _ => () }; } } impl> FindSliceBounds for T { type SliceType = str; fn slice_bounds(&self, from: &Self::SliceType) -> Range{ let this = self.as_ref(); unsafe { let sptr = from.as_ptr(); let eptr = sptr.add(from.len()); let ssptr = this.as_ptr(); let septr = ssptr.add(this.len()); let sptr = sptr as usize; let ssptr = ssptr as usize; let eptr = eptr as usize; let septr = septr as usize; assert!(sptr >= ssptr && sptr <= septr, "Start index of slice is outside the bounds of self"); assert!(eptr >= ssptr && eptr <= septr, "End index of slice is outside the bounds of self"); (sptr - ssptr)..(eptr - ssptr) } } } #[cfg(test)] mod test_slice_in_place { use test::{Bencher, black_box}; use super::*; #[test] fn slice_in_place_str() { let mut string = String::from(" hello world "); assert_eq!(&string[string.slice_bounds(string.trim())], string.trim()); assert_eq!(&string[string.slice_bounds(string.trim())], "hello world"); string.slice_in_place(string.slice_bounds(string.trim())); assert_eq!(&string[..], "hello world"); let string = String::from("hello world"); assert_eq!(&string[string.slice_bounds(string.trim())], string.trim()); assert_eq!(&string[string.slice_bounds(string.trim())], "hello world"); } #[bench] fn bench_slice_in_place(b: &mut Bencher) { let mut string = String::from("hello ONE TWO world"); b.iter(|| { black_box(string.slice_in_place(7..16)); string.push_str("ONE TWO"); }); } #[bench] fn bench_drain_inverse(b: &mut Bencher) { let mut string = String::from("hello ONE TWO world"); b.iter(|| { black_box(string.drain_inverse(7..16)); string.push_str("ONE TWO"); }); } #[test] fn drain_inverse() { let mut string = String::from("123hello world5678"); string.drain_inverse(3..=13); assert_eq!(&string[..], "hello world"); string.drain_inverse(6..); assert_eq!(&string[..], "world"); } } #[derive(Debug)] pub struct GroupIter>(std::iter::Fuse, Vec, usize, PhantomData); impl>, I: Iterator, T> Iterator for GroupIter { type Item = U; fn next(&mut self) -> Option { if self.1.len() == 0 { // fill for (_, item) in (0..self.2).zip(&mut self.0) { self.1.push(item); } } if self.1.len() == 0 { None } else{ Some(std::mem::replace(&mut self.1, Vec::with_capacity(self.2)).into()) } } fn size_hint(&self) -> (usize, Option) { let (low, high) = self.0.size_hint(); (low / self.2, high.map(|x| (x / self.2) + 1)) //not too sure if this is right... } } impl, T> std::iter::FusedIterator for GroupIter{} pub trait GroupIterExt: Sized { /// Group this iterator to return a constructed `U` of every `n` items. /// /// # Notes /// If there isn't `n` items left in the iterator, then the rest is returned. fn group_into(self, n: usize) -> GroupIter where U: From>; /// Group this iterator to return a `Vec` of every `n` items. /// /// # Notes /// If there isn't `n` items left in the iterator, then the rest is returned. #[inline] fn group(self, n: usize) -> GroupIter> { self.group_into(n) } /// Group this iterator to return a boxed slice of every `n` items. /// /// # Notes /// If there isn't `n` items left in the iterator, then the rest is returned. #[inline] fn group_into_boxed_slice(self, n: usize) -> GroupIter { self.group_into(n) } } impl GroupIterExt<::IntoIter, ::Item> for T { fn group_into(self, every: usize) -> GroupIter<::IntoIter, ::Item, U> where U: From::Item>> { GroupIter(self.into_iter().fuse(), Vec::with_capacity(every), every, PhantomData) } } pub trait JoinStrsExt: Sized { /// Join an iterator of `str` with a seperator fn join(self, with: &str) -> String; } impl JoinStrsExt for I where I: IntoIterator, T: AsRef { /// Join an iterator of `str` with a seperator fn join(self, with: &str) -> String { let mut output = String::new(); let mut first=true; for string in self.into_iter() { if !first { output.push_str(with); } let string = string.as_ref(); output.push_str(string); first=false; } output } } #[derive(Debug, Clone)] pub struct StrChunks<'a, T: ?Sized>(&'a str, usize, PhantomData<&'a T>); impl<'a, T: ?Sized> StrChunks<'a, T> { /// The rest of the string pub fn as_str(&self) -> &'a str { &self.0[..] } /// The number of chars to break at pub fn every(&self) -> usize { self.1 } /// Set the number of chars to break at. /// /// # Note /// Probably don't do this as it modifies the iterators internal state weirdly. But it should be fine generally. pub fn every_mut(&mut self) -> &mut usize { &mut self.1 } } impl<'a, T: ?Sized> Iterator for StrChunks<'a, T> { type Item = &'a str; fn next(&mut self) -> Option { match self.0.char_indices().nth(self.1).map(|x| x.0) { None if self.0.len() > 0 => Some(std::mem::replace(&mut self.0, "")), Some(i) => { let (left, right) = self.0.split_at(i); self.0 = right; Some(left) }, _ => None, } } fn size_hint(&self) -> (usize, Option) { let (low, high) = self.0.chars().size_hint(); (low / self.1, high.map(|x| (x / self.1) + 1)) //not too sure if this is right... } } impl<'a, T: ?Sized> std::iter::FusedIterator for StrChunks<'a, T>{} /// Split a `str` into chunks on char boundaries pub trait ChunkStrsExt { /// Split this str into a chunking iterator every specified number of chars. /// /// If there are not enough chars left in the string, the rest is returned. /// # Note /// This operates on codepoints, not bytes. fn chunk(&self, every: usize) -> StrChunks<'_, Self>; } #[cfg(test)] mod chunk_tests { use super::*; #[test] fn chunk_test() { let string = r"a2eab409c57a829d23139c61ff2d5e479260c96158ebec0ce4d458afb85b76dłこんな僕は生きてるだけ何万人の人が悲しいんで。!?"; assert_eq!(&string.chunk(8).join("")[..], string); for chunk in string.chunk(8) { println!("{}", chunk); } } #[test] fn group_test() { let string = r"a2eab409c57a829d23139c61ff2d5e479260c96158ebec0ce4d458afb85b76dłこんな僕は生きてるだけ何万人の人が悲しいんで。!?"; let astr: String = string.chars().group(8).map(|x| -> String {x.iter().collect()}).collect(); assert_eq!(&astr[..], string); for chunk in string.chars().group(8) { println!("{:?}", chunk); } } } impl+?Sized> ChunkStrsExt for T { fn chunk(&self, every: usize) -> StrChunks<'_, Self> { StrChunks(self.as_ref(), every, PhantomData) } } pub trait SwapTupleExt: Sized { fn swap(self) -> (U,T); } impl SwapTupleExt for (T,U) { #[inline(always)] fn swap(self) -> (U,T) { (self.1, self.0) } } const ASCII_MAP: [char; 256] = [ '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']', '^', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', ]; const fn create_hex_map() -> [(u8, u8); 256] { let mut out = [(0, 0); 256]; const HEX: &[u8; 16] = b"0123456789abcdef"; let mut i = 0usize; while i <= 255 { out[i] = ( HEX[i >> 4], HEX[i & 0xf] ); i+=1; } out } const HEX_MAP: [(u8, u8); 256] = create_hex_map(); pub struct HexStringView<'a, I:?Sized>(&'a I, bool); pub struct HexStringIter<'a, I:?Sized>(std::slice::Iter<'a, u8>, (u8, u8), PhantomData<&'a I>); pub struct HexView<'a, I:?Sized>(&'a I); pub struct AsciiView<'a, I:?Sized>(&'a I); pub struct AsciiIter<'a, I:?Sized>(&'a [u8], PhantomData<&'a I>); const SPLIT_EVERY: usize = 16; impl<'a, I: ?Sized+AsRef<[u8]>> Iterator for AsciiIter<'a, I> { type Item = char; fn next(&mut self) -> Option { match match self.0 { [] => None, [chr, ..] => Some(ASCII_MAP[*chr as usize]), } { x @ Some(_) => { self.0 = &self.0[1..]; x }, _ => None, } } fn size_hint(&self) -> (usize, Option) { (self.0.len(), Some(self.0.len())) } } impl<'a, I: ?Sized+AsRef<[u8]>> ExactSizeIterator for AsciiIter<'a, I>{} impl<'a, I: ?Sized+AsRef<[u8]>> std::iter::FusedIterator for AsciiIter<'a, I>{} impl<'a, I: ?Sized+AsRef<[u8]>> Iterator for HexStringIter<'a, I> { type Item = char; fn next(&mut self) -> Option { match self.1 { ref mut buf @ (0, 0) => { // both are taken if let Some(&byte) = self.0.next() { *buf = HEX_MAP[byte as usize]; } else { return None; } (Some(buf.0 as char),buf.0 = 0).0 }, (0, ref mut second) => { // first is taken (Some(*second as char),*second = 0).0 }, #[cold] (ref mut first, _) => { // neither are taken, usually shouldn't happen (Some(*first as char),*first = 0).0 }, } } fn size_hint(&self) -> (usize, Option) { let sz = self.0.size_hint(); (sz.0 * 2, sz.1.map(|x| x*2)) } } impl<'a, I: ?Sized+AsRef<[u8]>> ExactSizeIterator for HexStringIter<'a, I>{} impl<'a, I: ?Sized+AsRef<[u8]>> std::iter::FusedIterator for HexStringIter<'a, I>{} impl<'a, I: AsRef<[u8]>+?Sized> fmt::Display for AsciiView<'a, I> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { for byte in self.0.as_ref().iter().map(|&byte| ASCII_MAP[byte as usize]) { use std::fmt::Write; f.write_char(byte)?; } Ok(()) } } impl<'a, I: AsRef<[u8]>+?Sized> fmt::Display for HexView<'a, I> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { use std::iter; let mut abuf = ['\0'; SPLIT_EVERY]; let mut last_n =0 ; for (i, (n, &byte)) in (0..).zip(iter::repeat(0..SPLIT_EVERY).flatten().zip(self.0.as_ref().iter())) { if n== 0 { write!(f,"0x{:016x}\t", i)?; } abuf[n] = ASCII_MAP[byte as usize]; write!(f, "{:02x} ", byte)?; if n==SPLIT_EVERY-1 { write!(f, "\t\t")?; for ch in abuf.iter().filter(|&x| *x!= '\0') { write!(f, "{}", ch)?; } writeln!(f)?; abuf = ['\0'; SPLIT_EVERY]; } last_n = n; } if last_n != SPLIT_EVERY-1 { for _ in 0..(SPLIT_EVERY-last_n) { write!(f, " ")?; } write!(f, "\t\t")?; for ch in abuf.iter().filter(|&x| *x!= '\0') { write!(f, "{}", ch)?; } writeln!(f)?; } Ok(()) } } impl<'a, I: AsRef<[u8]>+?Sized> fmt::Display for HexStringView<'a, I> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { if self.1 { let mut iter = self.0.as_ref().iter(); if let Some(byte) = iter.next() { write!(f, "{:02}", byte)?; } else { return Ok(()) } for byte in iter { write!(f, " {:02x}", byte)?; } } else { for byte in self.0.as_ref().iter() { write!(f, "{:02x}", byte)?; } } Ok(()) } } /// Extensions on byte slices to print them nicely pub trait HexStringExt: AsRef<[u8]> { /// An iterator that prints readable ascii of each byte fn iter_ascii(&self) -> AsciiIter<'_, Self>; /// A `Display` implementor that prints ascii of each byte fn fmt_ascii(&self) -> AsciiView<'_, Self>; /// A pretty hex view `Display` implementor of the bytes fn fmt_view(&self) -> HexView<'_, Self>; /// A `Display` implementor that prints the hex of each byte in lowercase fn fmt_hex(&self) -> HexStringView<'_, Self>; /// An iterator over `char`s that yields the hex of each byte /// /// # Notes /// This yields each character one at a time, to get the hex of each byte, chunk it with a window of 2. fn iter_hex(&self) -> HexStringIter<'_, Self>; /// Convenience method for creating a hex string. fn to_hex_string(&self) -> String { let mut string = String::with_capacity(self.as_ref().len()*2); use fmt::Write; write!(&mut string, "{}", self.fmt_hex()).unwrap(); string } /// Convenience method for creating a hex string with each byte broken by a hyphen. fn to_broken_hex_string(&self) -> String { let fmt = HexStringView( self.fmt_hex().0, true ); let mut string = String::with_capacity(self.as_ref().len()*3); use fmt::Write; write!(&mut string, "{}", fmt).unwrap(); string } /// Convenience method for creating a string from `fmt_view()` #[inline] fn to_view_string(&self) -> String { format!("{}", self.fmt_view()) } /// Convenience method for creating a string from `fmt_ascii()` #[inline] fn to_ascii_string(&self) -> String { self.iter_ascii().collect() } } impl+?Sized> HexStringExt for T { fn iter_hex(&self) -> HexStringIter<'_, Self> { HexStringIter(self.as_ref().iter(), (0,0), PhantomData) } fn iter_ascii(&self) -> AsciiIter<'_, Self> { AsciiIter(self.as_ref(), PhantomData) } fn fmt_ascii(&self) -> AsciiView<'_, Self> { AsciiView(&self) } fn fmt_hex(&self) -> HexStringView<'_, Self> { HexStringView(&self, false) } fn fmt_view(&self) -> HexView<'_, Self> { HexView(&self) } } #[pin_project] pub struct ReadAllBytes<'a, T: AsyncRead+Unpin+?Sized>(#[pin] &'a mut T, Option); impl<'a, T: AsyncRead+Unpin+?Sized> Future for ReadAllBytes<'a, T> { type Output = std::io::Result>; fn poll(self: Pin<&mut Self>, ctx: &mut Context) -> Poll { let fut = async move { let this = self.project(); let mut output = Vec::with_capacity(4096*10); let mut input = this.0; let max = *this.1; let mut buffer =[0u8; 4096]; let mut read; while {read = input.read(&mut buffer[..]).await?; read!=0} { output.extend_from_slice(&buffer[..read]); if let Some(max) = max { if output.len() >=max { return Err(std::io::Error::new(std::io::ErrorKind::Other, format!("Attempted to read more than allowed max {} bytes", max))); } } } Ok(output) }; tokio::pin!(fut); fut.poll(ctx) } } pub trait ReadAllBytesExt: AsyncRead+Unpin { /// Attempt to read the whole stream to a new `Vec`. fn read_whole_stream(&mut self, max: Option) -> ReadAllBytes<'_, Self> { ReadAllBytes(self, max) } } impl ReadAllBytesExt for T{} pub trait FromHexExt { fn repl_with_hex>(&mut self, input: U) -> Result<(), HexDecodeError>; } impl+?Sized> FromHexExt for T { fn repl_with_hex>(&mut self, input: U) -> Result<(), HexDecodeError> { let out = self.as_mut(); #[inline] fn val(c: u8, idx: usize) -> Result { match c { b'A'..=b'F' => Ok(c - b'A' + 10), b'a'..=b'f' => Ok(c - b'a' + 10), b'0'..=b'9' => Ok(c - b'0'), _ => Err(HexDecodeError{ chr: c as char, idx, }), } } for (i, (byte, digits)) in (0..).zip(out.iter_mut().zip(input.as_ref().chunks_exact(2))) { *byte = val(digits[0], 2*i)? << 4 | val(digits[1], 2 * i + 1)?; } Ok(()) } } #[derive(Debug)] pub struct HexDecodeError { idx: usize, chr: char, } impl error::Error for HexDecodeError{} impl fmt::Display for HexDecodeError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "Invalid hex at index {} (character was {:?})", self.idx, self.chr) } } #[cfg(test)] mod tests { use super::*; fn format() { let bytes = b"hello world one two three \x142!"; panic!("\n{}\n", bytes.fmt_view()); } #[test] fn hex() { const INPUT_HEX: [u8; 32] = hex_literal::hex!("d0a2404173bac722b29282652f2c457b573261e3c8701b908bb0bd3ada3d7f2d"); const INPUT_STR: &str = "d0a2404173bac722b29282652f2c457b573261e3c8701b908bb0bd3ada3d7f2d"; let mut output = [0u8; 32]; output.repl_with_hex(INPUT_STR).expect("Failed!"); assert_eq!(&INPUT_HEX[..], &output[..]); } #[cfg(nightly)] mod benchmarks { use super::*; use test::{Bencher, black_box}; #[bench] fn hex_via_val(b: &mut Bencher) { fn repl_with_hex>(out: &mut [u8], input: U) -> Result<(), HexDecodeError> { #[inline] fn val(c: u8, idx: usize) -> Result { match c { b'A'..=b'F' => Ok(c - b'A' + 10), b'a'..=b'f' => Ok(c - b'a' + 10), b'0'..=b'9' => Ok(c - b'0'), _ => Err(HexDecodeError{ chr: c as char, idx, }), } } for (i, (byte, digits)) in (0..).zip(out.iter_mut().zip(input.as_ref().chunks_exact(2))) { *byte = val(digits[0], 2*i)? << 4 | val(digits[1], 2 * i + 1)?; } Ok(()) } const INPUT_HEX: [u8; 32] = hex_literal::hex!("d0a2404173bac722b29282652f2c457b573261e3c8701b908bb0bd3ada3d7f2d"); const INPUT_STR: &str = "d0a2404173bac722b29282652f2c457b573261e3c8701b908bb0bd3ada3d7f2d"; let mut output = [0u8; 32]; b.iter(|| { black_box(repl_with_hex(&mut output[..], INPUT_STR).unwrap()); }); assert_eq!(&INPUT_HEX[..], &output[..]); } #[bench] fn hex_via_lazy(b: &mut Bencher) { fn repl_with_hex>(out: &mut [u8], input: U) -> Result<(), HexDecodeError> { use smallmap::Map; lazy_static::lazy_static! { static ref MAP: Map = { let mut map = Map::new(); for c in 0..=255u8 { map.insert(c, match c { b'A'..=b'F' => c - b'A' + 10, b'a'..=b'f' => c - b'a' + 10, b'0'..=b'9' => c - b'0', _ => continue, }); } map }; } #[inline(always)] fn val(c: u8, idx: usize) -> Result { MAP.get(&c).copied() .ok_or_else(|| HexDecodeError{idx, chr: c as char}) } for (i, (byte, digits)) in (0..).zip(out.iter_mut().zip(input.as_ref().chunks_exact(2))) { *byte = val(digits[0], 2*i)? << 4 | val(digits[1], 2 * i + 1)?; } Ok(()) } const INPUT_HEX: [u8; 32] = hex_literal::hex!("d0a2404173bac722b29282652f2c457b573261e3c8701b908bb0bd3ada3d7f2d"); const INPUT_STR: &str = "d0a2404173bac722b29282652f2c457b573261e3c8701b908bb0bd3ada3d7f2d"; let mut output = [0u8; 32]; b.iter(|| { black_box(repl_with_hex(&mut output[..], INPUT_STR).unwrap()); }); assert_eq!(&INPUT_HEX[..], &output[..]); } } }