diff --git a/.#README.md b/.#README.md new file mode 120000 index 0000000..22fde22 --- /dev/null +++ b/.#README.md @@ -0,0 +1 @@ +avril@eientei.889:1600549970 \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 861b35d..76380ea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,8 @@ name = "smallmap" description = "Small byte-sized generic key-value map type" keywords = ["map", "table", "small", "key", "value"] -version = "0.1.0" + +version = "1.0.0" authors = ["Avril "] edition = "2018" diff --git a/README.md b/README.md index 3aace81..936f1cb 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,28 @@ # smallmap -A small byte sized table map. (Currently *requires* nightly). +A small table map using single byte key indices. Designed for maps with tiny keys. +Pages are stored as 256 entry key-value arrays which are indexed by the byte key index. The key is compared for collision check and on collision the next page is checked or inserted if needed. +`smallmap` does not ever need to allocate more than 1 page for types whose values can all be represented as unique bytes. +## Use cases Designed for instances where you want a small map with relatively trivial keys (e.g. primitive type). -Performance greately outpaces hash-based maps in these cases. +Performance can greatly outpace hash-based maps by an order of magnitude or more in these cases. +### Maybe use if + +* You have small keys +* Your map is not at risk of Denial of Service attacks. +* Your keys will not have a lot of collisions + +### Don't use if + +* You have complex keys +* Denial of service is a concern +* Your map will contain a large volume of entries +* Your keys may have a large number of collisions when represented as `u8`.
# Benchmarks -Some rudamentary benchmarks +Some crude and basic benchmarks ## char @@ -16,18 +31,19 @@ Some rudamentary benchmarks | `HashMap` | 16 | | `smallmap::Map` | 7 | -## Iterating a string's chars and incrementing values +## Iterating a string's chars and counting each | Which | ns/iter | |-----------------|---------| -| `HashMap` | 65,418 | -| `smallmap::Map` | 9,416 | +| `HashMap` | 8,418 | +| `BTreeMap` | 9,742 | +| `smallmap::Map` | 4,416 | -## u8 (single table) +## u8 | Which | ns/iter | |-----------------|---------| | `HashMap` | 15 | | `smallmap::Map` | 2 | # License - Dunno yet. Maybe MIT haven't decided... +MIT licensed diff --git a/src/defaults.rs b/src/defaults.rs index 6323a3b..d15c4cf 100644 --- a/src/defaults.rs +++ b/src/defaults.rs @@ -14,8 +14,4 @@ macro_rules! collapse { } collapse!(str); -collapse!(&str); -collapse!(&mut str); collapse!([u8]); -collapse!(&[u8]); -collapse!(&mut [u8]); diff --git a/src/entry.rs b/src/entry.rs index e5e87b1..459505c 100644 --- a/src/entry.rs +++ b/src/entry.rs @@ -1,75 +1,93 @@ -//! Entry API +//! Map entries. +//! +//! The API is similar to that of `BTreeMap` and `HashMap`'s `Entry` types. use super::*; +/// Variant of [`Entry`](Entry) that already contains a value.
#[derive(Debug)] pub struct OccupiedEntry<'a, K, V>(pub(crate) &'a mut Option<(K,V)>); impl<'a, K, V> OccupiedEntry<'a, K, V> where K: Collapse { + /// Get a reference to the value pub fn get(&self) -> &V { &self.0.as_ref().unwrap().1 } + /// Get a mutable reference to the value pub fn get_mut(&mut self) -> &mut V { &mut self.0.as_mut().unwrap().1 } + /// Consume this instance, returning the held mutable reference to the value pub fn into_mut(self) -> &'a mut V { &mut self.0.as_mut().unwrap().1 } + /// A reference to the key pub fn key(&self) -> &K { &self.0.as_ref().unwrap().0 } + /// Replace the held value with another, yielding the old one pub fn insert(&mut self, value: V) -> V { std::mem::replace(&mut self.0.as_mut().unwrap().1, value) } + /// Remove this entry from the `Map`, yielding the removed value pub fn remove(self) -> V { self.remove_entry().1 } + /// Remove this entry from the `Map`, yielding the removed key-value pair. pub fn remove_entry(self) -> (K, V) { self.0.take().unwrap() } } +/// Variant of [`Entry`](Entry) that does not contain a value. #[derive(Debug)] pub struct VacantEntry<'a,K,V>(pub(crate) &'a mut Option<(K,V)>, pub(crate) K); impl<'a, K, V> VacantEntry<'a, K, V> where K: Collapse { + /// Insert a value into this empty slot, returning a mutable reference to the new value. pub fn insert(self, value: V) -> &'a mut V { *self.0 = Some((self.1, value)); &mut self.0.as_mut().unwrap().1 } + /// Consume this instance, returning the held key. pub fn into_key(self) -> K { self.1 } + /// A reference to the held key pub fn key(&self) -> &K { &self.1 } } +/// Represents a space in a `Map` that may or may not contain a value.
#[derive(Debug)] pub enum Entry<'a, K, V> { + /// This entry slot does not yet contain a value Vacant(VacantEntry<'a, K, V>), + /// This entry slot does contain a value Occupied(OccupiedEntry<'a, K, V>), } impl<'a, K, V> Entry<'a, K, V> where K: Collapse { + /// Run this closure on a mutable reference to the internal value if it is present, otherwise do nothing. pub fn and_modify(mut self, f: F) -> Entry<'a, K, V> { if let Self::Occupied(occuped) = &mut self { @@ -77,7 +95,8 @@ where K: Collapse } self } - + + /// A reference to the key pub fn key(&self) -> &K { match self { @@ -86,6 +105,7 @@ where K: Collapse } } + /// Insert into the entry if it is empty the value returned by the closure and return a mutable reference to the new value, otherwise return a mutable reference to the already present value. pub fn or_insert_with V>(self, with: F) -> &'a mut V { match self { @@ -94,6 +114,7 @@ where K: Collapse } } + /// Insert into the entry this value if it is empty and return a mutable reference to the new value, otherwise return a mutable reference to the already present value. #[inline] pub fn or_insert(self, value: V) -> &'a mut V { self.or_insert_with(|| value) @@ -105,6 +126,7 @@ impl<'a, K, V> Entry<'a, K, V> where K: Collapse, V: Default { + /// Insert into the entry the default value if it is empty and return a mutable reference to the new value, otherwise return a mutable reference to the already present value. #[inline] pub fn or_default(self) -> &'a mut V { self.or_insert_with(Default::default) diff --git a/src/init.rs b/src/init.rs new file mode 100644 index 0000000..6a8534b --- /dev/null +++ b/src/init.rs @@ -0,0 +1,39 @@ +//! Blank page + + +#[inline(always)] pub const fn blank_page() -> [Option<(K,V)>; super::MAX] +{ + //stable doesn't let us use [None; MAX], so... 
+ [None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None, + None,None,None,None,None,None,None,None,] +} diff --git a/src/iter.rs b/src/iter.rs index f456df3..92c2a5d 100644 --- a/src/iter.rs +++ b/src/iter.rs @@ -1,6 +1,7 @@ //! 
Iterator types for `Map` use super::*; +/// An iterator over `Page`s pub struct Pages<'a, K, V>(pub(crate) std::slice::Iter<'a, Page>); impl<'a, K, V> Iterator for Pages<'a,K,V> @@ -16,6 +17,7 @@ impl<'a, K, V> Iterator for Pages<'a,K,V> } } +/// A mutable iterator over `Page`s pub struct PagesMut<'a, K, V>(pub(crate) std::slice::IterMut<'a, Page>); impl<'a, K, V> Iterator for PagesMut<'a,K,V> @@ -34,6 +36,7 @@ impl<'a, K, V> Iterator for PagesMut<'a,K,V> impl<'a, K, V> ExactSizeIterator for PagesMut<'a,K,V>{} impl<'a, K, V> std::iter::FusedIterator for PagesMut<'a,K,V>{} +/// An iterator over elements in a `Page`. pub struct PageElements<'a, K, V>(pub(crate) std::slice::Iter<'a, Option<(K,V)>>); impl<'a, K, V> Iterator for PageElements<'a,K,V> @@ -50,7 +53,7 @@ impl<'a, K, V> Iterator for PageElements<'a,K,V> } impl<'a, K, V> std::iter::FusedIterator for PageElements<'a,K,V>{} - +/// A mutable iterator over elements in a `Page`. pub struct PageElementsMut<'a, K, V>(pub(crate) std::slice::IterMut<'a, Option<(K,V)>>); impl<'a, K, V> Iterator for PageElementsMut<'a,K,V> @@ -67,6 +70,7 @@ impl<'a, K, V> Iterator for PageElementsMut<'a,K,V> } impl<'a, K, V> std::iter::FusedIterator for PageElementsMut<'a,K,V>{} +/// A consuming iterator over elements in a `Page`. pub struct IntoPageElements(pub(crate) [Option<(K,V)>; MAX], pub(crate) usize); impl Iterator for IntoPageElements @@ -91,6 +95,7 @@ impl Iterator for IntoPageElements } impl std::iter::FusedIterator for IntoPageElements{} +/// An iterator over entries in a `Map`. pub struct Iter<'a, K, V>(pub(crate) Option>, pub(crate) Pages<'a, K,V>); impl<'a, K,V> Iterator for Iter<'a, K,V> @@ -117,7 +122,7 @@ where K: Collapse } impl<'a, K: Collapse, V> std::iter::FusedIterator for Iter<'a, K,V>{} - +/// A mutable iterator over entries in a `Map`. 
pub struct IterMut<'a, K, V>(pub(crate) Option>, pub(crate) PagesMut<'a, K,V>); impl<'a, K,V> Iterator for IterMut<'a, K,V> @@ -144,8 +149,7 @@ where K: Collapse } impl<'a, K: Collapse, V> std::iter::FusedIterator for IterMut<'a, K,V>{} - - +/// A consuming iterator over entries in a `Map`. pub struct IntoIter(pub(crate) Option>, pub(crate) std::vec::IntoIter>); impl Iterator for IntoIter diff --git a/src/lib.rs b/src/lib.rs index dda0001..420829e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,58 +1,121 @@ -#![feature(const_in_array_repeat_expressions)] -#![feature(const_fn)] -#![feature(drain_filter)] -#![cfg_attr(nightly, feature(test))] - +//! # smallmap +//! A small table map with a byte sized key index. +//! +//! With a key type whose values can all be represented as unique bytes, searching this map is a single index dereference. +//! With only a few bytes it is still very efficient. +//! +//! ## Usage +//! The API is a similar subset to `HashMap`, containing the same `insert`, `get`, and `entry` functions: +//! +//! ``` +//! # use smallmap::Map; +//! fn max_char(chars: &str) -> (char, usize) +//! { +//! let mut map = Map::new(); +//! for x in chars.chars() { +//! *map.entry(x).or_insert(0usize) += 1; +//! } +//! +//! map.into_iter().max_by_key(|&(_, v)| v).unwrap_or_default() +//! } +//! ``` +//! +//! ## Use cases +//! Designed for instances where you want a small map with small key types. +//! Performance greatly outpaces complex hash-based maps in these cases. +//! +//! ### When not to use +//! Generally don't use this if your key would have a lot of collisions when represented in 8 bits, otherwise it might be a faster alternative to hash-based maps. You should check yourself before sticking with this crate instead of `std`'s vectorised map implementations.
-#![allow(dead_code)] +#![cfg_attr(nightly, feature(test))] +#![cfg_attr(nightly, feature(drain_filter))] +#![cfg_attr(nightly, feature(const_fn))] #[cfg(nightly)] extern crate test; const MAX: usize = 256; -//TODO: Move test -//TODO: Document -//TODO: Readme -//TODO: LICENSE -//TODO: Publish and upload to githubxc +use std::borrow::Borrow; + +pub mod iter; +use iter::*; +pub mod entry; +pub use entry::Entry; -use std::{ - borrow::Borrow, -}; +mod init; +/// Trait for types that can be used as `Map` keys. +/// +/// Implementors should try to minimise collisions by making `collapse` return a relatively unique value if possible. +/// But it is not required. +/// Primitive `Eq` types already implement this, as well as `str` and `[u8]`. +/// A simple folding implementation is provided for byte slices here [`collapse_iter()`](collapse_iter). +/// +/// Integer types implement this through the modulo of itself over 256, whereas byte slice types implement it through an XOR fold over itself. It doesn't matter though, the programmer is free to implement it how she chooses. pub trait Collapse: Eq { + /// Create the index key for this instance. This is similar in use to `Hash::hash()`. fn collapse(&self) -> u8; } +/// A single page in a `Map`. Contains up to 256 key-value entries. 
#[repr(transparent)] -#[derive(Debug,Clone,PartialEq,Eq,Ord,PartialOrd,Hash)] +#[cfg_attr(nightly, derive(Debug,Clone,PartialEq,Eq,Ord,PartialOrd,Hash))] pub struct Page([Option<(TKey, TValue)>; MAX]); +#[cfg(not(nightly))] impl Clone for Page +{ + fn clone(&self) -> Self + { + #[inline(always)] fn copy_slice(dst: &mut [T], src: &[T]) + { + for (d, s) in dst.iter_mut().zip(src.iter()) + { + *d = s.clone() + } + } + let mut new = init::blank_page(); + copy_slice(&mut new[..], &self.0[..]); + Self(new) + } +} + impl Page where K: Collapse { /// Create a new blank page + #[cfg(nightly)] pub const fn new() -> Self { - Self([None; MAX]) + Self(init::blank_page()) } - + /// Create a new blank page + #[cfg(not(nightly))] + pub fn new() -> Self + { + Self(init::blank_page()) + } + + /// The number of entries currently in this page + /// + /// This is a count that iterates over all slots, if possible store it in a temporary instead of re-calling it many times. pub fn len(&self) -> usize { self.0.iter().map(Option::as_ref).filter_map(std::convert::identity).count() } + /// An iterator over all entries currently in this page pub fn iter(&self) -> PageElements<'_, K,V> { PageElements(self.0.iter()) } - + + /// A mutable iterator over all entries currently in this page pub fn iter_mut(&mut self) -> PageElementsMut<'_, K,V> { PageElementsMut(self.0.iter_mut()) } - + fn search(&self, key: &Q) -> &Option<(K,V)> where Q: Collapse { @@ -76,6 +139,7 @@ where K: Collapse type Item= (K,V); type IntoIter = IntoPageElements; + /// Consume this `Page` into an iterator of all values currently in it. fn into_iter(self) -> Self::IntoIter { IntoPageElements(self.0, 0) @@ -93,13 +157,17 @@ where K: Collapse } } -#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)] +/// A small hashtable-like map with byte sized key indecies. 
+#[cfg_attr(nightly, derive(Debug, Clone, PartialEq, Eq, Hash, Default))] pub struct Map(Vec>); -pub mod iter; -use iter::*; -pub mod entry; -pub use entry::Entry; +#[cfg(not(nightly))] impl Clone for Map +{ + fn clone(&self) -> Self + { + Self(self.0.clone()) + } +} impl Map where K: Collapse @@ -129,73 +197,91 @@ where K: Collapse } pub fn entry(&mut self, key: K) -> Entry<'_, K, V> { - if self.0.iter() - .filter(|x| x.search(&key).is_none()) - .count() == 0 { + // somehow this is faster than using index, even though here we search twice????? i don't know why but there you go + if let None = self.0.iter() + .filter(|x| x.search(&key).as_ref().and_then(|(k, v)| if k==&key {None} else {Some((k,v))}).is_none()) + .next() { self.new_page(); - }//so dumb..... SO dumb - //will need to completely reimplement all entry::* shit to just have mut reference to Map and then usize indecies for location I guess. Fuck this + } self.fuck_entry(key).unwrap() } + /// Remove all empty pages from this instance. pub fn clean(&mut self) { + #[cfg(nightly)] self.0.drain_filter(|x| x.len() <1); + #[cfg(not(nightly))] + { + let mut i = 0; + while i != self.0.len() { + if self.0[i].len() <1 { + self.0.remove(i); + } else { + i += 1; + } + } + } } - + + /// The number of entries currently in this map + /// + /// This is an iterating count over all slots in all current pages, if possible store it in a temporary instead of re-calling it. pub fn len(&self) -> usize { self.pages().map(Page::len).sum() } + /// The number of pages currently in this map pub fn num_pages(&self) -> usize { self.0.len() } + /// Consume the instance, returning all pages. 
pub fn into_pages(self) -> Vec> { self.0 } + /// An iterator over all pages pub fn pages(&self) -> Pages<'_, K, V> { iter::Pages(self.0.iter()) } - + + /// A mutable iterator over all pages pub fn pages_mut(&mut self) -> PagesMut<'_, K, V> { iter::PagesMut(self.0.iter_mut()) } - pub(crate) fn iter_opaque(&self) -> impl Iterator + '_ - { - self.pages().map(|x| x.iter()).flatten() - } - + /// An iterator over all elements in the map pub fn iter(&self) -> Iter<'_, K, V> { Iter(None, self.pages()) } - - pub(crate) fn iter_mut_opaque(&mut self) -> impl Iterator + '_ - { - self.pages_mut().map(|x| x.iter_mut()).flatten() - } - + + /// A mutable iterator over all elements in the map pub fn iter_mut(&mut self) -> IterMut<'_, K, V> { IterMut(None, self.pages_mut()) } - + + /// Create a new empty `Map` pub fn new() -> Self { Self(vec![Page::new()]) } + /// Create a new empty `Map` with a specific number of pages pre-allocated pub fn with_capacity(pages: usize) -> Self { + if pages == 0 { + panic!("Got 0 capacity, this is invalid."); + } let mut p = Vec::with_capacity(pages); p.push(Page::new()); Self(p) } + /// Get a mutable reference of the value corresponding to this key if it is in the map. pub fn get_mut(&mut self, key: &Q) -> Option<&mut V> where K: Borrow, Q: Collapse + Eq @@ -212,13 +298,15 @@ where K: Collapse None } + /// Search the map for entry corresponding to this key #[inline] pub fn contains_key(&self, key: &Q) -> bool where K: Borrow, Q: Collapse + Eq { self.get(key).is_some() } - + + /// Get a reference of the value corresponding to this key if it is in the map. 
pub fn get(&self, key: &Q) -> Option<&V> where K: Borrow, Q: Collapse + Eq @@ -235,24 +323,7 @@ where K: Collapse None } - - fn search_mut(&mut self, key: &Q) -> Option<&mut Option<(K,V)>> - where K: Borrow, - Q: Collapse + Eq - { - for page in self.0.iter_mut() - { - let se = page.search_mut(key); - match se { - Some((ref ok, _)) if key.eq(ok.borrow()) => { - return Some(se); - }, - _ => (), - } - } - None - } - + /// Remove the entry corresponding to this key in the map, returning the value if it was present pub fn remove(&mut self, key: &Q) -> Option where K: Borrow, Q: Collapse + Eq @@ -270,6 +341,7 @@ where K: Collapse None } + /// Insert a new key-value entry into this map, returning the previous value if it was present pub fn insert(&mut self, key: K, value: V) -> Option { for page in self.0.iter_mut() @@ -298,6 +370,7 @@ impl IntoIterator for Map type Item= (K,V); type IntoIter = IntoIter; + /// Consume this map into an iterator over all currently inserted entries fn into_iter(self) -> Self::IntoIter { IntoIter(None, self.0.into_iter()) @@ -305,8 +378,12 @@ impl IntoIterator for Map } +/// Helper trait implementing `Collapse` for types that can be represented as a slice of bytes. +/// +/// The `collapse` implementation used is an XOR fold over all bytes. pub trait CollapseMemory: Eq { + /// Get the memory representation of this instance to be used for key calculations in `Map`.
fn as_memory(&self) -> &[u8]; } impl Collapse for T @@ -324,11 +401,17 @@ pub use primitives::*; mod defaults; pub use defaults::*; -/// Collapse bytes with default XOR fold -pub fn collapse>(bytes: T) -> u8 +#[cfg(test)] +mod tests; + +/// Collapse a slice of bytes with an XOR fold +#[inline] pub fn collapse>(bytes: T) -> u8 { bytes.as_ref().iter().copied().fold(0, |a, b| a ^ b) } -#[cfg(test)] -mod tests; +/// Collapse an iterator of bytes with an XOR fold +#[inline] pub fn collapse_iter>(bytes: T) -> u8 +{ + bytes.into_iter().fold(0, |a, b| a ^ b) +} diff --git a/src/tests.rs b/src/tests.rs index cba4c33..6df4130 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -2,7 +2,6 @@ use super::*; use std::collections::{ HashMap, - BTreeMap, }; #[test] @@ -74,6 +73,7 @@ fn type_primitive() mod benchmarks { use super::*; + use std::collections::BTreeMap; use test::{Bencher, black_box}; macro_rules! map_bench {