Browse Source

documented

master
Avril 2 years ago
parent
commit
e4a11ee8f8
Signed by: flanchan
GPG Key ID: 284488987C31F630
  1. 1
      .#README.md
  2. 3
      Cargo.toml
  3. 32
      README.md
  4. 4
      src/defaults.rs
  5. 26
      src/entry.rs
  6. 39
      src/init.rs
  7. 12
      src/iter.rs
  8. 215
      src/lib.rs
  9. 2
      src/tests.rs

1
.#README.md

@ -0,0 +1 @@
avril@eientei.889:1600549970

3
Cargo.toml

@ -2,7 +2,8 @@
name = "smallmap"
description = "Small byte-sized generic key-value map type"
keywords = ["map", "table", "small", "key", "value"]
version = "0.1.0"
version = "1.0.0"
authors = ["Avril <flanchan@cumallover.me>"]
edition = "2018"

32
README.md

@ -1,13 +1,28 @@
# smallmap
A small byte sized table map. (Currently *requires* nightly).
A small table map using single byte key indices. Designed for maps with tiny keys.
Pages are stored as 256 entry key-value arrays which are indexed by the byte key index. The key is compared for collision check and on collision the next page is checked or inserted if needed.
`smallmap` never needs to allocate more than 1 page for key types whose every possible value can be represented as a unique byte.
## Use cases
Designed for instances where you want a small map with relatively trivial keys (e.g. primitive type).
Performance greately outpaces hash-based maps in these cases.
Performance can outpace hash-based maps by an order of magnitude or more in these cases.
### Maybe use if
* You have small keys
* Your map is not at risk of Denial of Service attacks.
* Your keys will have a lot of collisions
### Don't use if
* You have complex keys
* Denial of service is a concern
* Your map will contain a large volume of entries
* Your keys may have a large number of collisions when represented as `u8`.
# Benchmarks
Some rudamentary benchmarks
Some crude and basic benchmarks
## char
@ -16,18 +31,19 @@ Some rudamentary benchmarks
| `HashMap` | 16 |
| `smallmap::Map` | 7 |
## Iterating a string's chars and incrementing values
## Iterating a string's chars and counting each
| Which | ns/iter |
|-----------------|---------|
| `HashMap` | 65,418 |
| `smallmap::Map` | 9,416 |
| `HashMap` | 8,418 |
| `BTreeMap` | 9,742 |
| `smallmap::Map` | 4,416 |
## u8 (single table)
## u8
| Which | ns/iter |
|-----------------|---------|
| `HashMap` | 15 |
| `smallmap::Map` | 2 |
# License
Dunno yet. Maybe MIT haven't decided...
MIT licensed

4
src/defaults.rs

@ -14,8 +14,4 @@ macro_rules! collapse {
}
collapse!(str);
collapse!(&str);
collapse!(&mut str);
collapse!([u8]);
collapse!(&[u8]);
collapse!(&mut [u8]);

26
src/entry.rs

@ -1,75 +1,93 @@
//! Entry API
//! Map entries.
//!
//! The API is similar to that of `BTreeMap` and `HashMap`'s `Entry` types.
use super::*;
/// Variant of [`Entry`](Entry) for a slot that already holds a key-value pair.
///
/// Every accessor unwraps the wrapped slot, so it panics if the slot is `None`.
#[derive(Debug)]
pub struct OccupiedEntry<'a, K, V>(pub(crate) &'a mut Option<(K,V)>);

impl<'a, K, V> OccupiedEntry<'a, K, V>
where K: Collapse
{
    /// Borrow the stored value.
    pub fn get(&self) -> &V
    {
        let (_, value) = self.0.as_ref().unwrap();
        value
    }

    /// Mutably borrow the stored value.
    pub fn get_mut(&mut self) -> &mut V
    {
        let (_, value) = self.0.as_mut().unwrap();
        value
    }

    /// Consume this entry, handing back the mutable reference to the value for the full slot lifetime.
    pub fn into_mut(self) -> &'a mut V
    {
        let Self(slot) = self;
        let (_, value) = slot.as_mut().unwrap();
        value
    }

    /// Borrow the stored key.
    pub fn key(&self) -> &K
    {
        let (key, _) = self.0.as_ref().unwrap();
        key
    }

    /// Swap in a new value and return the one that was stored before.
    pub fn insert(&mut self, value: V) -> V
    {
        std::mem::replace(self.get_mut(), value)
    }

    /// Empty the slot in the `Map` and return the value it held.
    pub fn remove(self) -> V
    {
        let (_, value) = self.remove_entry();
        value
    }

    /// Empty the slot in the `Map` and return the key-value pair it held.
    pub fn remove_entry(self) -> (K, V)
    {
        let Self(slot) = self;
        slot.take().unwrap()
    }
}
/// Variant of [`Entry`](Entry) for a slot that holds no value yet.
///
/// Carries the target slot together with the key that would be stored in it.
#[derive(Debug)]
pub struct VacantEntry<'a,K,V>(pub(crate) &'a mut Option<(K,V)>, pub(crate) K);

impl<'a, K, V> VacantEntry<'a, K, V>
where K: Collapse
{
    /// Fill the slot with the held key and `value`, returning a mutable reference to the stored value.
    pub fn insert(self, value: V) -> &'a mut V
    {
        let Self(slot, key) = self;
        *slot = Some((key, value));
        // The slot was assigned `Some` on the line above, so this unwrap cannot fail.
        let (_, stored) = slot.as_mut().unwrap();
        stored
    }

    /// Consume this entry and give back the key it was holding.
    pub fn into_key(self) -> K
    {
        let Self(_, key) = self;
        key
    }

    /// Borrow the held key.
    pub fn key(&self) -> &K
    {
        let Self(_, key) = self;
        key
    }
}
/// Represents a space in a `Map` that may or may not contain a value.
///
/// Each variant wraps the matching entry type: [`VacantEntry`] for an empty slot, [`OccupiedEntry`] for a filled one.
#[derive(Debug)]
pub enum Entry<'a, K, V>
{
/// This entry slot does not yet contain a value
Vacant(VacantEntry<'a, K, V>),
/// This entry slot does contain a value
Occupied(OccupiedEntry<'a, K, V>),
}
impl<'a, K, V> Entry<'a, K, V>
where K: Collapse
{
/// Run this closure on a mutable reference to the internal value if it is present, otherwise do nothing.
pub fn and_modify<F: FnOnce(&mut V)>(mut self, f: F) -> Entry<'a, K, V>
{
if let Self::Occupied(occuped) = &mut self {
@ -77,7 +95,8 @@ where K: Collapse
}
self
}
/// A reference to the key
pub fn key(&self) -> &K
{
match self {
@ -86,6 +105,7 @@ where K: Collapse
}
}
/// Insert into the entry if it is empty the value returned by the closure and return a mutable reference to the new value, otherwise return a mutable reference to the already present value.
pub fn or_insert_with<F: FnOnce() -> V>(self, with: F) -> &'a mut V
{
match self {
@ -94,6 +114,7 @@ where K: Collapse
}
}
/// Insert into the entry this value if it is empty and return a mutable reference to the new value, otherwise return a mutable reference to the already present value.
#[inline] pub fn or_insert(self, value: V) -> &'a mut V
{
self.or_insert_with(|| value)
@ -105,6 +126,7 @@ impl<'a, K, V> Entry<'a, K, V>
where K: Collapse,
V: Default
{
/// Insert into the entry the default value if it is empty and return a mutable reference to the new value, otherwise return a mutable reference to the already present value.
#[inline] pub fn or_default(self) -> &'a mut V
{
self.or_insert_with(Default::default)

39
src/init.rs

@ -0,0 +1,39 @@
//! Blank page
/// Build a page-sized array whose slots are all `None`.
///
/// Places no bounds on `K` or `V` and is a `const fn`.
#[inline(always)] pub const fn blank_page<K,V>() -> [Option<(K,V)>; super::MAX]
{
    // Stable doesn't let us write `[None; MAX]`, so every slot is spelled out:
    // 16 rows of 16 entries, 256 in total.
    [
        None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None,
    ]
}

12
src/iter.rs

@ -1,6 +1,7 @@
//! Iterator types for `Map`
use super::*;
/// An iterator over `Page`s
pub struct Pages<'a, K, V>(pub(crate) std::slice::Iter<'a, Page<K,V>>);
impl<'a, K, V> Iterator for Pages<'a,K,V>
@ -16,6 +17,7 @@ impl<'a, K, V> Iterator for Pages<'a,K,V>
}
}
/// A mutable iterator over `Page`s
pub struct PagesMut<'a, K, V>(pub(crate) std::slice::IterMut<'a, Page<K,V>>);
impl<'a, K, V> Iterator for PagesMut<'a,K,V>
@ -34,6 +36,7 @@ impl<'a, K, V> Iterator for PagesMut<'a,K,V>
impl<'a, K, V> ExactSizeIterator for PagesMut<'a,K,V>{}
impl<'a, K, V> std::iter::FusedIterator for PagesMut<'a,K,V>{}
/// An iterator over elements in a `Page`.
pub struct PageElements<'a, K, V>(pub(crate) std::slice::Iter<'a, Option<(K,V)>>);
impl<'a, K, V> Iterator for PageElements<'a,K,V>
@ -50,7 +53,7 @@ impl<'a, K, V> Iterator for PageElements<'a,K,V>
}
impl<'a, K, V> std::iter::FusedIterator for PageElements<'a,K,V>{}
/// A mutable iterator over elements in a `Page`.
pub struct PageElementsMut<'a, K, V>(pub(crate) std::slice::IterMut<'a, Option<(K,V)>>);
impl<'a, K, V> Iterator for PageElementsMut<'a,K,V>
@ -67,6 +70,7 @@ impl<'a, K, V> Iterator for PageElementsMut<'a,K,V>
}
impl<'a, K, V> std::iter::FusedIterator for PageElementsMut<'a,K,V>{}
/// A consuming iterator over elements in a `Page`.
pub struct IntoPageElements<K,V>(pub(crate) [Option<(K,V)>; MAX], pub(crate) usize);
impl<K,V> Iterator for IntoPageElements<K,V>
@ -91,6 +95,7 @@ impl<K,V> Iterator for IntoPageElements<K,V>
}
impl<K, V> std::iter::FusedIterator for IntoPageElements<K,V>{}
/// An iterator over entries in a `Map`.
pub struct Iter<'a, K, V>(pub(crate) Option<PageElements<'a,K,V>>, pub(crate) Pages<'a, K,V>);
impl<'a, K,V> Iterator for Iter<'a, K,V>
@ -117,7 +122,7 @@ where K: Collapse
}
impl<'a, K: Collapse, V> std::iter::FusedIterator for Iter<'a, K,V>{}
/// A mutable iterator over entries in a `Map`.
pub struct IterMut<'a, K, V>(pub(crate) Option<PageElementsMut<'a,K,V>>, pub(crate) PagesMut<'a, K,V>);
impl<'a, K,V> Iterator for IterMut<'a, K,V>
@ -144,8 +149,7 @@ where K: Collapse
}
impl<'a, K: Collapse, V> std::iter::FusedIterator for IterMut<'a, K,V>{}
/// A consuming iterator over entries in a `Map`.
pub struct IntoIter<K, V>(pub(crate) Option<IntoPageElements<K,V>>, pub(crate) std::vec::IntoIter<Page<K,V>>);
impl<K, V> Iterator for IntoIter<K,V>

215
src/lib.rs

@ -1,58 +1,121 @@
#![feature(const_in_array_repeat_expressions)]
#![feature(const_fn)]
#![feature(drain_filter)]
#![cfg_attr(nightly, feature(test))]
//! # smallmap
//! A small table map with a byte sized key index.
//!
//! With a key type whose every possible value can be represented as a unique byte, searching this map is a single index dereference.
//! With only a few bytes it is still very efficient.
//!
//! ## Usage
//! The API is a similar subset to `HashMap`, containing the same `insert`, `get`, and `entry` functions:
//!
//! ```
//! # use smallmap::Map;
//! fn max_char(chars: &str) -> (char, usize)
//! {
//! let mut map = Map::new();
//! for x in chars.chars() {
//! *map.entry(x).or_insert(0usize) += 1;
//! }
//!
//! map.into_iter().max_by_key(|&(_, v)| v).unwrap_or_default()
//! }
//! ```
//!
//! ## Use cases
//! Designed for instances where you want a small map with small key types.
//! Performance greatly outpaces complex hash-based maps in these cases.
//!
//! ### When not to use
//! Generally don't use this if your keys would have a lot of collisions when represented in 8 bits; otherwise it might be a faster alternative to hash-based maps. You should benchmark it yourself before sticking with this crate instead of `std`'s vectorised map implementations.
#![allow(dead_code)]
#![cfg_attr(nightly, feature(test))]
#![cfg_attr(nightly, feature(drain_filter))]
#![cfg_attr(nightly, feature(const_fn))]
#[cfg(nightly)] extern crate test;
const MAX: usize = 256;
//TODO: Move test
//TODO: Document
//TODO: Readme
//TODO: LICENSE
//TODO: Publish and upload to githubxc
use std::borrow::Borrow;
pub mod iter;
use iter::*;
pub mod entry;
pub use entry::Entry;
use std::{
borrow::Borrow,
};
mod init;
/// Trait for types that can be used as `Map` keys.
///
/// Implementors should try to minimise collisions by making `collapse` return a relatively unique value if possible.
/// But it is not required.
/// Primitive `Eq` types already implement this, as well as `str` and `[u8]`.
/// A simple folding implementation is provided for byte slices here [`collapse_iter()`](collapse_iter).
///
/// Integer types implement this through the modulo of itself over 256, whereas byte slice types implement it through an XOR fold over itself. It doesn't matter though, the programmer is free to implement it how she chooses.
pub trait Collapse: Eq
{
/// Create the index key for this instance. This is similar in use to `Hash::hash()`.
///
/// The returned byte is the key's index key; different keys may return the same byte.
fn collapse(&self) -> u8;
}
/// A single page in a `Map`. Contains up to 256 key-value entries.
#[repr(transparent)]
#[derive(Debug,Clone,PartialEq,Eq,Ord,PartialOrd,Hash)]
#[cfg_attr(nightly, derive(Debug,Clone,PartialEq,Eq,Ord,PartialOrd,Hash))]
pub struct Page<TKey,TValue>([Option<(TKey, TValue)>; MAX]);
#[cfg(not(nightly))]
impl<K: Clone, V: Clone> Clone for Page<K,V>
{
    /// Produce a new page whose slots are element-wise clones of this page's slots.
    fn clone(&self) -> Self
    {
        // Start from an all-`None` page and overwrite each slot with a clone of its source.
        let mut slots = init::blank_page();
        for (dst, src) in slots.iter_mut().zip(self.0.iter()) {
            *dst = src.clone();
        }
        Self(slots)
    }
}
impl<K,V> Page<K,V>
where K: Collapse
{
/// Create a new blank page
#[cfg(nightly)]
pub const fn new() -> Self
{
Self([None; MAX])
Self(init::blank_page())
}
/// Create a new blank page
#[cfg(not(nightly))]
pub fn new() -> Self
{
Self(init::blank_page())
}
/// The number of entries currently in this page
///
/// This is a count that iterates over all slots, if possible store it in a temporary instead of re-calling it many times.
pub fn len(&self) -> usize
{
self.0.iter().map(Option::as_ref).filter_map(std::convert::identity).count()
}
/// An iterator over all entries currently in this page
pub fn iter(&self) -> PageElements<'_, K,V>
{
PageElements(self.0.iter())
}
/// A mutable iterator over all entries currently in this page
pub fn iter_mut(&mut self) -> PageElementsMut<'_, K,V>
{
PageElementsMut(self.0.iter_mut())
}
fn search<Q: ?Sized>(&self, key: &Q) -> &Option<(K,V)>
where Q: Collapse
{
@ -76,6 +139,7 @@ where K: Collapse
type Item= (K,V);
type IntoIter = IntoPageElements<K,V>;
/// Consume this `Page` into an iterator of all values currently in it.
fn into_iter(self) -> Self::IntoIter
{
IntoPageElements(self.0, 0)
@ -93,13 +157,17 @@ where K: Collapse
}
}
#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)]
/// A small hashtable-like map with byte sized key indices.
#[cfg_attr(nightly, derive(Debug, Clone, PartialEq, Eq, Hash, Default))]
pub struct Map<TKey, TValue>(Vec<Page<TKey,TValue>>);
pub mod iter;
use iter::*;
pub mod entry;
pub use entry::Entry;
#[cfg(not(nightly))] impl<K: Clone, V: Clone> Clone for Map<K,V>
{
fn clone(&self) -> Self
{
Self(self.0.clone())
}
}
impl<K,V> Map<K,V>
where K: Collapse
@ -129,73 +197,91 @@ where K: Collapse
}
pub fn entry(&mut self, key: K) -> Entry<'_, K, V>
{
if self.0.iter()
.filter(|x| x.search(&key).is_none())
.count() == 0 {
// somehow this is faster than using index, even though here we search twice????? i don't know why but there you go
if let None = self.0.iter()
.filter(|x| x.search(&key).as_ref().and_then(|(k, v)| if k==&key {None} else {Some((k,v))}).is_none())
.next() {
self.new_page();
}//so dumb..... SO dumb
//will need to completely reimplement all entry::* shit to just have mut reference to Map and then usize indecies for location I guess. Fuck this
}
self.fuck_entry(key).unwrap()
}
/// Remove all empty pages from this instance.
///
/// A page is considered empty when its `len()` is zero. The relative order of
/// the remaining pages is preserved.
pub fn clean(&mut self)
{
    // `Vec::retain` drops the empty pages in one in-place pass on every toolchain.
    // It replaces the nightly-only `drain_filter` call and the index loop that
    // shifted the tail of the vector on each `remove`.
    self.0.retain(|page| page.len() >= 1);
}
/// The number of entries currently in this map
///
/// This is an iterating count over all slots in all current pages, if possible store it in a temporary instead of re-calling it.
pub fn len(&self) -> usize
{
self.pages().map(Page::len).sum()
}
/// The number of pages currently in this map
pub fn num_pages(&self) -> usize
{
self.0.len()
}
/// Consume the instance, returning all pages.
pub fn into_pages(self) -> Vec<Page<K,V>>
{
self.0
}
/// An iterator over all pages
pub fn pages(&self) -> Pages<'_, K, V>
{
iter::Pages(self.0.iter())
}
/// A mutable iterator over all pages
pub fn pages_mut(&mut self) -> PagesMut<'_, K, V>
{
iter::PagesMut(self.0.iter_mut())
}
/// Crate-internal iterator over all key-value pairs, built by chaining each page's element iterator.
pub(crate) fn iter_opaque(&self) -> impl Iterator<Item = &(K, V)> + '_
{
    self.pages().flat_map(|page| page.iter())
}
/// Iterate over every element in the map.
///
/// The iterator starts with no current page and is given the page iterator for this map.
pub fn iter(&self) -> Iter<'_, K, V>
{
    let remaining_pages = self.pages();
    Iter(None, remaining_pages)
}
/// Crate-internal mutable iterator over all key-value pairs, built by chaining each page's mutable element iterator.
pub(crate) fn iter_mut_opaque(&mut self) -> impl Iterator<Item = &mut (K, V)> + '_
{
    self.pages_mut().flat_map(|page| page.iter_mut())
}
/// Mutably iterate over every element in the map.
///
/// The iterator starts with no current page and is given the mutable page iterator for this map.
pub fn iter_mut(&mut self) -> IterMut<'_, K, V>
{
    let remaining_pages = self.pages_mut();
    IterMut(None, remaining_pages)
}
/// Create a new empty `Map` that starts out with a single blank page.
pub fn new() -> Self
{
    let mut pages = Vec::with_capacity(1);
    pages.push(Page::new());
    Self(pages)
}
/// Create a new empty `Map` with room reserved for `pages` pages.
///
/// One blank page is created immediately; the rest of the capacity is only reserved.
///
/// # Panics
/// Panics if `pages` is 0.
pub fn with_capacity(pages: usize) -> Self
{
    assert!(pages != 0, "Got 0 capacity, this is invalid.");

    let mut storage = Vec::with_capacity(pages);
    storage.push(Page::new());
    Self(storage)
}
/// Get a mutable reference of the value corresponding to this key if it is in the map.
pub fn get_mut<Q: ?Sized>(&mut self, key: &Q) -> Option<&mut V>
where K: Borrow<Q>,
Q: Collapse + Eq
@ -212,13 +298,15 @@ where K: Collapse
None
}
/// Report whether the map holds an entry for this key.
///
/// Implemented as a `get` lookup whose value is discarded.
#[inline] pub fn contains_key<Q: ?Sized>(&self, key: &Q) -> bool
where K: Borrow<Q>,
      Q: Collapse + Eq
{
    match self.get(key) {
        Some(_) => true,
        None => false,
    }
}
/// Get a reference of the value corresponding to this key if it is in the map.
pub fn get<Q: ?Sized>(&self, key: &Q) -> Option<&V>
where K: Borrow<Q>,
Q: Collapse + Eq
@ -235,24 +323,7 @@ where K: Collapse
None
}
fn search_mut<Q: ?Sized>(&mut self, key: &Q) -> Option<&mut Option<(K,V)>>
where K: Borrow<Q>,
Q: Collapse + Eq
{
for page in self.0.iter_mut()
{
let se = page.search_mut(key);
match se {
Some((ref ok, _)) if key.eq(ok.borrow()) => {
return Some(se);
},
_ => (),
}
}
None
}
/// Remove the entry corresponding to this key in the map, returning the value if it was present
pub fn remove<Q: ?Sized>(&mut self, key: &Q) -> Option<V>
where K: Borrow<Q>,
Q: Collapse + Eq
@ -270,6 +341,7 @@ where K: Collapse
None
}
/// Insert a new key-value entry into this map, returning the previous value if it was present
pub fn insert(&mut self, key: K, value: V) -> Option<V>
{
for page in self.0.iter_mut()
@ -298,6 +370,7 @@ impl<K: Collapse, V> IntoIterator for Map<K,V>
type Item= (K,V);
type IntoIter = IntoIter<K,V>;
/// Consume this map into an iterator over all currently inserted entries
fn into_iter(self) -> Self::IntoIter
{
IntoIter(None, self.0.into_iter())
@ -305,8 +378,12 @@ impl<K: Collapse, V> IntoIterator for Map<K,V>
}
/// Helper trait implementing `Collapse` for types that can be represented as a slice of bytes.
///
/// The `collapse` implementation used is a XOR fold over all bytes.
pub trait CollapseMemory: Eq
{
/// Get the memory representation of this instance, used for key calculations in `Map`.
fn as_memory(&self) -> &[u8];
}
impl<T> Collapse for T
@ -324,11 +401,17 @@ pub use primitives::*;
mod defaults;
pub use defaults::*;
/// Collapse bytes with default XOR fold
pub fn collapse<T: AsRef<[u8]>>(bytes: T) -> u8
#[cfg(test)]
mod tests;
/// Reduce a slice of bytes to a single byte by XOR-ing all of them together.
///
/// An empty input yields `0`.
#[inline] pub fn collapse<T: AsRef<[u8]>>(bytes: T) -> u8
{
    let mut folded = 0u8;
    for &byte in bytes.as_ref() {
        folded ^= byte;
    }
    folded
}
#[cfg(test)]
mod tests;
/// Reduce an iterator of bytes to a single byte by XOR-ing every item together.
///
/// An empty iterator yields `0`.
#[inline] pub fn collapse_iter<T: IntoIterator<Item=u8>>(bytes: T) -> u8
{
    let mut folded = 0u8;
    for byte in bytes {
        folded ^= byte;
    }
    folded
}

2
src/tests.rs

@ -2,7 +2,6 @@
use super::*;
use std::collections::{
HashMap,
BTreeMap,
};
#[test]
@ -74,6 +73,7 @@ fn type_primitive()
mod benchmarks
{
use super::*;
use std::collections::BTreeMap;
use test::{Bencher, black_box};
macro_rules! map_bench {

Loading…
Cancel
Save