added smallmap impl

master
Avril 4 years ago
parent 362d41ef56
commit d1c37b2e17
Signed by: flanchan
GPG Key ID: 284488987C31F630

@ -10,9 +10,12 @@ license= "MIT"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[features]
[dependencies] [dependencies]
sha2 = "0.9" sha2 = "0.9"
serde = {version = "1.0", optional = true, features=["derive"]} serde = {version = "1.0", optional = true, features=["derive"]}
smallmap = {version = "1.1", optional = true} # serde here is kinda broken for now
[dev-dependencies] [dev-dependencies]
serde_json = "1.0" serde_json = "1.0"

@ -48,5 +48,26 @@ Even when `Clone` is required to insert into `HashSet`, it can be ~10x faster fo
* The fallibility of potential (albeit extremely unlikely) collisions of the SHA512 algorithm is not a concern * The fallibility of potential (albeit extremely unlikely) collisions of the SHA512 algorithm is not a concern
* You need to insert an unsized type into a `HashSet` * You need to insert an unsized type into a `HashSet`
## Smallmap implementation
With the `smallmap` feature enabled, the `small` module also provides the same API as `HashRefSet` via `SmallRefMap`.
It is backed by `smallmap::Map` instead of `HashSet`, which may or may not affect performance and memory usage.
The hashing algorithm and usage are otherwise identical for now, but this may change.
### Benchmarks of `SmallRefMap`
These benchmarks compare `SmallRefMap` with cloning or copying into a `smallmap::Map`.
The performance penalties are largely the same as in the table above, with only very minor differences.
| Benchmark | Tests | Result |
|--------------------|-----------------------------------------------|-----------------|
| owning_strings | Inserts `String` into `smallmap::Map` by cloning | ~3,096 ns/iter |
| non_owning_strings | Inserts `str` into `SmallRefMap` by reference | ~47,302 ns/iter |
| owning_ints | Inserts `u32` into `smallmap::Map` by copy | ~316 ns/iter |
| non_owning_ints | Inserts `&u32` into `SmallRefMap` by reference | ~30,046 ns/iter |
Note, however, that each page of the `SmallRefMap` will consume at least 16 KB of memory.
This may not be very desirable, but it is still an available feature.
# License # License
MIT MIT

@ -0,0 +1 @@
avril@eientei.1729:1603041184

@ -43,6 +43,8 @@ use std::{
mod hashing; mod hashing;
#[cfg(feature="smallmap")] pub mod small;
/// The type used to store the hash of each item. /// The type used to store the hash of each item.
/// ///
/// It is a result of the `SHA512` algorithm as a newtype 64 byte array marked with `#[repr(transparent)]`. /// It is a result of the `SHA512` algorithm as a newtype 64 byte array marked with `#[repr(transparent)]`.
@ -120,6 +122,10 @@ impl<T:?Sized + Hash> HashRefSet<T>
PhantomData PhantomData
) )
} }
/// Consume the set, returning the inner `HashSet` of stored hashes.
pub fn into_inner(self) -> HashSet<HashType>
{
self.0
}
/// Create a new `HashRefSet` with a capacity /// Create a new `HashRefSet` with a capacity
pub fn with_capacity(cap: usize) -> Self pub fn with_capacity(cap: usize) -> Self
{ {

@ -0,0 +1,207 @@
//! `smallmap` implementation of a referencing set.
//!
//! Uses the crate `smallmap` to store the hashes and is an optional feature of this crate.
use super::*;
use smallmap::{Collapse, Map};
/// A `smallmap` of references to items.
///
/// The usage is the same as `HashRefSet`, except it is backed by `smallmap` instead of a `HashSet`.
///
/// Only the hash of each inserted item is kept, as a `HashType` key of the inner `Map`.
/// The `PhantomData` field records the item type `T` and holds no data.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
#[cfg_attr(feature="serde", derive(serde::Serialize, serde::Deserialize))]
pub struct SmallRefMap<T: ?Sized>(Map<HashType, ()>, PhantomData<Map<*const T, ()>>);
// SAFETY: the `*const T` appears only inside `PhantomData`, so no pointer is ever stored or
// dereferenced; the only data held is the `Map<HashType, ()>` of hashes.
// NOTE(review): this relies on `Map<HashType, ()>` itself being safe to send -- confirm.
unsafe impl<T> Send for SmallRefMap<T>
where
    T: ?Sized + Send,
{
}

// SAFETY: same reasoning as for `Send` above -- the raw pointer is a type marker only.
// NOTE(review): this relies on `Map<HashType, ()>` itself being safe to share -- confirm.
unsafe impl<T> Sync for SmallRefMap<T>
where
    T: ?Sized + Send + Sync,
{
}
impl<T: ?Sized + Hash> SmallRefMap<T>
{
    /// Creates a new empty `SmallRefMap`.
    #[inline] pub fn new() -> Self
    {
        Self(Map::new(), PhantomData)
    }

    /// Consume into the inner `Map`, whose keys are the hashes of the inserted items.
    pub fn into_inner(self) -> Map<HashType, ()>
    {
        self.0
    }

    /// Create a new `SmallRefMap` with this number of pages preallocated.
    ///
    /// Note that `pages` is forwarded to `Map::with_capacity`; it is a page count, not an item count.
    #[inline] pub fn with_capacity(pages: usize) -> Self
    {
        Self(Map::with_capacity(pages), PhantomData)
    }

    /// Insert a reference into the set. The reference can be any type that borrows to `T`.
    ///
    /// Only the hash of the borrowed value is stored; the value itself is not kept.
    ///
    /// Returns `true` if there was no previous item, `false` if there was.
    pub fn insert<Q>(&mut self, value: &Q) -> bool
    where Q: ?Sized + Borrow<T>
    {
        self.0.insert(compute_hash_for(value.borrow()), ()).is_none()
    }

    /// Remove a reference from the set.
    ///
    /// Returns `true` if it existed.
    pub fn remove<Q>(&mut self, value: &Q) -> bool
    where Q: ?Sized + Borrow<T>
    {
        self.0.remove(&compute_hash_for(value.borrow())).is_some()
    }

    /// Check if this value has been inserted into the set.
    ///
    /// This is a read-only lookup of the value's hash, so it only needs shared access to the set.
    pub fn contains<Q>(&self, value: &Q) -> bool
    where Q: ?Sized + Borrow<T>
    {
        self.0.contains_key(&compute_hash_for(value.borrow()))
    }

    /// The number of items stored in the set
    pub fn len(&self) -> usize
    {
        self.0.len()
    }

    /// Is the set empty
    pub fn is_empty(&self) -> bool
    {
        // Probe the iterator for a first entry instead of comparing `len()` to zero.
        self.0.iter().next().is_none()
    }

    /// An iterator over the hashes stored in the set.
    pub fn hashes_iter(&self) -> Iter<'_, HashType>
    {
        Iter(self.0.iter())
    }

    /// Consume the set into an iterator over its stored hashes.
    ///
    /// Private helper backing the `IntoIterator` implementation.
    #[inline] fn into_hashes_iter(self) -> IntoIter<HashType>
    {
        IntoIter(self.0.into_iter())
    }
}
/// An iterator over the references to the hashes in `SmallRefMap`.
///
/// Wraps the borrowing iterator of the inner `smallmap` map; returned by `SmallRefMap::hashes_iter`.
pub struct Iter<'a, T>(smallmap::iter::Iter<'a, T, ()>);
/// An iterator of the hashes inserted into `SmallRefMap`.
///
/// Wraps the consuming iterator of the inner `smallmap` map and yields the hashes by value.
pub struct IntoIter<T>(smallmap::iter::IntoIter<T, ()>);
impl<'a, T: Collapse> Iterator for Iter<'a, T>
{
    type Item = &'a T;

    /// Advance the wrapped map iterator and hand back only the key half of the entry.
    fn next(&mut self) -> Option<Self::Item>
    {
        let (key, _) = self.0.next()?;
        Some(key)
    }

    /// Delegates directly to the wrapped iterator's hint.
    fn size_hint(&self) -> (usize, Option<usize>)
    {
        self.0.size_hint()
    }
}

// NOTE(review): assumes the wrapped `smallmap` iterator keeps returning `None` once exhausted -- confirm.
impl<'a, T: Collapse> std::iter::FusedIterator for Iter<'a, T> {}
// The previous `impl<'a, ...>` declared a lifetime that was never used; `IntoIter` owns its data.
impl<T: Collapse> Iterator for IntoIter<T>
{
    type Item = T;

    /// Advance the wrapped map iterator and yield only the key, discarding the unit value.
    fn next(&mut self) -> Option<Self::Item>
    {
        self.0.next().map(|(key, ())| key)
    }

    /// Delegates directly to the wrapped iterator's hint.
    fn size_hint(&self) -> (usize, Option<usize>)
    {
        self.0.size_hint()
    }
}

// NOTE(review): assumes the wrapped `smallmap` iterator keeps returning `None` once exhausted -- confirm.
impl<T: Collapse> std::iter::FusedIterator for IntoIter<T> {}
/// Consuming iteration yields the stored hashes, not the originally inserted items.
///
/// The bound includes `?Sized` so that sets of unsized items (e.g. `SmallRefMap<str>`) can be
/// consumed too, matching the bounds of the inherent `impl` block.
impl<T: ?Sized + Hash> IntoIterator for SmallRefMap<T>
{
    type Item = HashType;
    type IntoIter = IntoIter<HashType>;

    fn into_iter(self) -> Self::IntoIter
    {
        self.into_hashes_iter()
    }
}
#[cfg(test)]
mod tests
{
use super::*;
/// Inserted strings must be reported as present, and `insert` must report
/// whether the value was new.
#[test]
fn insert()
{
    let words = vec!["hi", "hello", "one", "two"];
    let mut set = SmallRefMap::new();

    for word in words.iter().copied() {
        set.insert(word);
    }
    for word in words.iter().copied() {
        assert!(set.contains(word));
    }

    // A fresh value reports `true`; a value that is already present reports `false`.
    assert!(set.insert("none"));
    assert!(!set.insert("two"));
}
/// Benchmarks comparing `SmallRefMap` (stores hashes of borrowed values) with inserting
/// owned keys directly into a `smallmap::Map`. Only built when the `nightly` cfg is set.
#[cfg(nightly)]
mod benchmarks
{
    use test::{black_box, Bencher};

    const STRINGS: &str = "leo vel fringilla est ullamcorper eget nulla facilisi etiam dignissim diam quis enim lobortis scelerisque fermentum dui faucibus in ornare quam viverra orci sagittis eu volutpat odio facilisis mauris sit amet massa vitae tortor condimentum lacinia quis vel eros donec ac odio tempor orci dapibus ultrices in iaculis nunc sed augue lacus viverra vitae congue eu consequat ac felis donec et odio pellentesque diam volutpat commodo sed egestas egestas fringilla phasellus faucibus scelerisque eleifend donec pretium vulputate sapien nec sagittis aliquam malesuada bibendum arcu vitae elementum curabitur vitae nunc sed velit dignissim sodales ut eu sem integer vitae justo eget";
    const INTS: &[u32] = &[182,248,69,225,164,219,73,122,14,205,148,221,24,107,209,83,210,87,148,249,234,181,217,154,180,240,132,145,208,15,77,4,117,16,43,1,95,49,150,18,207,161,107,216,215,100,76,198,43,21,99,177,77,28,29,172,117,136,151,96,66,208,244,138,90];

    /// Inserts each word into a `SmallRefMap` by reference.
    #[bench] fn non_owning_strings(b: &mut Bencher)
    {
        let strings: Vec<String> = STRINGS.split(char::is_whitespace).map(ToOwned::to_owned).collect();
        let mut map = super::SmallRefMap::new();
        b.iter(|| {
            for string in strings.iter() {
                black_box(map.insert(string.as_str()));
            }
        })
    }

    /// Inserts each word into a `smallmap::Map` as an owned `String` key.
    #[bench] fn owning_strings(b: &mut Bencher)
    {
        let strings: Vec<String> = STRINGS.split(char::is_whitespace).map(ToOwned::to_owned).collect();
        let mut map = smallmap::Map::new();
        b.iter(|| {
            for string in strings.iter() {
                black_box(map.insert(string.clone(), ())); //clone is needed here :/
            }
        })
    }

    /// Inserts each integer into a `SmallRefMap` by reference.
    #[bench] fn non_owning_ints(b: &mut Bencher)
    {
        let mut map = super::SmallRefMap::new();
        b.iter(|| {
            for int in INTS.iter() {
                black_box(map.insert(int));
            }
        })
    }

    /// Inserts each integer into a `smallmap::Map` as an owned `u32` key.
    #[bench] fn owning_ints(b: &mut Bencher)
    {
        let mut map = smallmap::Map::new();
        b.iter(|| {
            for int in INTS.iter() {
                // Dereference so the key is a copied `u32`; inserting `int` directly would
                // key the map on `&u32` references instead of owned integers.
                black_box(map.insert(*int, ()));
            }
        })
    }
}
}
Loading…
Cancel
Save