faster hasher for data set maybe

master
Avril 4 years ago
parent 2155e729d7
commit a1c9f4cd39
Signed by: flanchan
GPG Key ID: 284488987C31F630

@ -38,7 +38,7 @@ pub struct Store
{
metadata: StoreMetadata,
data: HashSet<Entry>, // The entry sha256 hash is used as the `key` here, as `Entry` both hasshes to, and `Borrow`s to `Sha256Hash`.
data: HashSet<Entry, Sha256TopBuildHasher>, // The entry sha256 hash is used as the `key` here, as `Entry` both hasshes to, and `Borrow`s to `Sha256Hash`.
data_hashes: Arena<sha256::Sha256Hash>, // used to lookup in `data`.
tag_mappings: Arena<Vec<ArenaIndex>>,
@ -70,7 +70,7 @@ impl Store
assert!(metadata.root.exists() && metadata.root.is_dir(), "Metadata root {:?} passed to `new` not existant or not a directory", metadata.root);
Self {
metadata,
data: HashSet::new(),
data: HashSet::with_hasher(Default::default()),
data_hashes: Arena::new(),
tag_mappings: Arena::new(),
@ -87,7 +87,7 @@ impl Store
Self {
metadata,
data: HashSet::with_capacity(cap),
data: HashSet::with_capacity_and_hasher(cap, Default::default()),
data_hashes: Arena::with_capacity(cap),
tag_mappings: Arena::with_capacity(cap),

@ -0,0 +1,36 @@
use super::*;
use std::hash::{BuildHasherDefault, Hasher};
use smallvec::SmallVec;
use cryptohelpers::sha256;
/// A hasher that takes the first 8 bytes from SHA256 hash as its output.
///
/// Bytes passed to [`Hasher::write`] are buffered in the wrapped `SmallVec` (inline capacity: `sha256::SIZE` bytes).
/// [`Hasher::finish`] then reads the leading `size_of::<u64>()` bytes of that buffer as a little-endian `u64`.
///
/// # Notes
/// Intended for use for `HashSet` with a SHA256 key.
/// Hashing anything other than a SHA256 hash with this hasher is undefined.
// NOTE(review): only the first bytes written to the hasher contribute to the output, so a key whose `Hash` impl
// writes anything ahead of the digest (for example a slice length prefix) would make every key hash alike.
// Confirm that the key type used with this hasher writes the raw digest bytes first.
#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Default)]
pub struct Sha256TopHasher(SmallVec<[u8; sha256::SIZE]>);
/// A `BuildHasher` for [`Sha256TopHasher`].
///
/// `BuildHasherDefault` builds every hasher through `Sha256TopHasher`'s `Default` impl, so this alias can be
/// plugged in as the hasher type parameter of a hashed collection and constructed with `Default::default()`.
pub type Sha256TopBuildHasher = BuildHasherDefault<Sha256TopHasher>;
impl Sha256TopHasher
{
/// Create a new hasher
#[inline] fn new() -> Self
{
Self(SmallVec::new())
}
}
impl Hasher for Sha256TopHasher
{
    /// Produce the hash value: the leading buffered bytes read as a little-endian `u64`.
    ///
    /// The buffer is copied into a zero-initialised, `u64`-sized array before conversion.
    /// NOTE(review): this presumes `crate::slice::copy_bytes` copies the common prefix of source and
    /// destination and leaves the remainder of the destination untouched (zero) — confirm against its definition.
    #[inline] fn finish(&self) -> u64 {
        let mut bytes = [0u8; std::mem::size_of::<u64>()];
        crate::slice::copy_bytes(self.0.as_ref(), &mut bytes[..]);
        u64::from_le_bytes(bytes)
    }

    /// Feed `bytes` into the hasher.
    ///
    /// Only the first `size_of::<u64>()` bytes ever written can influence `finish`, so anything beyond
    /// that is dropped instead of being appended. This keeps the buffer inside its inline storage no
    /// matter how much data the key's `Hash` impl writes, so hashing never triggers a heap allocation.
    #[inline] fn write(&mut self, bytes: &[u8]) {
        const NEEDED: usize = std::mem::size_of::<u64>();

        // How many more bytes `finish` can still make use of.
        let missing = NEEDED.saturating_sub(self.0.len());
        if missing == 0 {
            return;
        }

        let take = missing.min(bytes.len());
        self.0.extend_from_slice(&bytes[..take]);
    }
}

@ -1,16 +1,10 @@
use std::iter::FusedIterator;
use std::collections::BTreeSet;
use std::borrow::Borrow;
use futures::prelude::*;
use super::*;
/// An iterator that may be empty.
///
/// Thin wrapper around `Option<I>`, where `I` is the underlying iterator type and `T` is the item type it yields.
// NOTE(review): presumably a `None` inner value behaves as an empty iterator — the `Iterator` impl is not visible here; confirm.
#[derive(Debug, Clone)]
pub struct MaybeIter<I, T>(Option<I>)
where I: Iterator<Item=T>;
mod streams;
pub use streams::*;
pub trait OptionIterExt<I, T>: Sized
where I: Iterator<Item=T>
{

@ -0,0 +1,13 @@
use std::iter::FusedIterator;
use std::collections::BTreeSet;
use std::borrow::Borrow;
use futures::prelude::*;
mod iters;
pub use iters::*;
mod streams;
pub use streams::*;
mod hashers;
pub use hashers::*;
Loading…
Cancel
Save