|
|
@ -3,20 +3,63 @@ use std::{
|
|
|
|
marker::PhantomData,
|
|
|
|
marker::PhantomData,
|
|
|
|
hash::{
|
|
|
|
hash::{
|
|
|
|
Hash,
|
|
|
|
Hash,
|
|
|
|
Hasher,
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
iter::{
|
|
|
|
|
|
|
|
self,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
},
|
|
|
|
},
|
|
|
|
|
|
|
|
iter,
|
|
|
|
};
|
|
|
|
};
|
|
|
|
use smallmap::Map;
|
|
|
|
use smallmap::Map;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#[cfg(not(feature="low-prec-arg-dedup"))]
|
|
|
|
|
|
|
|
mod paranoid
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
use sha2::{Digest, Sha256};
|
|
|
|
|
|
|
|
use std::hash::Hasher;
|
|
|
|
|
|
|
|
use crate::util::bytes;
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
|
|
|
|
|
|
|
pub struct Sha256Hash([u8; 32]);
|
|
|
|
|
|
|
|
pub struct Sha256Hasher(Sha256);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
impl Hasher for Sha256Hasher
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
fn write(&mut self, bytes: &[u8]) {
|
|
|
|
|
|
|
|
self.0.update(bytes);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
fn finish(&self) -> u64 {
|
|
|
|
|
|
|
|
let mut out = [0u8; std::mem::size_of::<u64>()];
|
|
|
|
|
|
|
|
let def = self.0.clone().finalize();
|
|
|
|
|
|
|
|
bytes::copy_slice(&mut out[..], &def[..]);
|
|
|
|
|
|
|
|
u64::from_le_bytes(out)
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
impl Sha256Hasher
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
pub fn new() -> Self
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
Self(Sha256::new())
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn finish(self) -> Sha256Hash
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
Sha256Hash(self.0.finalize().into())
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
// Compile-time selection of the hash value type (`HashType`) and the hasher
// implementation (`DefaultHasher`), keyed on the `low-prec-arg-dedup` feature.
cfg_if::cfg_if! {
    if #[cfg(feature="low-prec-arg-dedup")] {
        // Low-precision mode: the standard library hasher with a 64-bit result.
        // `Hasher` must be in scope here because `finish()` is a trait method
        // on the std hasher.
        use std::hash::Hasher;

        type DefaultHasher = std::collections::hash_map::DefaultHasher;
        type HashType = u64;
    } else {
        // Default mode: SHA-256, keeping the full 32-byte digest.
        type DefaultHasher = paranoid::Sha256Hasher;
        type HashType = paranoid::Sha256Hash;
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
//TODO: Use SHA256 or 512 when not using feature flag `low-prec-arg-dedup`.
|
|
|
|
//TODO: Use SHA256 or 512 when not using feature flag `low-prec-arg-dedup`.
|
|
|
|
//This will produce more false-positives as it it now.
|
|
|
|
//This will produce more false-positives as it it now.
|
|
|
|
fn compute_hash_single<T: Hash>(value: &T) -> u64
|
|
|
|
fn compute_hash_single<T: Hash>(value: &T) -> HashType
|
|
|
|
{
|
|
|
|
{
|
|
|
|
let mut hasher = std::collections::hash_map::DefaultHasher::new();
|
|
|
|
let mut hasher = DefaultHasher::new();
|
|
|
|
value.hash(&mut hasher);
|
|
|
|
value.hash(&mut hasher);
|
|
|
|
hasher.finish()
|
|
|
|
hasher.finish()
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -27,7 +70,7 @@ pub struct DedupIter<I, T>
|
|
|
|
where T: Hash,
|
|
|
|
where T: Hash,
|
|
|
|
{
|
|
|
|
{
|
|
|
|
iter: I,
|
|
|
|
iter: I,
|
|
|
|
hashes: Map<u64, ()>,
|
|
|
|
hashes: Map<HashType, ()>,
|
|
|
|
_output: PhantomData<Map<T, ()>>,
|
|
|
|
_output: PhantomData<Map<T, ()>>,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|