You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
genmarkov/src/dedup.rs

95 lines
2.0 KiB

//! De-duplicating inputs
use super::*;
use std::{
hash::{Hash, Hasher},
collections::{HashSet, hash_set},
marker::PhantomData,
borrow::Borrow,
};
use sha2::{Sha256, Digest};
use cryptohelpers::sha256::Sha256Hash;
/// Compute the `Sha256Hash` of everything `thing` feeds to its [`Hash`] implementation.
///
/// The value is hashed through a private [`Hasher`] adaptor that forwards every
/// written byte into a running `Sha256` state; that state is then converted into
/// the returned `Sha256Hash`.
///
/// `T` may be unsized (for example `str` or `[u8]`): hashing only ever needs a
/// reference to the value.
fn compute<T: Hash + ?Sized>(thing: &T) -> Sha256Hash
{
    use std::mem::size_of;

    /// Adaptor that lets a `Sha256` state be driven through the std `Hasher` interface.
    struct Sha256Hasher(Sha256);

    impl Hasher for Sha256Hasher
    {
        fn write(&mut self, bytes: &[u8])
        {
            self.0.update(bytes);
        }

        fn finish(&self) -> u64
        {
            // Finalize a clone so the running state held in `self` is left untouched.
            let digest = self.0.clone().finalize();
            // Squeeze the digest into a `u64`, interpreted as little-endian.
            // NOTE(review): presumably `copy_slice` copies only as many bytes as fit
            // in `head` (the leading 8) -- confirm against the `bytes` helper module.
            let mut head = [0u8; size_of::<u64>()];
            bytes::copy_slice(&mut head[..], &digest[..]);
            u64::from_le_bytes(head)
        }
    }

    let mut hasher = Sha256Hasher(Sha256::new());
    thing.hash(&mut hasher);
    // Convert the accumulated SHA-256 state itself rather than the truncated
    // `finish()` value.
    hasher.0.into()
}
/// A set that remembers items by their SHA-256 digest instead of owning them.
///
/// Only a `Sha256Hash` per item is stored in the inner `HashSet`. The `PhantomData`
/// field ties the set to the item type `T` without holding any `T` value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HashRefSet<T: Hash + ?Sized>(HashSet<Sha256Hash>, PhantomData<HashSet<*const T>>);
// SAFETY: no `T` value or pointer is ever stored -- the only data field is the
// `HashSet<Sha256Hash>`. The `*const T` appears solely inside `PhantomData`, and it is
// that raw-pointer marker which suppresses the automatic `Send`/`Sync` impls.
// NOTE(review): this relies on `Sha256Hash` itself being `Send + Sync` -- confirm.
unsafe impl<T: Send + Hash + ?Sized> Send for HashRefSet<T>{}
unsafe impl<T: Send + Sync + Hash + ?Sized> Sync for HashRefSet<T>{}
impl<T: Hash + ?Sized> HashRefSet<T>
{
    /// Create an empty set.
    pub fn new() -> Self
    {
        Self(HashSet::new(), PhantomData)
    }

    /// Create an empty set whose inner hash table is created with capacity `cap`.
    pub fn with_capacity(cap: usize) -> Self
    {
        Self(HashSet::with_capacity(cap), PhantomData)
    }
}

impl<T: Hash + ?Sized> Default for HashRefSet<T>
{
    /// Create an empty set; equivalent to [`HashRefSet::new`].
    fn default() -> Self
    {
        Self::new()
    }
}
impl<T: Hash + ?Sized> util::NewCapacity for HashRefSet<T>
{
fn new() -> Self
{
Self::new()
}
fn with_capacity(cap: usize) -> Self
{
Self::with_capacity(cap)
}
}
impl<T: Hash> HashRefSet<T>
{
    /// Record `value` in the set by its digest.
    ///
    /// Returns `true` if the digest was not already present.
    pub fn insert<U>(&mut self, value: &U) -> bool
    where U: Borrow<T>
    {
        self.0.insert(compute(value.borrow()))
    }

    /// Forget `value` by removing its digest.
    ///
    /// Returns `true` if the digest was present.
    pub fn remove<U>(&mut self, value: &U) -> bool
    where U: Borrow<T>
    {
        self.0.remove(&compute(value.borrow()))
    }

    /// Check whether the digest of `value` is in the set.
    pub fn contains<U>(&self, value: &U) -> bool
    where U: Borrow<T>
    {
        self.0.contains(&compute(value.borrow()))
    }

    /// Number of digests currently stored.
    pub fn len(&self) -> usize
    {
        self.0.len()
    }

    /// Whether the set holds no digests at all.
    pub fn is_empty(&self) -> bool
    {
        self.0.is_empty()
    }

    /// Iterate over the stored digests, in the inner `HashSet`'s arbitrary order.
    pub fn hashes(&self) -> hash_set::Iter<'_, Sha256Hash>
    {
        self.0.iter()
    }
}