Added functionality. Needs testing and an API now.

Fortune for phash's current commit: Blessing − 吉

branch: master
commit c05d9f8304
6 changed files with 528 additions and 0 deletions
.gitignore
@@ -0,0 +1,2 @@
/target
/Cargo.lock
Cargo.toml
@@ -0,0 +1,14 @@
[package]
name = "phash"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
atomic_refcell = "0.1.8"
digest = "0.10.3"
parking_lot = { version = "0.12.0", features = ["hardware-lock-elision", "arc_lock"] }
rayon = "1.5.2"
sha2 = "0.10.2"
smallmap = "1.3.3"
src/encode.rs
@@ -0,0 +1,93 @@
//! Encoding data
use std::fmt::{
    self,
    Write,
};
use smallmap::{
    Primitive,
    Map,
};

pub trait Encoder
{
    fn encode_byte<W: Write + ?Sized>(&self, byte: u8, f: &mut W) -> fmt::Result;
    #[inline]
    fn encode_slice<W: Write + ?Sized>(&self, slice: &[u8], f: &mut W) -> fmt::Result
    {
        slice.iter().try_for_each(|&byte| self.encode_byte(byte, f))
    }
}

pub trait EncodeExt
{
    fn encode_with<E: Encoder + Default, T: Write + Default>(&self) -> T;
}

impl<S: ?Sized> EncodeExt for S
where S: AsRef<[u8]>
{
    //TODO: A version for `bytes::Buf`
    fn encode_with<E: Encoder + Default, T: Write + Default>(&self) -> T
    {
        let mut w = T::default();
        let e = E::default();
        e.encode_slice(self.as_ref(), &mut w).expect("Failed to encode");
        w
    }
}

#[derive(Debug, Default)]
pub struct HexEncoder;

/// Build a table mapping every byte value to its two lowercase hex digits.
const fn gen_hexmap() -> [[u8; 2]; 256]
{
    const MAP: &[u8; 16] = b"0123456789abcdef";
    let mut output = [[0u8; 2]; 256];
    let mut i = 0u8;
    loop {
        output[i as usize] = [
            // top nibble
            MAP[(i >> 4) as usize],
            // bottom nibble
            MAP[(i & 0xf) as usize]
        ];
        if i == 255 {
            break;
        } else {
            i += 1;
        }
    }
    output
}

impl HexEncoder
{
    pub const HEXMAP: [[u8; 2]; 256] = gen_hexmap();
}

impl Encoder for HexEncoder
{
    #[inline(always)]
    fn encode_byte<W: Write + ?Sized>(&self, byte: u8, f: &mut W) -> fmt::Result {
        // SAFETY: every `HEXMAP` entry is two ASCII hex digits, so it is valid UTF-8.
        f.write_str(unsafe { std::str::from_utf8_unchecked(&Self::HEXMAP[byte as usize][..]) })
    }
    #[inline]
    fn encode_slice<W: Write + ?Sized>(&self, slice: &[u8], f: &mut W) -> fmt::Result {
        use std::mem;
        type ChunkTo = u128; //XXX: TODO: Benchmark if `usize` works better than `u128`.

        slice.chunks(mem::size_of::<ChunkTo>())
            .try_for_each(|chunk| {
                // Work on whole `ChunkTo`-sized chunks where possible.
                if chunk.len() == mem::size_of::<ChunkTo>() {
                    // SAFETY: The length is equal to the size of `ChunkTo`.
                    let chunk: &[u8; mem::size_of::<ChunkTo>()] = unsafe {
                        &*chunk.as_ptr().cast()
                    };
                    // Big-endian, zero-padded to the full chunk width, so the output
                    // matches the byte-by-byte encoding above.
                    write!(f, "{:0width$x}", ChunkTo::from_be_bytes(*chunk), width = 2 * mem::size_of::<ChunkTo>())
                } else {
                    chunk.iter().try_for_each(|&byte| self.encode_byte(byte, f))
                }
            })
    }
}
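A quick usage sketch for the encoder API above (not part of the commit; it only uses names defined in this file): any `AsRef<[u8]>` value can be hex-encoded into any `fmt::Write + Default` sink via `EncodeExt::encode_with`.

    // Hypothetical example; this is not in the commit.
    use crate::encode::{EncodeExt, HexEncoder};

    fn hex_demo() {
        // Encode a byte string into a `String` with the hex encoder.
        let hex: String = b"hello".encode_with::<HexEncoder, String>();
        assert_eq!(hex, "68656c6c6f");
    }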
src/hash.rs
@@ -0,0 +1,120 @@
//! Hashing
//use super::pool;
use digest::{
    Digest,
    DynDigest,
    Output,
};
use std::marker::PhantomData;
use std::{
    fmt,
};

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParallelDigest<D: Digest + Send + Sync>(usize, PhantomData<fn () -> D>);

pub struct ParallelHash<D: Digest>(Vec<Output<D>>);

impl<D: Digest> fmt::Display for ParallelHash<D>
{
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
    {
        use crate::encode::*;
        let mut d = D::new();
        // Prefix with the number of chunk digests, then hex-encode a digest of the
        // concatenated chunk digests.
        write!(f, "{}:", self.0.len())?;

        for output in self.0.iter() {
            d.update(&output[..]);
        }
        HexEncoder.encode_slice(&d.finalize()[..], f)
    }
}

impl<D: Digest + Send + Sync> ParallelDigest<D>
{
    /// Create a new digest that hashes its input in chunks of `chunk` bytes.
    pub const fn new(chunk: usize) -> Self
    {
        Self(chunk, PhantomData)
    }

    /// Compute a parallel digest from `data`.
    #[inline]
    pub fn digest(&self, data: impl AsRef<[u8]>) -> ParallelHash<D>
    {
        ParallelHash(phash_digest_into::<D>(data.as_ref(), self.0))
    }
}

#[inline(always)]
fn phash_raw<'a, T: Send, F: 'a>(data: &'a [u8], chunk: usize, digest: F) -> impl rayon::iter::IndexedParallelIterator<Item = T> + 'a
where F: Fn(&[u8]) -> T + Send + Sync
{
    use rayon::prelude::*;

    data.par_chunks(chunk).map(digest)
}

#[inline(always)]
fn into_output<'a, T, F, U>(data: &'a [u8], chunk: usize, func: F) -> Vec<T>
where F: FnOnce(&'a [u8], usize) -> U,
      U: rayon::iter::IndexedParallelIterator<Item = T> + 'a
{
    let mut output = Vec::with_capacity(1 + (data.len() / chunk));
    func(data, chunk).collect_into_vec(&mut output);
    output
}

#[inline(always)]
pub(crate) fn phash_raw_into<T: Send, F>(data: &[u8], chunk: usize, digest: F) -> Vec<T>
where F: Fn(&[u8]) -> T + Send + Sync
{
    use rayon::prelude::*;
    let mut output = Vec::with_capacity(1 + (data.len() / chunk));

    phash_raw(data, chunk, digest)
        .collect_into_vec(&mut output);
    output
}
/*
pub(crate) fn phash_pool<'a, D: Digest + digest::FixedOutputReset + Send + Sync + 'a>(data: &'a [u8], chunk: usize) -> impl rayon::iter::IndexedParallelIterator<Item = Output<D>> + 'a
{
    let pool = pool::Pool::new_with(rayon::max_num_threads(), D::new);

    phash_raw(data.as_ref(), chunk, move |range| {
        let mut digest = pool.take_one_or_else(D::new);
        <D as Digest>::update(&mut digest, range);
        digest.finalize_reset()
    })
}

#[inline]
pub(crate) fn phash_pool_into<D: Digest + digest::FixedOutputReset + Send + Sync>(data: &[u8], chunk: usize) -> Vec<Output<D>>
{
    into_output(data, chunk, phash_pool::<D>)
}
*/

pub(crate) fn phash_digest<'a, D: Digest>(data: &'a [u8], chunk: usize) -> impl rayon::iter::IndexedParallelIterator<Item = Output<D>> + 'a
where Output<D>: Send
{
    phash_raw(data, chunk, |range| {
        let mut digest = D::new();
        digest.update(range);
        digest.finalize()
    })
}

pub(crate) fn phash_digest_into<D: Digest>(data: &[u8], chunk: usize) -> Vec<Output<D>>
where Output<D>: Send
{
    into_output(data, chunk, phash_digest::<D>)
}

/*
fn phash<D: Digest + Send + Sync>(data: impl AsRef<[u8]>, chunk: usize) -> ParallelHash<D>
{

}*/
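For reference, a short sketch of how the hashing API above would be used with the `sha2` dependency from Cargo.toml (the example itself is not part of the commit): `ParallelDigest` splits the input into fixed-size chunks, digests them on the rayon thread pool, and `ParallelHash`'s `Display` impl prints the chunk count followed by a hex digest over the concatenated chunk digests.

    // Hypothetical example; this is not in the commit.
    use sha2::Sha256;
    use crate::hash::ParallelDigest;

    fn parallel_sha256(data: &[u8]) -> String {
        // Digest `data` in 1 MiB chunks, one rayon task per chunk.
        let digest = ParallelDigest::<Sha256>::new(1024 * 1024);
        // Renders as "<number of chunks>:<hex digest over the chunk digests>".
        digest.digest(data).to_string()
    }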
@@ -0,0 +1,4 @@

//mod pool; // Disabled for now: the `AtomicRefCell`-based pool doesn't work yet.
pub mod encode;
pub mod hash;
src/pool.rs
@@ -0,0 +1,295 @@
//! Pool of objects
//!
//! NOTE: this module is currently disabled in the crate root (`//mod pool;`); as written,
//! the `AtomicRefMut` guards handed out below borrow from temporary lock guards, which is
//! presumably why it does not build yet.
use parking_lot::{
    RwLock,
    Mutex,
};
use atomic_refcell::{
    AtomicRefCell,
    AtomicRefMut,
};
use std::{
    iter,
    ops,
    mem::{ ManuallyDrop, self },
    borrow::{
        Borrow,
        BorrowMut,
    },
};

#[derive(Debug)]
pub struct Pool<T>(RwLock<Vec<AtomicRefCell<T>>>);

#[derive(Debug)]
pub enum PoolRef<'a, T>
{
    /// A guard over an object that already lives inside the pool.
    Inside(AtomicRefMut<'a, T>),
    /// A freshly created object that is pushed into the pool when dropped.
    Outside(DeferredRef<'a, T>),
}

impl<'o, T: Send + Sync> PoolRef<'o, T>
{
    #[inline]
    pub fn as_ref(&self) -> &T
    {
        self.borrow()
    }
    #[inline]
    pub fn as_mut(&mut self) -> &mut T
    {
        self.borrow_mut()
    }
    /// Force insert into the pool, then return the mutable ref guard.
    pub fn force(self) -> AtomicRefMut<'o, T>
    {
        match self {
            Self::Inside(s) => s,
            Self::Outside(deferred) => {
                let pool = deferred.1;
                let value = AtomicRefCell::new(deferred.detach());
                let mut writer = pool.0.write();
                let len = writer.len();
                writer.push(value);
                parking_lot::lock_api::RwLockWriteGuard::downgrade(writer)[len].borrow_mut()
            },
        }
    }
    pub fn forget(self)
    {
        if let Self::Outside(v) = self {
            v.forget()
        }
    }
    pub fn try_detach(self) -> Result<T, AtomicRefMut<'o, T>>
    {
        match self {
            Self::Inside(v) => Err(v),
            Self::Outside(v) => Ok(v.detach())
        }
    }
    pub fn replace_with<F>(&mut self, with: F) -> T
    where F: FnOnce() -> T
    {
        mem::replace(self.borrow_mut(), with())
    }
    #[inline]
    pub fn replace(&mut self, val: T) -> T
    {
        self.replace_with(move || val)
    }
}
impl<'o, T: Send + Sync + Default> PoolRef<'o, T>
{
    #[inline(always)]
    pub fn take(&mut self) -> T
    {
        self.replace_with(T::default)
    }

    #[inline]
    pub fn into_inner(mut self) -> T
    {
        self.take()
    }
}

impl<'o, T> Borrow<T> for PoolRef<'o, T>
{
    #[inline]
    fn borrow(&self) -> &T {
        ops::Deref::deref(self)
    }
}

impl<'o, T> BorrowMut<T> for PoolRef<'o, T>
{
    #[inline]
    fn borrow_mut(&mut self) -> &mut T {
        ops::DerefMut::deref_mut(self)
    }
}

impl<'o, T> ops::Deref for PoolRef<'o, T>
{
    type Target = T;
    #[inline]
    fn deref(&self) -> &Self::Target {
        match self {
            Self::Inside(s) => s.deref(),
            Self::Outside(s) => s.deref(),
        }
    }
}

impl<'o, T> ops::DerefMut for PoolRef<'o, T>
{
    #[inline]
    fn deref_mut(&mut self) -> &mut Self::Target {
        match self {
            Self::Inside(s) => s.deref_mut(),
            Self::Outside(s) => s.deref_mut(),
        }
    }
}

#[derive(Debug)]
pub struct DeferredRef<'owner, T>(ManuallyDrop<T>, &'owner Pool<T>);

impl<'o, T: Send + Sync> DeferredRef<'o, T>
{
    #[inline(always)]
    pub fn detach(mut self) -> T
    {
        let v = unsafe {
            ManuallyDrop::take(&mut self.0)
        };
        mem::forget(self);
        v
    }
    #[inline(always)]
    pub fn forget(mut self)
    {
        unsafe {
            ManuallyDrop::drop(&mut self.0);
        }
        mem::forget(self);
    }
}

impl<'o, T> ops::Drop for DeferredRef<'o, T>
{
    fn drop(&mut self) {
        // Return the deferred object to its owning pool.
        self.1.0.write().push(AtomicRefCell::new( unsafe { ManuallyDrop::take(&mut self.0) } ));
    }
}

impl<'o, T> ops::Deref for DeferredRef<'o, T>
{
    type Target = T;
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        self.0.deref()
    }
}

impl<'o, T> ops::DerefMut for DeferredRef<'o, T>
{
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        self.0.deref_mut()
    }
}

impl<T: Send + Sync + Default> Pool<T>
{
    pub fn new(size: usize) -> Self
    {
        Self(RwLock::new(iter::repeat_with(T::default).map(AtomicRefCell::new).take(size).collect()))
    }

    #[inline]
    pub fn take_one(&self) -> PoolRef<'_, T>
    {
        self.take_one_or_else(T::default)
    }
    #[inline]
    pub fn take_one_or_default(&self) -> AtomicRefMut<'_, T>
    {
        self.take_one_or_insert_with(T::default)
    }
}

impl<T: Send + Sync> iter::FromIterator<T> for Pool<T>
{
    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self
    {
        Self(RwLock::new(iter.into_iter().map(AtomicRefCell::new).collect()))
    }
}

impl<T: Send + Sync> Pool<T>
{
    #[inline]
    pub fn take_one_or(&self, value: T) -> PoolRef<'_, T>
    {
        self.take_one_or_else(move || value)
    }
    pub fn try_take_one(&self) -> Option<AtomicRefMut<'_, T>>
    {
        // FIXME: each successful trial borrow is dropped immediately, so this never
        // actually returns a pooled object (the guard cannot outlive the temporary
        // read guard anyway).
        for possible in self.0.read().iter() {
            possible.try_borrow_mut().ok()?;
        }
        None
    }

    pub fn take_one_or_else<F>(&self, new: F) -> PoolRef<'_, T>
    where F: FnOnce() -> T
    {
        let try_borrow = {
            let this = &self;
            move || -> Option<AtomicRefMut<T>> {
                // FIXME: as in `try_take_one`, this always ends up returning `None`,
                // so a new value is always constructed outside the pool.
                for possible in this.0.read().iter() {
                    possible.try_borrow_mut().ok()?;
                }
                None
            }
        };
        if let Some(v) = try_borrow() {
            PoolRef::Inside(v)
        } else {
            PoolRef::Outside(DeferredRef(ManuallyDrop::new(new()), self))
        }
    }

    #[inline]
    pub fn new_with_value(size: usize, value: T) -> Self
    where T: Clone
    {
        iter::repeat(value).take(size).collect()
    }
    pub fn new_with<F>(size: usize, with: F) -> Self
    where F: FnMut() -> T
    {
        Self(RwLock::new(iter::repeat_with(with).map(AtomicRefCell::new).take(size).collect()))
    }

    #[inline]
    pub const fn new_empty() -> Self
    {
        Self(parking_lot::const_rwlock(Vec::new()))
    }

    pub fn take_one_or_insert_with<F>(&self, with: F) -> AtomicRefMut<'_, T>
    where F: FnOnce() -> T
    {
        let try_borrow = {
            let this = &self;
            move || -> Result<AtomicRefMut<T>, parking_lot::RwLockWriteGuard<_>> {
                let reader = this.0.upgradable_read();
                for possible in reader.iter() {
                    if let Ok(val) = possible.try_borrow_mut() {
                        return Ok(val);
                    }
                }
                Err(parking_lot::lock_api::RwLockUpgradableReadGuard::upgrade(reader))
            }
        };
        match try_borrow() {
            Ok(val) => val,
            Err(mut writer) => {
                let l = writer.len();
                writer.push(AtomicRefCell::new(with()));
                parking_lot::lock_api::RwLockWriteGuard::downgrade(writer)[l].borrow_mut()
            },
        }
    }
    #[inline]
    pub fn take_one_or_insert_value(&self, value: T) -> AtomicRefMut<'_, T>
    {
        self.take_one_or_insert_with(move || value)
    }
}
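For completeness, the intended use of the pool looks roughly like this (hypothetical; the module is commented out in the crate root and does not currently build): take a free object from the pool or construct a new one, use it through the guard, and let the guard return it to the pool on drop.

    // Hypothetical example; this is not in the commit.
    use crate::pool::Pool;

    fn pool_demo() {
        // A pool of four reusable byte buffers.
        let pool: Pool<Vec<u8>> = Pool::new(4);
        {
            // Borrow a buffer (or create a deferred one if all are busy)...
            let mut buf = pool.take_one();
            buf.as_mut().extend_from_slice(b"scratch data");
        } // ...the guard drops here and the buffer goes back to the pool.
    }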