From c05d9f83049b7caa894110b2bf549b266e6797ac Mon Sep 17 00:00:00 2001 From: Avril Date: Thu, 14 Apr 2022 19:51:35 +0100 Subject: [PATCH] Initial commit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added functionality. Needs testing and an API now. Fortune for phash's current commit: Blessing − 吉 --- .gitignore | 2 + Cargo.toml | 14 +++ src/encode.rs | 93 ++++++++++++++++ src/hash.rs | 120 ++++++++++++++++++++ src/lib.rs | 4 + src/pool.rs | 295 ++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 528 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 src/encode.rs create mode 100644 src/hash.rs create mode 100644 src/lib.rs create mode 100644 src/pool.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4fffb2f --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +/Cargo.lock diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..075e3f6 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "phash" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +atomic_refcell = "0.1.8" +digest = "0.10.3" +parking_lot = { version = "0.12.0", features = ["hardware-lock-elision", "arc_lock"] } +rayon = "1.5.2" +sha2 = "0.10.2" +smallmap = "1.3.3" diff --git a/src/encode.rs b/src/encode.rs new file mode 100644 index 0000000..02aaf14 --- /dev/null +++ b/src/encode.rs @@ -0,0 +1,93 @@ +//! 
Encoding data +use std::fmt::{ + self, + Write, +}; +use smallmap::{ + Primitive, + Map, +}; +pub trait Encoder +{ + fn encode_byte(&self, byte: u8, f: &mut W) -> fmt::Result; + #[inline] + fn encode_slice(&self, slice: &[u8], f: &mut W) -> fmt::Result + { + slice.iter().try_for_each(|&byte| self.encode_byte(byte, f)) + } +} + +pub trait EncodeExt +{ + fn encode_with(&self) -> T; +} + +impl EncodeExt for S +where S: AsRef<[u8]> +{ + //TODO: A version for `bytes::Buf` + fn encode_with(&self) -> T + { + let mut w = T::default(); + let e = E::default(); + e.encode_slice(self.as_ref(), &mut w).expect("Failed to encode"); + w + } +} + +#[derive(Debug, Default)] +pub struct HexEncoder; + +const fn gen_hexmap() -> [[u8; 2]; 256] +{ + const MAP: &[u8; 16] = b"0123456789abcdef"; + let mut output = [[0u8; 2]; 256]; + let mut i=0u8; + loop { + output[i as usize] = [ + // top + MAP[(i >> 4) as usize], + // bottom + MAP[(i & 0xf) as usize] + ]; + if i == 255 { + break; + } else { + i += 1; + } + } + output +} + +impl HexEncoder +{ + pub const HEXMAP: [[u8; 2]; 256] = gen_hexmap(); +} + + +impl Encoder for HexEncoder +{ + #[inline(always)] + fn encode_byte(&self, byte: u8, f: &mut W) -> fmt::Result { + f.write_str(unsafe { std::str::from_utf8_unchecked(&Self::HEXMAP[byte as usize][..]) }) + } + #[inline] + fn encode_slice(&self, slice: &[u8], f: &mut W) -> fmt::Result { + use std::mem; + type ChunkTo = u128; //XXX: TODO: Benchmark if `usize` works better than `u128`. + + slice.chunks(mem::size_of::())//array_chunks::()>() + .try_for_each(|chunk| { + // Work on multiples of the pointer size, if possible. + if chunk.len() == mem::size_of::() { + // SAFETY: The length is equal to the size of `usize`. + let chunk: &[u8; mem::size_of::()] = unsafe { + &*chunk.as_ptr().cast()//::<[u8; mem::size_of::()]>() + }; + write!(f, "{:x}", ChunkTo::from_ne_bytes(*chunk)) //TODO: Test this; we might need BE or LE, idk. I think we'll need BE. 
+ } else { + chunk.iter().try_for_each(|&byte| self.encode_byte(byte, f)) + } + }) + } +} diff --git a/src/hash.rs b/src/hash.rs new file mode 100644 index 0000000..605c02b --- /dev/null +++ b/src/hash.rs @@ -0,0 +1,120 @@ +//! Hashing +//use super::pool; +use digest::{ + Digest, + DynDigest, + Output, +}; +use std::marker::PhantomData; +use std::{ + fmt, +}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ParallelDigest(usize, PhantomData D>); + +pub struct ParallelHash(Vec>); + +impl fmt::Display for ParallelHash +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + use crate::encode::*; + let mut d = D::new(); + write!(f, "{}:", self.0.len())?; + + for output in self.0.iter() { + d.update(&output[..]); + } + HexEncoder::encode_slice(&HexEncoder, &d.finalize()[..], f) + } +} + + +impl ParallelDigest +{ + /// Create a new digest + pub const fn new(chunk: usize) -> Self + { + Self(chunk, PhantomData) + } + + /// Compute a parallel digest from `data`. + #[inline] + pub fn digest(&self, data: impl AsRef<[u8]>) -> ParallelHash + { + ParallelHash(phash_digest_into::(data.as_ref(), self.0)) + } +} + +#[inline(always)] +fn phash_raw<'a, T: Send, F: 'a>(data: &'a [u8], chunk: usize, digest: F) -> impl rayon::iter::IndexedParallelIterator + 'a +where F: Fn(&[u8]) -> T + Send + Sync +{ + use rayon::prelude::*; + + data.par_chunks(chunk).map(digest) +} + +#[inline(always)] +fn into_output<'a, T,F, U>(data: &'a [u8], chunk: usize, func: F) -> Vec +where F: FnOnce(&'a [u8], usize) -> U, + U: rayon::iter::IndexedParallelIterator + 'a +{ + let mut output = Vec::with_capacity(1 + (data.len() / chunk)); + func(data, chunk).collect_into_vec(&mut output); + output +} + + +#[inline(always)] +pub(crate) fn phash_raw_into(data: &[u8], chunk: usize, digest: F) -> Vec//Vec> +where F: Fn(&[u8]) -> T + Send + Sync +{ + use rayon::prelude::*; + let mut output = Vec::with_capacity(1 + (data.len() / chunk)); + + phash_raw(data, chunk, digest) + .collect_into_vec(&mut 
output); + output +} +/* +pub(crate) fn phash_pool<'a, D: Digest + digest::FixedOutputReset + Send + Sync + 'a>(data: &'a [u8], chunk: usize) -> impl rayon::iter::IndexedParallelIterator> + 'a +{ +let pool = pool::Pool::new_with(rayon::max_num_threads(), D::new); + +phash_raw(data.as_ref(), chunk, move |range| { +let mut digest = pool.take_one_or_else(D::new); +::update(&mut digest, range); +digest.finalize_reset() + }) +} + +#[inline] +pub(crate) fn phash_pool_into(data: &[u8], chunk: usize) -> Vec> +{ + into_output(data, chunk, phash_pool::) +} + */ + +pub(crate) fn phash_digest<'a, D: Digest>(data: &'a [u8], chunk: usize) -> impl rayon::iter::IndexedParallelIterator> + 'a +where Output: Send +{ + phash_raw(data, chunk, |range| { + let mut digest = D::new(); + digest.update(range); + digest.finalize() + }) +} + +pub(crate) fn phash_digest_into<'a, D: Digest>(data: &[u8], chunk: usize) -> Vec> +where Output: Send +{ + into_output(data, chunk, phash_digest::) +} + +/* +fn phash(data: impl AsRef<[u8]>, chunk: usize) -> ParallelHash +{ + +}*/ diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..f829e2a --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,4 @@ + +//mod pool; A)JR AOISJOAIJOIA JODIJAOSI JDFUCK THIS DOESN@T WORK FUCK ATOMICREFCELL FUCK YOU FFUCK EVERYTHING AAAAAAAAAAAAA +pub mod encode; +pub mod hash; diff --git a/src/pool.rs b/src/pool.rs new file mode 100644 index 0000000..7e45073 --- /dev/null +++ b/src/pool.rs @@ -0,0 +1,295 @@ +//! 
Pool of objects
use parking_lot::{
    RwLock,
    Mutex,
};
use atomic_refcell::{
    AtomicRefCell,
    AtomicRefMut,
};
use std::{
    iter,
    ops,
    mem::{ ManuallyDrop, self },
    borrow::{
        Borrow,
        BorrowMut,
    },
};

/// A growable pool of `T` values: an `RwLock`ed vector of `AtomicRefCell`s.
#[derive(Debug)]
pub struct Pool<T>(RwLock<Vec<AtomicRefCell<T>>>);

/// A checked-out pool slot: either a guard into the pool (`Inside`) or a
/// freshly created value that is returned to the pool on drop (`Outside`).
#[derive(Debug)]
pub enum PoolRef<'a, T>
{
    Inside(AtomicRefMut<'a, T>),
    Outside(DeferredRef<'a, T>),
}

impl<'o, T: Send + Sync> PoolRef<'o, T>
{
    #[inline]
    pub fn as_ref(&self) -> &T
    {
        self.borrow()
    }
    #[inline]
    pub fn as_mut(&mut self) -> &mut T
    {
        self.borrow_mut()
    }
    /// Force insert into pool, then return the mutable ref guard.
    // NOTE(review): the guard produced by `downgrade(writer)[len].borrow_mut()`
    // borrows from a local read guard, so it likely cannot live for `'o`;
    // this matches the module being disabled in lib.rs — confirm before use.
    pub fn force(self) -> AtomicRefMut<'o, T>
    {
        match self {
            Self::Inside(s) => s,
            Self::Outside(deferred) => {
                let pool = deferred.1;
                let value = AtomicRefCell::new(deferred.detach());
                let mut writer = pool.0.write();
                let len = writer.len();
                writer.push(value);
                parking_lot::lock_api::RwLockWriteGuard::downgrade(writer)[len].borrow_mut()
            },
        }
    }
    /// Drop this ref; an `Outside` value is discarded instead of being
    /// returned to the pool.
    pub fn forget(self)
    {
        if let Self::Outside(v) = self {
            v.forget()
        }
    }
    /// Take the value out, unless it is already owned by the pool.
    pub fn try_detach(self) -> Result<T, AtomicRefMut<'o, T>>
    {
        match self {
            Self::Inside(v) => Err(v),
            Self::Outside(v) => Ok(v.detach())
        }
    }
    /// Replace the referenced value with `with()`, returning the old value.
    pub fn replace_with<F>(&mut self, with: F) -> T
    where F: FnOnce() -> T
    {
        mem::replace(self.borrow_mut(), with())
    }
    #[inline]
    pub fn replace(&mut self, val: T) -> T
    {
        self.replace_with(move || val)
    }
}
impl<'o, T: Send + Sync + Default> PoolRef<'o, T>
{
    /// Take the value, leaving `T::default()` in its place.
    #[inline(always)]
    pub fn take(&mut self) -> T
    {
        self.replace_with(T::default)
    }

    #[inline]
    pub fn into_inner(mut self) -> T
    {
        self.take()
    }
}


impl<'o, T> Borrow<T> for PoolRef<'o, T>
{
    #[inline]
    fn borrow(&self) -> &T {
        ops::Deref::deref(self)
    }
}

impl<'o, T> BorrowMut<T> for PoolRef<'o, T>
{
    #[inline]
    fn borrow_mut(&mut self) -> &mut T {
        // (was `&mut self`, a type error: deref through to the inner value)
        ops::DerefMut::deref_mut(self)
    }
}


impl<'o, T> ops::Deref for PoolRef<'o, T>
{
    type Target = T;
    #[inline]
    fn deref(&self) -> &Self::Target {
        match self {
            Self::Inside(s) => s.deref(),
            Self::Outside(s) => s.deref(),
        }
    }
}

impl<'o, T> ops::DerefMut for PoolRef<'o, T>
{
    #[inline]
    fn deref_mut(&mut self) -> &mut Self::Target {
        match self {
            Self::Inside(s) => s.deref_mut(),
            Self::Outside(s) => s.deref_mut(),
        }
    }
}

/// A value created outside the pool; on drop it is pushed into the owning pool.
#[derive(Debug)]
pub struct DeferredRef<'owner, T>(ManuallyDrop<T>, &'owner Pool<T>);


impl<'o, T: Send + Sync> DeferredRef<'o, T>
{
    /// Take the value without returning it to the pool.
    #[inline(always)]
    pub fn detach(mut self) -> T
    {
        // SAFETY: `self` is forgotten immediately below, so the value is
        // taken exactly once and `Drop` never runs.
        let v = unsafe {
            ManuallyDrop::take(&mut self.0)
        };
        mem::forget(self);
        v
    }
    /// Drop the value in place without returning it to the pool.
    #[inline(always)]
    pub fn forget(mut self)
    {
        // SAFETY: `self` is forgotten immediately below, so no double-drop.
        unsafe {
            ManuallyDrop::drop(&mut self.0);
        }
        mem::forget(self);
    }
}

impl<'o, T> ops::Drop for DeferredRef<'o, T>
{
    fn drop(&mut self) {
        // SAFETY: drop runs at most once and `detach`/`forget` bypass it via
        // `mem::forget`, so the value is moved out exactly once here.
        self.1.0.write().push(AtomicRefCell::new( unsafe { ManuallyDrop::take(&mut self.0) } ));
    }
}

impl<'o, T> ops::Deref for DeferredRef<'o, T>
{
    type Target = T;
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        self.0.deref()
    }
}

impl<'o, T> ops::DerefMut for DeferredRef<'o, T>
{
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        self.0.deref_mut()
    }
}

impl<T: Send + Sync + Default> Pool<T>
{
    /// Create a pool pre-filled with `size` default values.
    pub fn new(size: usize) -> Self
    {
        Self(RwLock::new(iter::repeat_with(T::default).map(AtomicRefCell::new).take(size).collect()))
    }

    #[inline]
    pub fn take_one(&self) -> PoolRef<'_, T>
    {
        self.take_one_or_else(T::default)
    }
    #[inline]
    pub fn take_one_or_default(&self) -> AtomicRefMut<'_, T>
    {
        self.take_one_or_insert_with(T::default)
    }
}

impl<T: Send + Sync> iter::FromIterator<T> for Pool<T>
{
    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self
    {
        Self(RwLock::new(iter.into_iter().map(AtomicRefCell::new).collect()))
    }
}


impl<T: Send + Sync> Pool<T>
{
    #[inline]
    pub fn take_one_or(&self, value: T) -> PoolRef<'_, T>
    {
        self.take_one_or_else(move || value)
    }
    // NOTE(review): as written this always returns `None`: `.ok()?` bails on
    // the first *borrowed* cell, and a successful borrow is discarded by the
    // loop. A real fix also needs the read guard's lifetime solved (the
    // `AtomicRefMut` borrows from a temporary) — confirm intended semantics.
    pub fn try_take_one(&self) -> Option<PoolRef<'_, T>>
    {
        for possible in self.0.read().iter() {
            possible.try_borrow_mut().ok()?;
        }
        None
    }

    pub fn take_one_or_else<F>(&self, new: F) -> PoolRef<'_, T>
    where F: FnOnce() -> T
    {
        let try_borrow = {
            let this = &self;
            // NOTE(review): same always-`None` scan as `try_take_one`, so every
            // call currently takes the `Outside` path — confirm intent.
            move || -> Option<AtomicRefMut<'_, T>> {
                for possible in this.0.read().iter() {
                    possible.try_borrow_mut().ok()?;
                }
                None
            }
        };
        if let Some(v) = try_borrow() {
            PoolRef::Inside(v)
        } else {
            PoolRef::Outside(DeferredRef(ManuallyDrop::new(new()), self))
        }
    }

    /// Create a pool pre-filled with `size` clones of `value`.
    #[inline]
    pub fn new_with_value(size: usize, value: T) -> Self
    where T: Clone
    {
        iter::repeat(value).take(size).collect()
    }
    /// Create a pool pre-filled with `size` values produced by `with`.
    pub fn new_with<F>(size: usize, with: F) -> Self
    where F: FnMut() -> T
    {
        Self(RwLock::new(iter::repeat_with(with).map(AtomicRefCell::new).take(size).collect()))
    }

    /// Create an empty pool (usable in `const`/`static` contexts).
    #[inline]
    pub const fn new_empty() -> Self
    {
        Self(parking_lot::const_rwlock(Vec::new()))
    }

    /// Borrow a free slot, inserting `with()` as a new slot when none is free.
    pub fn take_one_or_insert_with<F>(&self, with: F) -> AtomicRefMut<'_, T>
    where F: FnOnce() -> T
    {
        let try_borrow = {
            let this = &self;
            move || -> Result<AtomicRefMut<'_, T>, parking_lot::RwLockWriteGuard<'_, Vec<AtomicRefCell<T>>>> {
                // Scan under an upgradable read lock; upgrade only when we
                // must grow the pool.
                let reader = this.0.upgradable_read();
                for possible in reader.iter() {
                    if let Ok(val) = possible.try_borrow_mut() {
                        return Ok(val);
                    }
                }
                Err(parking_lot::lock_api::RwLockUpgradableReadGuard::upgrade(reader))
            }
        };
        match try_borrow() {
            Ok(val) => val,
            // (was `Err(writer)` — `push` below requires a mutable binding)
            Err(mut writer) => {
                let l = writer.len();
                writer.push(AtomicRefCell::new(with()));
                parking_lot::lock_api::RwLockWriteGuard::downgrade(writer)[l].borrow_mut()
            },
        }
    }
    #[inline]
    pub fn take_one_or_insert_value(&self, value: T) -> AtomicRefMut<'_, T>
    {
        self.take_one_or_insert_with(move || value)
    }
}