Added functionality. Needs testing and an API now. Fortune for phash's current commit: Blessing − 吉master
commit
c05d9f8304
@ -0,0 +1,2 @@
|
||||
/target
|
||||
/Cargo.lock
|
@ -0,0 +1,14 @@
|
||||
[package]
|
||||
name = "phash"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
atomic_refcell = "0.1.8"
|
||||
digest = "0.10.3"
|
||||
parking_lot = { version = "0.12.0", features = ["hardware-lock-elision", "arc_lock"] }
|
||||
rayon = "1.5.2"
|
||||
sha2 = "0.10.2"
|
||||
smallmap = "1.3.3"
|
@ -0,0 +1,93 @@
|
||||
//! Encoding data
|
||||
use std::fmt::{
|
||||
self,
|
||||
Write,
|
||||
};
|
||||
use smallmap::{
|
||||
Primitive,
|
||||
Map,
|
||||
};
|
||||
pub trait Encoder
|
||||
{
|
||||
fn encode_byte<W: Write + ?Sized>(&self, byte: u8, f: &mut W) -> fmt::Result;
|
||||
#[inline]
|
||||
fn encode_slice<W: Write + ?Sized>(&self, slice: &[u8], f: &mut W) -> fmt::Result
|
||||
{
|
||||
slice.iter().try_for_each(|&byte| self.encode_byte(byte, f))
|
||||
}
|
||||
}
|
||||
|
||||
pub trait EncodeExt
|
||||
{
|
||||
fn encode_with<E: Encoder + Default, T: Write + Default>(&self) -> T;
|
||||
}
|
||||
|
||||
impl<S: ?Sized> EncodeExt for S
|
||||
where S: AsRef<[u8]>
|
||||
{
|
||||
//TODO: A version for `bytes::Buf`
|
||||
fn encode_with<E: Encoder + Default, T: Write + Default>(&self) -> T
|
||||
{
|
||||
let mut w = T::default();
|
||||
let e = E::default();
|
||||
e.encode_slice(self.as_ref(), &mut w).expect("Failed to encode");
|
||||
w
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct HexEncoder;
|
||||
|
||||
const fn gen_hexmap() -> [[u8; 2]; 256]
|
||||
{
|
||||
const MAP: &[u8; 16] = b"0123456789abcdef";
|
||||
let mut output = [[0u8; 2]; 256];
|
||||
let mut i=0u8;
|
||||
loop {
|
||||
output[i as usize] = [
|
||||
// top
|
||||
MAP[(i >> 4) as usize],
|
||||
// bottom
|
||||
MAP[(i & 0xf) as usize]
|
||||
];
|
||||
if i == 255 {
|
||||
break;
|
||||
} else {
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
output
|
||||
}
|
||||
|
||||
impl HexEncoder
|
||||
{
|
||||
pub const HEXMAP: [[u8; 2]; 256] = gen_hexmap();
|
||||
}
|
||||
|
||||
|
||||
impl Encoder for HexEncoder
|
||||
{
|
||||
#[inline(always)]
|
||||
fn encode_byte<W: Write + ?Sized>(&self, byte: u8, f: &mut W) -> fmt::Result {
|
||||
f.write_str(unsafe { std::str::from_utf8_unchecked(&Self::HEXMAP[byte as usize][..]) })
|
||||
}
|
||||
#[inline]
|
||||
fn encode_slice<W: Write + ?Sized>(&self, slice: &[u8], f: &mut W) -> fmt::Result {
|
||||
use std::mem;
|
||||
type ChunkTo = u128; //XXX: TODO: Benchmark if `usize` works better than `u128`.
|
||||
|
||||
slice.chunks(mem::size_of::<ChunkTo>())//array_chunks::<mem::size_of::<usize>()>()
|
||||
.try_for_each(|chunk| {
|
||||
// Work on multiples of the pointer size, if possible.
|
||||
if chunk.len() == mem::size_of::<ChunkTo>() {
|
||||
// SAFETY: The length is equal to the size of `usize`.
|
||||
let chunk: &[u8; mem::size_of::<ChunkTo>()] = unsafe {
|
||||
&*chunk.as_ptr().cast()//::<[u8; mem::size_of::<usize>()]>()
|
||||
};
|
||||
write!(f, "{:x}", ChunkTo::from_ne_bytes(*chunk)) //TODO: Test this; we might need BE or LE, idk. I think we'll need BE.
|
||||
} else {
|
||||
chunk.iter().try_for_each(|&byte| self.encode_byte(byte, f))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
@ -0,0 +1,120 @@
|
||||
//! Hashing
|
||||
//use super::pool;
|
||||
use digest::{
|
||||
Digest,
|
||||
DynDigest,
|
||||
Output,
|
||||
};
|
||||
use std::marker::PhantomData;
|
||||
use std::{
|
||||
fmt,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct ParallelDigest<D: Digest + Send + Sync>(usize, PhantomData<fn () -> D>);
|
||||
|
||||
pub struct ParallelHash<D: Digest>(Vec<Output<D>>);
|
||||
|
||||
impl<D: Digest> fmt::Display for ParallelHash<D>
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
|
||||
{
|
||||
use crate::encode::*;
|
||||
let mut d = D::new();
|
||||
write!(f, "{}:", self.0.len())?;
|
||||
|
||||
for output in self.0.iter() {
|
||||
d.update(&output[..]);
|
||||
}
|
||||
HexEncoder::encode_slice(&HexEncoder, &d.finalize()[..], f)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl<D: Digest + Send + Sync> ParallelDigest<D>
|
||||
{
|
||||
/// Create a new digest
|
||||
pub const fn new(chunk: usize) -> Self
|
||||
{
|
||||
Self(chunk, PhantomData)
|
||||
}
|
||||
|
||||
/// Compute a parallel digest from `data`.
|
||||
#[inline]
|
||||
pub fn digest(&self, data: impl AsRef<[u8]>) -> ParallelHash<D>
|
||||
{
|
||||
ParallelHash(phash_digest_into::<D>(data.as_ref(), self.0))
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn phash_raw<'a, T: Send, F: 'a>(data: &'a [u8], chunk: usize, digest: F) -> impl rayon::iter::IndexedParallelIterator<Item = T> + 'a
|
||||
where F: Fn(&[u8]) -> T + Send + Sync
|
||||
{
|
||||
use rayon::prelude::*;
|
||||
|
||||
data.par_chunks(chunk).map(digest)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn into_output<'a, T,F, U>(data: &'a [u8], chunk: usize, func: F) -> Vec<T>
|
||||
where F: FnOnce(&'a [u8], usize) -> U,
|
||||
U: rayon::iter::IndexedParallelIterator<Item = T> + 'a
|
||||
{
|
||||
let mut output = Vec::with_capacity(1 + (data.len() / chunk));
|
||||
func(data, chunk).collect_into_vec(&mut output);
|
||||
output
|
||||
}
|
||||
|
||||
|
||||
#[inline(always)]
|
||||
pub(crate) fn phash_raw_into<T: Send, F>(data: &[u8], chunk: usize, digest: F) -> Vec<T>//Vec<Output<D>>
|
||||
where F: Fn(&[u8]) -> T + Send + Sync
|
||||
{
|
||||
use rayon::prelude::*;
|
||||
let mut output = Vec::with_capacity(1 + (data.len() / chunk));
|
||||
|
||||
phash_raw(data, chunk, digest)
|
||||
.collect_into_vec(&mut output);
|
||||
output
|
||||
}
|
||||
/*
|
||||
pub(crate) fn phash_pool<'a, D: Digest + digest::FixedOutputReset + Send + Sync + 'a>(data: &'a [u8], chunk: usize) -> impl rayon::iter::IndexedParallelIterator<Item= Output<D>> + 'a
|
||||
{
|
||||
let pool = pool::Pool::new_with(rayon::max_num_threads(), D::new);
|
||||
|
||||
phash_raw(data.as_ref(), chunk, move |range| {
|
||||
let mut digest = pool.take_one_or_else(D::new);
|
||||
<D as Digest>::update(&mut digest, range);
|
||||
digest.finalize_reset()
|
||||
})
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn phash_pool_into<D: Digest + digest::FixedOutputReset + Send + Sync>(data: &[u8], chunk: usize) -> Vec<Output<D>>
|
||||
{
|
||||
into_output(data, chunk, phash_pool::<D>)
|
||||
}
|
||||
*/
|
||||
|
||||
pub(crate) fn phash_digest<'a, D: Digest>(data: &'a [u8], chunk: usize) -> impl rayon::iter::IndexedParallelIterator<Item = Output<D>> + 'a
|
||||
where Output<D>: Send
|
||||
{
|
||||
phash_raw(data, chunk, |range| {
|
||||
let mut digest = D::new();
|
||||
digest.update(range);
|
||||
digest.finalize()
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn phash_digest_into<'a, D: Digest>(data: &[u8], chunk: usize) -> Vec<Output<D>>
|
||||
where Output<D>: Send
|
||||
{
|
||||
into_output(data, chunk, phash_digest::<D>)
|
||||
}
|
||||
|
||||
/*
|
||||
fn phash<D: Digest + Send + Sync>(data: impl AsRef<[u8]>, chunk: usize) -> ParallelHash<D>
|
||||
{
|
||||
|
||||
}*/
|
@ -0,0 +1,4 @@
|
||||
|
||||
//mod pool; A)JR AOISJOAIJOIA JODIJAOSI JDFUCK THIS DOESN@T WORK FUCK ATOMICREFCELL FUCK YOU FFUCK EVERYTHING AAAAAAAAAAAAA
|
||||
pub mod encode;
|
||||
pub mod hash;
|
@ -0,0 +1,295 @@
|
||||
//! Pool of objects
|
||||
use parking_lot::{
|
||||
RwLock,
|
||||
Mutex,
|
||||
};
|
||||
use atomic_refcell::{
|
||||
AtomicRefCell,
|
||||
AtomicRefMut,
|
||||
};
|
||||
use std::{
|
||||
iter,
|
||||
ops,
|
||||
mem::{ ManuallyDrop, self },
|
||||
borrow::{
|
||||
Borrow,
|
||||
BorrowMut,
|
||||
},
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Pool<T>(RwLock<Vec<AtomicRefCell<T>>>);
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum PoolRef<'a, T>
|
||||
{
|
||||
Inside(AtomicRefMut<'a, T>),
|
||||
Outside(DeferredRef<'a, T>),
|
||||
}
|
||||
|
||||
impl<'o, T: Send + Sync> PoolRef<'o, T>
|
||||
{
|
||||
#[inline]
|
||||
pub fn as_ref(&self) -> &T
|
||||
{
|
||||
self.borrow()
|
||||
}
|
||||
#[inline]
|
||||
pub fn as_mut(&mut self) -> &mut T
|
||||
{
|
||||
self.borrow_mut()
|
||||
}
|
||||
/// Force insert into pool, then return the mutable ref guard.
|
||||
pub fn force(self) -> AtomicRefMut<'o, T>
|
||||
{
|
||||
match self {
|
||||
Self::Inside(s) => s,
|
||||
Self::Outside(deferred) => {
|
||||
let pool = deferred.1;
|
||||
let value = AtomicRefCell::new(deferred.detach());
|
||||
let mut writer = pool.0.write();
|
||||
let len = writer.len();
|
||||
writer.push(value);
|
||||
parking_lot::lock_api::RwLockWriteGuard::downgrade(writer)[len].borrow_mut()
|
||||
},
|
||||
}
|
||||
}
|
||||
pub fn forget(self)
|
||||
{
|
||||
if let Self::Outside(v) = self {
|
||||
v.forget()
|
||||
}
|
||||
}
|
||||
pub fn try_detach(self) -> Result<T, AtomicRefMut<'o, T>>
|
||||
{
|
||||
match self {
|
||||
Self::Inside(v) => Err(v),
|
||||
Self::Outside(v) => Ok(v.detach())
|
||||
}
|
||||
}
|
||||
pub fn replace_with<F>(&mut self, with: F) -> T
|
||||
where F: FnOnce() -> T
|
||||
{
|
||||
mem::replace(self.borrow_mut(), with())
|
||||
}
|
||||
#[inline]
|
||||
pub fn replace(&mut self, val: T) -> T
|
||||
{
|
||||
self.replace_with(move || val)
|
||||
}
|
||||
}
|
||||
impl<'o, T: Send + Sync + Default> PoolRef<'o, T>
|
||||
{
|
||||
#[inline(always)]
|
||||
pub fn take(&mut self) -> T
|
||||
{
|
||||
self.replace_with(T::default)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn into_inner(mut self) -> T
|
||||
{
|
||||
self.take()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl<'o, T> Borrow<T> for PoolRef<'o, T>
|
||||
{
|
||||
#[inline]
|
||||
fn borrow(&self) -> &T {
|
||||
ops::Deref::deref(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'o, T> BorrowMut<T> for PoolRef<'o, T>
|
||||
{
|
||||
#[inline]
|
||||
fn borrow_mut(&mut self) -> &mut T {
|
||||
&mut self
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl<'o, T> ops::Deref for PoolRef<'o, T>
|
||||
{
|
||||
type Target = T;
|
||||
#[inline]
|
||||
fn deref(&self) -> &Self::Target {
|
||||
match self {
|
||||
Self::Inside(s) => s.deref(),
|
||||
Self::Outside(s) => s.deref(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'o, T> ops::DerefMut for PoolRef<'o, T>
|
||||
{
|
||||
#[inline]
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
match self {
|
||||
Self::Inside(s) => s.deref_mut(),
|
||||
Self::Outside(s) => s.deref_mut(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct DeferredRef<'owner, T>(ManuallyDrop<T>, &'owner Pool<T>);
|
||||
|
||||
|
||||
impl<'o, T: Send + Sync> DeferredRef<'o, T>
|
||||
{
|
||||
#[inline(always)]
|
||||
pub fn detach(mut self) -> T
|
||||
{
|
||||
let v= unsafe {
|
||||
ManuallyDrop::take(&mut self.0)
|
||||
};
|
||||
mem::forget(self);
|
||||
v
|
||||
}
|
||||
#[inline(always)]
|
||||
pub fn forget(mut self)
|
||||
{
|
||||
unsafe {
|
||||
ManuallyDrop::drop(&mut self.0);
|
||||
}
|
||||
mem::forget(self);
|
||||
}
|
||||
}
|
||||
|
||||
impl<'o, T> ops::Drop for DeferredRef<'o, T>
|
||||
{
|
||||
fn drop(&mut self) {
|
||||
self.1.0.write().push(AtomicRefCell::new( unsafe { ManuallyDrop::take(&mut self.0) } ));
|
||||
}
|
||||
}
|
||||
|
||||
impl<'o, T> ops::Deref for DeferredRef<'o, T>
|
||||
{
|
||||
type Target = T;
|
||||
#[inline(always)]
|
||||
fn deref(&self) -> &Self::Target {
|
||||
self.0.deref()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'o, T> ops::DerefMut for DeferredRef<'o, T>
|
||||
{
|
||||
#[inline(always)]
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
self.0.deref_mut()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Send+ Sync + Default> Pool<T>
|
||||
{
|
||||
pub fn new(size: usize) -> Self
|
||||
{
|
||||
Self(RwLock::new(iter::repeat_with(T::default).map(AtomicRefCell::new).take(size).collect()))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn take_one(&self) -> PoolRef<'_, T>
|
||||
{
|
||||
self.take_one_or_else(T::default)
|
||||
}
|
||||
#[inline]
|
||||
pub fn take_one_or_default(&self) -> AtomicRefMut<'_, T>
|
||||
{
|
||||
self.take_one_or_insert_with(T::default)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Send+Sync> iter::FromIterator<T> for Pool<T>
|
||||
{
|
||||
fn from_iter<I: IntoIterator<Item=T>>(iter: I) -> Self
|
||||
{
|
||||
Self(RwLock::new(iter.into_iter().map(AtomicRefCell::new).collect()))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl<T: Send+Sync> Pool<T>
|
||||
{
|
||||
#[inline]
|
||||
pub fn take_one_or(&self, value: T) -> PoolRef<'_, T>
|
||||
{
|
||||
self.take_one_or_else(move || value)
|
||||
}
|
||||
pub fn try_take_one(&self) -> Option<AtomicRefMut<'_, T>>
|
||||
{
|
||||
for possible in self.0.read().iter() {
|
||||
possible.try_borrow_mut().ok()?;
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
pub fn take_one_or_else<F>(&self, new: F) -> PoolRef<'_, T>
|
||||
where F: FnOnce() -> T
|
||||
{
|
||||
let try_borrow = {
|
||||
let this = &self;
|
||||
move || -> Option<AtomicRefMut<T>> {
|
||||
for possible in this.0.read().iter() {
|
||||
possible.try_borrow_mut().ok()?;
|
||||
}
|
||||
None
|
||||
}
|
||||
};
|
||||
if let Some(v) = try_borrow() {
|
||||
PoolRef::Inside(v)
|
||||
} else {
|
||||
PoolRef::Outside(DeferredRef(ManuallyDrop::new(new()), self))
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn new_with_value(size: usize, value: T) -> Self
|
||||
where T: Clone
|
||||
{
|
||||
iter::repeat(value).take(size).collect()
|
||||
}
|
||||
pub fn new_with<F>(size: usize, with: F) -> Self
|
||||
where F: FnMut() -> T
|
||||
{
|
||||
Self(RwLock::new(iter::repeat_with(with).map(AtomicRefCell::new).take(size).collect()))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub const fn new_empty() -> Self
|
||||
{
|
||||
Self(parking_lot::const_rwlock(Vec::new()))
|
||||
}
|
||||
|
||||
pub fn take_one_or_insert_with<F>(&self, with: F) -> AtomicRefMut<'_, T>
|
||||
where F: FnOnce() -> T
|
||||
{
|
||||
let try_borrow = {
|
||||
let this = &self;
|
||||
move || -> Result<AtomicRefMut<T>, parking_lot::RwLockWriteGuard<_>> {
|
||||
let reader = this.0.upgradable_read();
|
||||
for possible in reader.iter() {
|
||||
if let Ok(val) = possible.try_borrow_mut() {
|
||||
return Ok(val);
|
||||
}
|
||||
}
|
||||
Err(parking_lot::lock_api::RwLockUpgradableReadGuard::upgrade(reader))
|
||||
}
|
||||
};
|
||||
match try_borrow() {
|
||||
Ok(val) => val,
|
||||
Err(writer) => {
|
||||
let l = writer.len();
|
||||
writer.push(AtomicRefCell::new(with()));
|
||||
parking_lot::lock_api::RwLockWriteGuard::downgrade(writer)[l].borrow_mut()
|
||||
},
|
||||
}
|
||||
}
|
||||
#[inline]
|
||||
pub fn take_one_or_insert_value(&self, value: T) -> AtomicRefMut<'_, T>
|
||||
{
|
||||
self.take_one_or_insert_with(move || value)
|
||||
}
|
||||
}
|
Loading…
Reference in new issue