Initial commit

Added functionality. Needs testing and an API now.

Fortune for phash's current commit: Blessing − 吉
master
Avril 10 months ago
commit c05d9f8304
Signed by: flanchan
GPG Key ID: 284488987C31F630
  1. 2
      .gitignore
  2. 14
      Cargo.toml
  3. 93
      src/encode.rs
  4. 120
      src/hash.rs
  5. 4
      src/lib.rs
  6. 295
      src/pool.rs

2
.gitignore vendored

@@ -0,0 +1,2 @@
# Cargo build artifacts
/target
# Lockfile is regenerated on build; not committed
/Cargo.lock

@@ -0,0 +1,14 @@
[package]
name = "phash"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
# Interior-mutability cells used by the object pool (src/pool.rs, currently disabled).
atomic_refcell = "0.1.8"
# `Digest` trait abstraction over hash functions (src/hash.rs).
digest = "0.10.3"
# RwLock used by the pool; hardware lock elision and Arc-based guards enabled.
parking_lot = { version = "0.12.0", features = ["hardware-lock-elision", "arc_lock"] }
# Data-parallel chunk hashing (src/hash.rs).
rayon = "1.5.2"
# Concrete SHA-2 digest implementations.
sha2 = "0.10.2"
# Imported by src/encode.rs (usage not visible in this commit).
smallmap = "1.3.3"

@@ -0,0 +1,93 @@
//! Encoding data
use std::fmt::{
self,
Write,
};
use smallmap::{
Primitive,
Map,
};
/// A byte-to-text encoder writing into any `fmt::Write` sink.
pub trait Encoder
{
    /// Encode a single byte into `f`.
    fn encode_byte<W: Write + ?Sized>(&self, byte: u8, f: &mut W) -> fmt::Result;

    /// Encode every byte of `slice` into `f`, stopping at the first error.
    #[inline]
    fn encode_slice<W: Write + ?Sized>(&self, slice: &[u8], f: &mut W) -> fmt::Result
    {
        for &b in slice {
            self.encode_byte(b, f)?;
        }
        Ok(())
    }
}
/// Extension trait: encode any byte source into a freshly-created writer.
pub trait EncodeExt
{
    /// Encode `self` with a default-constructed encoder `E` into a new `T`
    /// (e.g. a `String`), returning the filled writer.
    fn encode_with<E: Encoder + Default, T: Write + Default>(&self) -> T;
}
impl<S: ?Sized> EncodeExt for S
where S: AsRef<[u8]>
{
    //TODO: A version for `bytes::Buf`
    /// Encode `self`'s bytes with a default `E` into a default `T`.
    ///
    /// Panics if the encoder reports a `fmt` error (i.e. the writer fails);
    /// for `String` output this cannot happen.
    fn encode_with<E: Encoder + Default, T: Write + Default>(&self) -> T
    {
        let mut w = T::default();
        let e = E::default();
        e.encode_slice(self.as_ref(), &mut w).expect("Failed to encode");
        w
    }
}
/// Zero-sized encoder that renders each byte as two lowercase hex digits.
#[derive(Debug, Default)]
pub struct HexEncoder;
/// Build, at compile time, the 256-entry table mapping each byte value to its
/// two lowercase ASCII hex digits.
const fn gen_hexmap() -> [[u8; 2]; 256]
{
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    let mut table = [[0u8; 2]; 256];
    let mut value = 0usize;
    // `while` rather than `for`: iterator loops are not allowed in `const fn`.
    while value < 256 {
        table[value] = [
            DIGITS[value >> 4],  // high nibble
            DIGITS[value & 0xf], // low nibble
        ];
        value += 1;
    }
    table
}
impl HexEncoder
{
    /// Lookup table: byte value → its two lowercase ASCII hex digits,
    /// generated at compile time by `gen_hexmap`.
    pub const HEXMAP: [[u8; 2]; 256] = gen_hexmap();
}
impl Encoder for HexEncoder
{
    /// Write one byte as two lowercase hex digits via the precomputed table.
    #[inline(always)]
    fn encode_byte<W: Write + ?Sized>(&self, byte: u8, f: &mut W) -> fmt::Result {
        // SAFETY: every entry of `HEXMAP` is two ASCII hex digits, which is valid UTF-8.
        f.write_str(unsafe { std::str::from_utf8_unchecked(&Self::HEXMAP[byte as usize][..]) })
    }
    /// Write a whole slice as lowercase hex.
    ///
    /// Fast path: format word-sized chunks with a single `write!` each; the
    /// leftover tail falls back to per-byte encoding.
    #[inline]
    fn encode_slice<W: Write + ?Sized>(&self, slice: &[u8], f: &mut W) -> fmt::Result {
        use std::mem;
        type ChunkTo = u128; //XXX: TODO: Benchmark if `usize` works better than `u128`.
        const CHUNK: usize = mem::size_of::<ChunkTo>();
        let chunks = slice.chunks_exact(CHUNK);
        let tail = chunks.remainder();
        for chunk in chunks {
            // SAFETY: `chunks_exact` guarantees `chunk.len() == CHUNK`.
            let chunk: &[u8; CHUNK] = unsafe { &*chunk.as_ptr().cast() };
            // BUG FIX: the previous version used `from_ne_bytes` with plain `{:x}`,
            // which (a) reversed the byte order on little-endian hosts and
            // (b) dropped leading zeroes, so the fast path disagreed with
            // `encode_byte`. Big-endian interpretation plus zero-padding to two
            // digits per byte makes both paths produce identical output.
            write!(f, "{:0width$x}", ChunkTo::from_be_bytes(*chunk), width = CHUNK * 2)?;
        }
        tail.iter().try_for_each(|&byte| self.encode_byte(byte, f))
    }
}

@@ -0,0 +1,120 @@
//! Hashing
//use super::pool;
use digest::{
Digest,
DynDigest,
Output,
};
use std::marker::PhantomData;
use std::{
fmt,
};
/// Configuration for hashing a byte stream in parallel chunks with digest `D`.
///
/// Field 0 is the chunk size in bytes; `PhantomData<fn () -> D>` ties in the
/// algorithm type without affecting `Send`/`Sync` or drop behavior.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParallelDigest<D: Digest + Send + Sync>(usize, PhantomData<fn () -> D>);
/// The per-chunk digest outputs produced by [`ParallelDigest::digest`].
pub struct ParallelHash<D: Digest>(Vec<Output<D>>);
impl<D: Digest> fmt::Display for ParallelHash<D>
{
    /// Render as `<chunk-count>:<hex>`, where `<hex>` is `D` applied to the
    /// concatenation of all per-chunk digests.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
    {
        use crate::encode::*;
        write!(f, "{}:", self.0.len())?;
        let mut combined = D::new();
        self.0.iter().for_each(|chunk| combined.update(&chunk[..]));
        HexEncoder.encode_slice(&combined.finalize()[..], f)
    }
}
impl<D: Digest + Send + Sync> ParallelDigest<D>
{
    /// Create a new digest
    ///
    /// `chunk` is the number of bytes hashed per parallel work item.
    /// NOTE(review): `chunk == 0` will panic downstream (`par_chunks(0)` and a
    /// division by zero) — confirm callers always pass a positive size.
    pub const fn new(chunk: usize) -> Self
    {
        Self(chunk, PhantomData)
    }
    /// Compute a parallel digest from `data`.
    #[inline]
    pub fn digest(&self, data: impl AsRef<[u8]>) -> ParallelHash<D>
    {
        ParallelHash(phash_digest_into::<D>(data.as_ref(), self.0))
    }
}
/// Map `digest` over `chunk`-byte pieces of `data` as a lazy, order-preserving
/// parallel iterator.
#[inline(always)]
fn phash_raw<'a, T: Send, F: 'a>(data: &'a [u8], chunk: usize, digest: F) -> impl rayon::iter::IndexedParallelIterator<Item = T> + 'a
where F: Fn(&[u8]) -> T + Send + Sync
{
    use rayon::prelude::*;
    data.par_chunks(chunk).map(digest)
}
/// Drive the parallel iterator produced by `func` and collect its items, in
/// order, into a `Vec`.
///
/// `1 + data.len() / chunk` is an upper bound on the number of chunks.
/// NOTE(review): `chunk == 0` divides by zero here — callers must pass a
/// positive chunk size.
#[inline(always)]
fn into_output<'a, T,F, U>(data: &'a [u8], chunk: usize, func: F) -> Vec<T>
where F: FnOnce(&'a [u8], usize) -> U,
U: rayon::iter::IndexedParallelIterator<Item = T> + 'a
{
    let mut output = Vec::with_capacity(1 + (data.len() / chunk));
    func(data, chunk).collect_into_vec(&mut output);
    output
}
/// Eagerly collect `phash_raw`'s per-chunk results into a `Vec`.
///
/// NOTE(review): duplicates the capacity/collect logic of `into_output`;
/// consider delegating to it to keep the two in sync.
#[inline(always)]
pub(crate) fn phash_raw_into<T: Send, F>(data: &[u8], chunk: usize, digest: F) -> Vec<T>//Vec<Output<D>>
where F: Fn(&[u8]) -> T + Send + Sync
{
    use rayon::prelude::*;
    let mut output = Vec::with_capacity(1 + (data.len() / chunk));
    phash_raw(data, chunk, digest)
        .collect_into_vec(&mut output);
    output
}
/*
pub(crate) fn phash_pool<'a, D: Digest + digest::FixedOutputReset + Send + Sync + 'a>(data: &'a [u8], chunk: usize) -> impl rayon::iter::IndexedParallelIterator<Item= Output<D>> + 'a
{
let pool = pool::Pool::new_with(rayon::max_num_threads(), D::new);
phash_raw(data.as_ref(), chunk, move |range| {
let mut digest = pool.take_one_or_else(D::new);
<D as Digest>::update(&mut digest, range);
digest.finalize_reset()
})
}
#[inline]
pub(crate) fn phash_pool_into<D: Digest + digest::FixedOutputReset + Send + Sync>(data: &[u8], chunk: usize) -> Vec<Output<D>>
{
into_output(data, chunk, phash_pool::<D>)
}
*/
/// Hash each `chunk`-byte piece of `data` with a fresh `D`, yielding the
/// digests as an ordered parallel iterator.
pub(crate) fn phash_digest<'a, D: Digest>(data: &'a [u8], chunk: usize) -> impl rayon::iter::IndexedParallelIterator<Item = Output<D>> + 'a
where Output<D>: Send
{
    // `Digest::digest` is the one-shot equivalent of new() + update() + finalize().
    phash_raw(data, chunk, |piece| D::digest(piece))
}
/// Compute and collect all per-chunk digests of `data` into a `Vec`.
///
/// (Removed a declared-but-unused lifetime parameter `'a`; turbofish callers
/// such as `phash_digest_into::<D>(…)` never named it and are unaffected.)
pub(crate) fn phash_digest_into<D: Digest>(data: &[u8], chunk: usize) -> Vec<Output<D>>
where Output<D>: Send
{
    into_output(data, chunk, phash_digest::<D>)
}
/*
fn phash<D: Digest + Send + Sync>(data: impl AsRef<[u8]>, chunk: usize) -> ParallelHash<D>
{
}*/

@@ -0,0 +1,4 @@
// mod pool; — disabled: the AtomicRefCell-based object pool in src/pool.rs does
// not currently compile (guard-lifetime problems); see the review notes there.
pub mod encode;
pub mod hash;

@@ -0,0 +1,295 @@
//! Pool of objects
use parking_lot::{
RwLock,
Mutex,
};
use atomic_refcell::{
AtomicRefCell,
AtomicRefMut,
};
use std::{
iter,
ops,
mem::{ ManuallyDrop, self },
borrow::{
Borrow,
BorrowMut,
},
};
/// A growable pool of reusable objects: a `RwLock` over per-object
/// `AtomicRefCell`s, so multiple readers can each claim exclusive cells.
#[derive(Debug)]
pub struct Pool<T>(RwLock<Vec<AtomicRefCell<T>>>);
/// Exclusive handle to a pooled value.
#[derive(Debug)]
pub enum PoolRef<'a, T>
{
    /// Borrowed from a cell already inside the pool.
    Inside(AtomicRefMut<'a, T>),
    /// A fresh value not yet in the pool; pushed into it on drop.
    Outside(DeferredRef<'a, T>),
}
impl<'o, T: Send + Sync> PoolRef<'o, T>
{
    /// Shared borrow of the referenced value.
    #[inline]
    pub fn as_ref(&self) -> &T
    {
        self.borrow()
    }
    /// Exclusive borrow of the referenced value.
    #[inline]
    pub fn as_mut(&mut self) -> &mut T
    {
        self.borrow_mut()
    }
    /// Force insert into pool, then return the mutable ref guard.
    ///
    /// NOTE(review): the returned `AtomicRefMut` borrows from the read guard
    /// produced by `downgrade(writer)`, a temporary dropped at the end of the
    /// expression — this likely fails borrow-checking for `'o` (presumably part
    /// of why `mod pool` is disabled in lib.rs). Confirm before re-enabling.
    pub fn force(self) -> AtomicRefMut<'o, T>
    {
        match self {
            Self::Inside(s) => s,
            Self::Outside(deferred) => {
                // Move the deferred value into the pool's backing Vec, then
                // re-borrow it from its new slot.
                let pool = deferred.1;
                let value = AtomicRefCell::new(deferred.detach());
                let mut writer = pool.0.write();
                let len = writer.len();
                writer.push(value);
                parking_lot::lock_api::RwLockWriteGuard::downgrade(writer)[len].borrow_mut()
            },
        }
    }
    /// Drop an `Outside` value without returning it to the pool;
    /// an `Inside` guard is simply released.
    pub fn forget(self)
    {
        if let Self::Outside(v) = self {
            v.forget()
        }
    }
    /// Move the value out if it lives outside the pool; otherwise hand back the
    /// in-pool guard unchanged as `Err`.
    pub fn try_detach(self) -> Result<T, AtomicRefMut<'o, T>>
    {
        match self {
            Self::Inside(v) => Err(v),
            Self::Outside(v) => Ok(v.detach())
        }
    }
    /// Swap the referenced value for `with()`'s result, returning the old value.
    pub fn replace_with<F>(&mut self, with: F) -> T
    where F: FnOnce() -> T
    {
        mem::replace(self.borrow_mut(), with())
    }
    /// Swap the referenced value for `val`, returning the old value.
    #[inline]
    pub fn replace(&mut self, val: T) -> T
    {
        self.replace_with(move || val)
    }
}
impl<'o, T: Send + Sync + Default> PoolRef<'o, T>
{
    /// Replace the referenced value with `T::default()`, returning the old value.
    #[inline(always)]
    pub fn take(&mut self) -> T
    {
        self.replace_with(T::default)
    }
    /// Consume the guard and return the value, leaving `T::default()` behind.
    ///
    /// NOTE(review): for an `Outside` guard, dropping `self` afterwards pushes
    /// the leftover default value into the pool — confirm that growth is intended.
    #[inline]
    pub fn into_inner(mut self) -> T
    {
        self.take()
    }
}
impl<'o, T> Borrow<T> for PoolRef<'o, T>
{
    /// Shared borrow, delegating to the `Deref` impl.
    #[inline]
    fn borrow(&self) -> &T {
        ops::Deref::deref(self)
    }
}
impl<'o, T> BorrowMut<T> for PoolRef<'o, T>
{
#[inline]
fn borrow_mut(&mut self) -> &mut T {
&mut self
}
}
impl<'o, T> ops::Deref for PoolRef<'o, T>
{
    type Target = T;
    /// Delegate to whichever guard variant holds the value.
    #[inline]
    fn deref(&self) -> &Self::Target {
        match self {
            Self::Inside(s) => s.deref(),
            Self::Outside(s) => s.deref(),
        }
    }
}
impl<'o, T> ops::DerefMut for PoolRef<'o, T>
{
    /// Delegate to whichever guard variant holds the value.
    #[inline]
    fn deref_mut(&mut self) -> &mut Self::Target {
        match self {
            Self::Inside(s) => s.deref_mut(),
            Self::Outside(s) => s.deref_mut(),
        }
    }
}
/// A value destined for `Pool` but not yet inserted; `Drop` pushes it in.
/// `ManuallyDrop` lets `detach`/`forget` move or destroy the value without
/// running that push-back logic.
#[derive(Debug)]
pub struct DeferredRef<'owner, T>(ManuallyDrop<T>, &'owner Pool<T>);
impl<'o, T: Send + Sync> DeferredRef<'o, T>
{
    /// Move the value out, skipping this guard's `Drop` — the value is *not*
    /// returned to the pool.
    #[inline(always)]
    pub fn detach(mut self) -> T
    {
        // SAFETY: `self` is forgotten immediately below, so the taken value
        // can never be observed (or dropped) again through this guard.
        let v= unsafe {
            ManuallyDrop::take(&mut self.0)
        };
        mem::forget(self);
        v
    }
    /// Destroy the value in place instead of returning it to the pool on drop.
    ///
    /// NOTE(review): the name is misleading — the value *is* dropped here; only
    /// the push-back-to-pool in `Drop` is skipped.
    #[inline(always)]
    pub fn forget(mut self)
    {
        // SAFETY: `self` is forgotten immediately after, so the dropped value
        // is never touched again.
        unsafe {
            ManuallyDrop::drop(&mut self.0);
        }
        mem::forget(self);
    }
}
impl<'o, T> ops::Drop for DeferredRef<'o, T>
{
    // On drop, move the deferred value into its owning pool.
    fn drop(&mut self) {
        // SAFETY: `drop` runs at most once, and `detach`/`forget` bypass it via
        // `mem::forget`, so the `ManuallyDrop` still holds a live value here.
        self.1.0.write().push(AtomicRefCell::new( unsafe { ManuallyDrop::take(&mut self.0) } ));
    }
}
impl<'o, T> ops::Deref for DeferredRef<'o, T>
{
    type Target = T;
    /// Shared access to the not-yet-pooled value.
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        self.0.deref()
    }
}
impl<'o, T> ops::DerefMut for DeferredRef<'o, T>
{
    /// Exclusive access to the not-yet-pooled value.
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        self.0.deref_mut()
    }
}
impl<T: Send+ Sync + Default> Pool<T>
{
    /// Create a pool pre-filled with `size` default-constructed objects.
    pub fn new(size: usize) -> Self
    {
        Self(RwLock::new(iter::repeat_with(T::default).map(AtomicRefCell::new).take(size).collect()))
    }
    /// Take a free object, or create a `T::default()` outside the pool.
    #[inline]
    pub fn take_one(&self) -> PoolRef<'_, T>
    {
        self.take_one_or_else(T::default)
    }
    /// Borrow a free object, inserting a `T::default()` into the pool if none is free.
    ///
    /// NOTE(review): despite the parallel name, this returns `AtomicRefMut`
    /// (always in-pool) while `take_one` returns `PoolRef` — confirm the
    /// asymmetry is intended.
    #[inline]
    pub fn take_one_or_default(&self) -> AtomicRefMut<'_, T>
    {
        self.take_one_or_insert_with(T::default)
    }
}
impl<T: Send+Sync> iter::FromIterator<T> for Pool<T>
{
    /// Build a pool whose initial contents are the items of `iter`.
    fn from_iter<I: IntoIterator<Item=T>>(iter: I) -> Self
    {
        let cells: Vec<_> = iter.into_iter().map(AtomicRefCell::new).collect();
        Self(RwLock::new(cells))
    }
}
impl<T: Send+Sync> Pool<T>
{
    /// Take a free object, or fall back to `value` (kept outside the pool until drop).
    #[inline]
    pub fn take_one_or(&self, value: T) -> PoolRef<'_, T>
    {
        self.take_one_or_else(move || value)
    }
    /// Try to borrow one currently-free pooled object.
    ///
    /// BUG(review): `possible.try_borrow_mut().ok()?;` discards a *successful*
    /// guard and keeps looping, while a *failed* borrow makes `?` return `None`
    /// early — so every path yields `None` and a guard is never produced.
    /// (Returning the guard would also borrow from the temporary
    /// `self.0.read()` guard, which does not live long enough for `'_` on
    /// `&self`; the signature likely needs an owning/mapped guard. Presumably
    /// why `mod pool` is disabled in lib.rs.)
    pub fn try_take_one(&self) -> Option<AtomicRefMut<'_, T>>
    {
        for possible in self.0.read().iter() {
            possible.try_borrow_mut().ok()?;
        }
        None
    }
    /// Take a free pooled object, or build one with `new()` and hand it out as
    /// `PoolRef::Outside` (pushed into the pool when the guard drops).
    ///
    /// BUG(review): `try_borrow` has the same inverted `.ok()?` logic as
    /// `try_take_one` above and always returns `None`, so every call takes the
    /// `Outside` path and the pool grows by one on each guard drop.
    pub fn take_one_or_else<F>(&self, new: F) -> PoolRef<'_, T>
    where F: FnOnce() -> T
    {
        let try_borrow = {
            let this = &self;
            move || -> Option<AtomicRefMut<T>> {
                for possible in this.0.read().iter() {
                    possible.try_borrow_mut().ok()?;
                }
                None
            }
        };
        if let Some(v) = try_borrow() {
            PoolRef::Inside(v)
        } else {
            PoolRef::Outside(DeferredRef(ManuallyDrop::new(new()), self))
        }
    }
    /// Create a pool holding `size` clones of `value`.
    #[inline]
    pub fn new_with_value(size: usize, value: T) -> Self
    where T: Clone
    {
        iter::repeat(value).take(size).collect()
    }
    /// Create a pool holding `size` objects produced by `with`.
    pub fn new_with<F>(size: usize, with: F) -> Self
    where F: FnMut() -> T
    {
        Self(RwLock::new(iter::repeat_with(with).map(AtomicRefCell::new).take(size).collect()))
    }
    /// Create an empty pool; `const`, so usable in statics.
    #[inline]
    pub const fn new_empty() -> Self
    {
        Self(parking_lot::const_rwlock(Vec::new()))
    }
    /// Borrow a free object, inserting a fresh one built by `with` if none is free.
    ///
    /// Scans under an upgradable read lock, upgrading to a write lock only when
    /// an insert is needed.
    /// NOTE(review): `Err(writer)` is not bound `mut`, yet `writer.push(…)`
    /// below requires it — and the `AtomicRefMut` returned after
    /// `downgrade(writer)` borrows from a temporary read guard dropped at the
    /// end of the expression. Both look like compile errors; presumably why
    /// `mod pool` is disabled in lib.rs.
    pub fn take_one_or_insert_with<F>(&self, with: F) -> AtomicRefMut<'_, T>
    where F: FnOnce() -> T
    {
        let try_borrow = {
            let this = &self;
            move || -> Result<AtomicRefMut<T>, parking_lot::RwLockWriteGuard<_>> {
                let reader = this.0.upgradable_read();
                for possible in reader.iter() {
                    if let Ok(val) = possible.try_borrow_mut() {
                        return Ok(val);
                    }
                }
                Err(parking_lot::lock_api::RwLockUpgradableReadGuard::upgrade(reader))
            }
        };
        match try_borrow() {
            Ok(val) => val,
            Err(writer) => {
                let l = writer.len();
                writer.push(AtomicRefCell::new(with()));
                parking_lot::lock_api::RwLockWriteGuard::downgrade(writer)[l].borrow_mut()
            },
        }
    }
    /// Borrow a free object, inserting `value` if none is free.
    #[inline]
    pub fn take_one_or_insert_value(&self, value: T) -> AtomicRefMut<'_, T>
    {
        self.take_one_or_insert_with(move || value)
    }
}
Loading…
Cancel
Save