You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

236 lines
6.0 KiB

use super::*;
use std::{
collections::{HashMap, HashSet},
io::{
self,
Write,
Read,
},
path::{
Path,
PathBuf
},
};
/// Bookkeeping for hashes: a set of hashes registered through `try_add`,
/// plus a per-path hash cache that `save` / `load` serialize.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct DupeMap
{
/// Hashes inserted by `try_add` and walked by `iter`.
iteration: HashSet<hash::Sha256Hash>, // What we calculate
/// Path -> (hash, transient flag). `save` writes the path and hash of every
/// entry (the flag itself is not written); `get_cache` hides entries whose
/// flag is `true`.
table: HashMap<PathBuf, (hash::Sha256Hash, bool)>, // What we save and load, and if it's transient (ignored in calculate)
}
/// Borrow the raw bytes that make up `path`.
///
/// Unix-only: this goes through `std::os::unix::ffi::OsStrExt`, so there is
/// deliberately no Windows counterpart.
#[inline]
fn path_bytes(path: &Path) -> &[u8]
{
    std::os::unix::ffi::OsStrExt::as_bytes(path.as_os_str())
}
/// View a byte slice as a borrowed `Path` without copying.
///
/// Unix-only counterpart of `path_bytes`: the bytes are taken verbatim via
/// `std::os::unix::ffi::OsStrExt`, so they do not have to be valid UTF-8.
#[inline]
fn bytes_path(bytes: &[u8]) -> &Path
{
    let raw = <std::ffi::OsStr as std::os::unix::ffi::OsStrExt>::from_bytes(bytes);
    Path::new(raw)
}
/// Marker bytes that begin every serialized entry: `DupeMap::save` writes
/// them in front of each record, and `DupeMap::load` stops reading at the
/// first record that does not start with them.
const ENTRY_HEADER: &[u8] = &[0x00, 0xde, 0xad];
impl DupeMap
{
    /// Create a new empty dupe map.
    pub fn new() -> Self
    {
        Self { iteration: HashSet::new(), table: HashMap::new() }
    }

    /// Iterator over all hashes that were registered with `try_add`.
    pub fn iter(&self) -> std::collections::hash_set::Iter<'_, hash::Sha256Hash>
    {
        self.iteration.iter()
    }

    /// Cache this path's hash.
    ///
    /// # Returns
    ///
    /// True if caching was okay, false if the key was already added.
    ///
    /// # Notes
    ///
    /// An existing *transient* entry counts as not existing: it is overwritten
    /// with `hash`, marked non-transient, and `true` is returned.
    pub fn cache<T: AsRef<Path>>(&mut self, id: T, hash: hash::Sha256Hash) -> bool
    {
        let id = id.as_ref();
        // Single lookup: either upgrade a transient entry, refuse, or insert.
        match self.table.get_mut(id) {
            Some(entry) => {
                if entry.1 {
                    *entry = (hash, false);
                    true
                } else {
                    false
                }
            }
            None => {
                self.table.insert(id.to_owned(), (hash, false));
                true
            }
        }
    }

    /// Cache this path's hash as transient.
    /// Transient means it is ignored in calculations but is still saved.
    ///
    /// # Returns
    ///
    /// True if caching was okay, false if already added (transient or not).
    pub fn cache_trans<T: AsRef<Path>>(&mut self, id: T, hash: hash::Sha256Hash) -> bool
    {
        let id = id.as_ref();
        if self.table.contains_key(id) {
            return false;
        }
        self.table.insert(id.to_owned(), (hash, true));
        true
    }

    /// Get a mutable reference to the transience of this path, if it is added.
    pub fn transience_mut<T: AsRef<Path>>(&mut self, id: T) -> Option<&mut bool>
    {
        self.table.get_mut(id.as_ref()).map(|(_, trans)| trans)
    }

    /// Get the transience of this path, if it is added.
    pub fn transience<T: AsRef<Path>>(&self, id: T) -> Option<bool>
    {
        self.table.get(id.as_ref()).map(|(_, trans)| *trans)
    }

    /// Look for path `id` in the cache.
    ///
    /// Transient entries are treated as missing, so `None` is returned for them.
    pub fn get_cache<T: AsRef<Path>>(&self, id: T) -> Option<&hash::Sha256Hash>
    {
        match self.table.get(id.as_ref()) {
            Some((hash, false)) => Some(hash),
            _ => None,
        }
    }

    /// Try to add to store. True if adding was okay, false if already exists.
    pub fn try_add(&mut self, hash: hash::Sha256Hash) -> bool
    {
        // `HashSet::insert` already reports whether the value was newly added
        // and leaves the set untouched when it was present.
        self.iteration.insert(hash)
    }

    /// Save the path cache to `to`.
    ///
    /// Every entry (transient or not) is written as: `ENTRY_HEADER`, the path
    /// length as produced by `bytes::reinterpret`, the path bytes, then the
    /// hash bytes. The transient flag is not written.
    ///
    /// # Returns
    ///
    /// The number of entries written.
    ///
    /// # Errors
    ///
    /// Any I/O error from the writer. `write_all` is used so that a short
    /// write can never silently truncate a record.
    pub fn save<W: Write>(&self, to: &mut W) -> io::Result<usize>
    {
        let mut done = 0;
        for (path, (hash, _)) in self.table.iter()
        {
            let path = path_bytes(path.as_ref());
            let hash: &[u8] = hash.as_ref();

            to.write_all(ENTRY_HEADER)?;
            to.write_all(bytes::reinterpret(&path.len()))?;
            to.write_all(path)?;
            to.write_all(hash)?;

            done += 1;
        }
        Ok(done)
    }

    /// Save the path cache to `to`, asynchronously.
    ///
    /// Same record layout, return value and error behaviour as `save`.
    #[cfg(feature="threads")]
    pub async fn save_async<W>(&self, to: &mut W) -> io::Result<usize>
    where W: tokio::io::AsyncWrite + std::marker::Send + std::marker::Sync + std::marker::Unpin
    {
        use tokio::prelude::*;

        let mut done = 0;
        for (path, (hash, _)) in self.table.iter()
        {
            let path = path_bytes(path.as_ref());
            let hash: &[u8] = hash.as_ref();

            to.write_all(ENTRY_HEADER).await?;
            to.write_all(bytes::reinterpret(&path.len())).await?;
            to.write_all(path).await?;
            to.write_all(hash).await?;

            done += 1;
        }
        Ok(done)
    }

    /// Fill `buf` completely from `from`.
    ///
    /// Returns `Ok(true)` when the buffer was filled and `Ok(false)` when the
    /// stream ended first; every other I/O error is propagated.
    fn read_fully<R: Read>(from: &mut R, buf: &mut [u8]) -> io::Result<bool>
    {
        match from.read_exact(buf) {
            Ok(()) => Ok(true),
            Err(ref e) if e.kind() == io::ErrorKind::UnexpectedEof => Ok(false),
            Err(e) => Err(e),
        }
    }

    /// Async counterpart of `read_fully`.
    #[cfg(feature="threads")]
    async fn read_fully_async<R>(from: &mut R, buf: &mut [u8]) -> io::Result<bool>
    where R: tokio::io::AsyncRead + std::marker::Send + std::marker::Sync + std::marker::Unpin
    {
        use tokio::prelude::*;

        match from.read_exact(buf).await {
            Ok(_) => Ok(true),
            Err(ref e) if e.kind() == io::ErrorKind::UnexpectedEof => Ok(false),
            Err(e) => Err(e),
        }
    }

    /// Load entries from `from` into the path cache.
    ///
    /// Records are read in the layout written by `save`. When `trans` is true
    /// entries are added with `cache_trans`, otherwise with `cache`.
    ///
    /// # Returns
    ///
    /// The number of entries that were actually added (entries rejected by
    /// `cache` / `cache_trans` are not counted).
    ///
    /// # Errors
    ///
    /// Any I/O error from the reader other than reaching the end of the
    /// stream.
    ///
    /// # Notes
    ///
    /// Reading stops quietly, returning what was loaded so far, at the end of
    /// the stream, at a truncated record, or at a record that does not start
    /// with `ENTRY_HEADER`. Buffers are filled with `read_exact`, so a reader
    /// that returns data in small pieces can no longer desynchronize parsing.
    /// A record with an empty path is skipped, but its hash is still consumed
    /// so the following record stays aligned.
    pub fn load<R: Read>(&mut self, from: &mut R, trans: bool) -> io::Result<usize>
    {
        let mut done = 0;
        let mut header_buffer = [0u8; ENTRY_HEADER.len() + std::mem::size_of::<usize>()];
        let mut hash_buffer = [0u8; hash::SHA256_SIZE];

        while Self::read_fully(from, &mut header_buffer)?
        {
            if &header_buffer[..ENTRY_HEADER.len()] != ENTRY_HEADER {
                break;
            }

            // NOTE(review): `sz` comes straight from the input; a corrupt
            // length field can request a very large allocation here.
            let sz: usize = *bytes::reinterpret_back(&header_buffer[ENTRY_HEADER.len()..]);
            let mut path = vec![0u8; sz];

            if !Self::read_fully(from, &mut path)? {
                break;
            }
            if !Self::read_fully(from, &mut hash_buffer)? {
                break;
            }
            if sz == 0 {
                // Empty paths are never cached; the record has been consumed.
                continue;
            }

            let path = bytes_path(&path[..]);
            let hash = hash::Sha256Hash::new(hash_buffer);
            let added = if trans {
                self.cache_trans(path, hash)
            } else {
                self.cache(path, hash)
            };
            if added {
                done += 1;
            }
        }
        Ok(done)
    }

    /// Load entries from `from` into the path cache, asynchronously.
    ///
    /// Same record handling, return value and error behaviour as `load`.
    #[cfg(feature="threads")]
    pub async fn load_async<R>(&mut self, from: &mut R, trans: bool) -> io::Result<usize>
    where R: tokio::io::AsyncRead + std::marker::Send + std::marker::Sync + std::marker::Unpin
    {
        let mut done = 0;
        let mut header_buffer = [0u8; ENTRY_HEADER.len() + std::mem::size_of::<usize>()];
        let mut hash_buffer = [0u8; hash::SHA256_SIZE];

        while Self::read_fully_async(from, &mut header_buffer).await?
        {
            if &header_buffer[..ENTRY_HEADER.len()] != ENTRY_HEADER {
                break;
            }

            // NOTE(review): `sz` comes straight from the input; a corrupt
            // length field can request a very large allocation here.
            let sz: usize = *bytes::reinterpret_back(&header_buffer[ENTRY_HEADER.len()..]);
            let mut path = vec![0u8; sz];

            if !Self::read_fully_async(from, &mut path).await? {
                break;
            }
            if !Self::read_fully_async(from, &mut hash_buffer).await? {
                break;
            }
            if sz == 0 {
                // Empty paths are never cached; the record has been consumed.
                continue;
            }

            let path = bytes_path(&path[..]);
            let hash = hash::Sha256Hash::new(hash_buffer);
            let added = if trans {
                self.cache_trans(path, hash)
            } else {
                self.cache(path, hash)
            };
            if added {
                done += 1;
            }
        }
        Ok(done)
    }
}