From 214d259a0610f03c8a7ce5df5f0f7d55d4486593 Mon Sep 17 00:00:00 2001 From: Avril Date: Thu, 23 Jul 2020 19:42:51 +0100 Subject: [PATCH] added transience --- TODO | 3 --- src/container.rs | 70 +++++++++++++++++++++++++++++++++++++++++------- src/main.rs | 4 +-- 3 files changed, 63 insertions(+), 14 deletions(-) diff --git a/TODO b/TODO index 0a5429d..ea6d973 100644 --- a/TODO +++ b/TODO @@ -1,4 +1 @@ -Have -s (save) load as well if exists, but just insert into table instead of reading from it also - - Have top level handle single files properly diff --git a/src/container.rs b/src/container.rs index 49b2000..e81e600 100644 --- a/src/container.rs +++ b/src/container.rs @@ -16,7 +16,7 @@ use std::{ pub struct DupeMap { iteration: HashSet, // What we calculate - table: HashMap, // What we save and load + table: HashMap, // What we save and load, and if it's transient (ignored in calculate) } /// Do we care about windows? nah @@ -55,20 +55,68 @@ impl DupeMap /// # Returns /// /// True if caching was okay, false if key already added. + /// + /// # Notes + /// + /// If value is added and is transient, it is counted as not existing. pub fn cache>(&mut self, id: T, hash: hash::Sha256Hash) -> bool + { + if self.table.contains_key(id.as_ref()) { + if let Some((got_hash, trans @ true)) = self.table.get_mut(id.as_ref()) { + *trans = false; + *got_hash = hash; + true + } else { + false + } + } else { + self.table.insert(id.as_ref().to_owned(), (hash, false)); + true + } + } + + /// Cache this path's hash as transient. + /// Transient means it is ignored in calculations but is still saved. + /// + /// # Returns + /// + /// True if caching was okay, false if already added (transient or not). + pub fn cache_trans>(&mut self, id: T, hash: hash::Sha256Hash) -> bool { if self.table.contains_key(id.as_ref()) { false } else { - self.table.insert(id.as_ref().to_owned(), hash); + self.table.insert(id.as_ref().to_owned(), (hash,true)); true } } + /// Get a mutable reference to the transience of this path, if it is added + pub fn transience_mut>(&mut self, id: T) -> Option<&mut bool> + { + match self.table.get_mut(id.as_ref()) { + Some((_, trans)) => Some(trans), + _ => None, + } + } + + /// Get the transience of this path, if it is added + pub fn transience>(&self, id: T) -> Option + { + if let Some((_, trans)) = self.table.get(id.as_ref()) { + Some(*trans) + } else { + None + } + } + /// Look for path `id` in cache. pub fn get_cache>(&self, id: T) -> Option<&hash::Sha256Hash> { - self.table.get(id.as_ref()) + match self.table.get(id.as_ref()) { + Some((hash, false)) => Some(hash), + _ => None + } } /// Try to add to store. True if adding was oke, false if already exists. @@ -86,7 +134,7 @@ impl DupeMap pub fn save(&self, to: &mut W) -> io::Result { let mut done=0; - for (path, hash) in self.table.iter() + for (path, (hash, _)) in self.table.iter() { let path = path_bytes(path.as_ref()); let hash: &[u8] = hash.as_ref(); @@ -107,7 +155,7 @@ impl DupeMap use tokio::prelude::*; let mut done=0; - for (path, hash) in self.table.iter() + for (path, (hash, _)) in self.table.iter() { let path = path_bytes(path.as_ref()); let hash: &[u8] = hash.as_ref(); @@ -122,7 +170,7 @@ impl DupeMap } /// Load from file. - pub fn load(&mut self, from: &mut R) -> io::Result + pub fn load(&mut self, from: &mut R, trans: bool) -> io::Result { let mut done=0; let mut read; @@ -138,7 +186,9 @@ impl DupeMap let path = bytes_path(&path[..]); if from.read(&mut hash_buffer[..])? == hash::SHA256_SIZE { - if self.cache(path, hash::Sha256Hash::new(hash_buffer)) { + if !trans && self.cache(path, hash::Sha256Hash::new(hash_buffer)) { + done +=1; + } else if trans && self.cache_trans(path, hash::Sha256Hash::new(hash_buffer)) { done +=1; } } @@ -151,7 +201,7 @@ impl DupeMap /// Load from file. #[cfg(feature="threads")] - pub async fn load_async(&mut self, from: &mut R) -> io::Result + pub async fn load_async(&mut self, from: &mut R, trans: bool) -> io::Result where R: tokio::io::AsyncRead + std::marker::Send + std::marker::Sync + std::marker::Unpin { use tokio::prelude::*; @@ -170,7 +220,9 @@ impl DupeMap let path = bytes_path(&path[..]); if from.read(&mut hash_buffer[..]).await? == hash::SHA256_SIZE { - if self.cache(path, hash::Sha256Hash::new(hash_buffer)) { + if !trans && self.cache(path, hash::Sha256Hash::new(hash_buffer)) { + done +=1; + } else if trans && self.cache_trans(path, hash::Sha256Hash::new(hash_buffer)) { done +=1; } } diff --git a/src/main.rs b/src/main.rs index b80f9eb..fe5cecb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -106,7 +106,7 @@ async fn main() -> Result<(), Box> let mut hashes = container::DupeMap::new(); // Load hashes - for load in args.load.iter() + for (transient, load) in args.load.iter().map(|x| (false, x)).chain(args.save.iter().map(|x| (true, x))) { let load = Path::new(load); if load.exists() { @@ -116,7 +116,7 @@ async fn main() -> Result<(), Box> .open(load).await.log_and_forget(lmode, log::Level::Warning)? { log!(Info, lmode => "Hashes loading from {:?}", load); - args.mode.error_mode.handle(hashes.load_async(&mut file).await).log_and_forget(lmode, log::Level::Warning)?; + args.mode.error_mode.handle(hashes.load_async(&mut file, transient).await).log_and_forget(lmode, if transient {log::Level::Info} else {log::Level::Warning})?; } } else { log!(Warning, lmode => "Exclusing directory from load path {:?}", load);