fixed canonical paths

master
Avril 4 years ago
parent 552035e5f1
commit 7c7125dcd9
Signed by: flanchan
GPG Key ID: 284488987C31F630

2
Cargo.lock generated

@ -555,7 +555,7 @@ dependencies = [
[[package]] [[package]]
name = "rmdupe" name = "rmdupe"
version = "1.1.1" version = "1.2.1"
dependencies = [ dependencies = [
"chrono", "chrono",
"futures", "futures",

@ -1,6 +1,6 @@
[package] [package]
name = "rmdupe" name = "rmdupe"
version = "1.1.1" version = "1.2.1"
authors = ["Avril <flanchan@cumallover.me>"] authors = ["Avril <flanchan@cumallover.me>"]
edition = "2018" edition = "2018"

@ -20,6 +20,7 @@ pub fn program() -> &'static str
pub fn usage() -> ! pub fn usage() -> !
{ {
println!("Usage: {} [OPTIONS] [--] <dirs...>", program()); println!("Usage: {} [OPTIONS] [--] <dirs...>", program());
println!("Usage: {} --rebase [<files>]", program());
println!("Usage: {} --help", program()); println!("Usage: {} --help", program());
println!("OPTIONS:"); println!("OPTIONS:");
println!(" --load -l:\t\tLoad the hashes from `load-file` if possible."); println!(" --load -l:\t\tLoad the hashes from `load-file` if possible.");
@ -39,6 +40,7 @@ pub fn usage() -> !
println!(" --\t\t\tStop reading args"); println!(" --\t\t\tStop reading args");
println!("Other:"); println!("Other:");
println!(" --help -h:\t\tPrint this message"); println!(" --help -h:\t\tPrint this message");
println!(" --rebase:\t\tRebuild hash stores created by `--save`. If no files are provided, use default.");
#[cfg(feature="threads")] #[cfg(feature="threads")]
println!("Compiled with threading support"); println!("Compiled with threading support");
std::process::exit(1) std::process::exit(1)
@ -97,6 +99,28 @@ pub enum Output
{ {
Normal(config::Config), Normal(config::Config),
Help, Help,
Rebase(config::Rebase),
}
/// Arg parse for rebase
fn parse_rebase<I>(args: I) -> Result<config::Rebase, Error>
where I: IntoIterator<Item=String>
{
let mut files = Vec::new();
for file in args.into_iter().map(|path| validate_path(path, Ensure::File, true))
{
files.push(file?);
}
if files.len() < 1 {
files.push(validate_path(config::DEFAULT_HASHNAME.to_string(), Ensure::File, false)?.to_owned());
}
Ok(config::Rebase{
save: files.clone(), //TODO: Seperate save+loads
load: files,
})
} }
/// Try to parse args /// Try to parse args
@ -139,6 +163,7 @@ where I: IntoIterator<Item=String>
if reading && arg.chars().next().unwrap_or('\0') == '-' { if reading && arg.chars().next().unwrap_or('\0') == '-' {
match &arg[..] { match &arg[..] {
"--help" => return Ok(Output::Help), "--help" => return Ok(Output::Help),
"--rebase" => return Ok(Output::Rebase(parse_rebase(args)?)),
"--" => reading = false, "--" => reading = false,

@ -65,3 +65,12 @@ pub struct Config
/// Load hashes from /// Load hashes from
pub load: Vec<String>, pub load: Vec<String>,
} }
/// Settings for `--rebase` mode, which rebuilds hash stores created by `--save`.
#[derive(Debug)]
pub struct Rebase
{
/// Paths of the hash store files to load existing hashes from.
pub load: Vec<String>,
/// Paths of the hash store files the rebuilt hashes are written to.
pub save: Vec<String>,
}

@ -62,6 +62,41 @@ impl DupeMap
self.iteration.iter() self.iteration.iter()
} }
/// Forcefully update the cache
///
/// Overwrites the entry stored for `id` with `hash` and the `trans` flag.
/// If `id` has no entry yet, one is inserted with exactly those values.
///
/// `trans` is stored as the boolean half of the cache entry
/// (presumably the "transient" marker used when loading hash stores).
pub fn cache_force(&mut self, id: impl AsRef<Path>, hash: hash::Sha256Hash, trans: bool)
{
    let id = id.as_ref();
    match self.table.get_mut(id) {
        // Existing entry: replace both the hash and the flag in place.
        Some(entry) => *entry = (hash, trans),
        // New entry: honour the caller's flag instead of hard-coding `true`.
        None => {
            self.table.insert(id.to_owned(), (hash, trans));
        },
    }
}
/// Drop the cache entry for `id`.
///
/// # Returns
/// The removed `(hash, flag)` pair, or `None` when `id` was not cached.
pub fn uncache(&mut self, id: impl AsRef<Path>) -> Option<(hash::Sha256Hash, bool)>
{
    let key: &Path = id.as_ref();
    self.table.remove(key)
}
/// The number of entries currently held in the cache table.
///
/// Every entry is counted, including transient ones.
pub fn cache_len(&self) -> usize
{
self.table.len()
}
/// Iterate over the cache by shared reference.
///
/// Yields each cached path together with its `(hash, flag)` entry.
pub fn cache_iter(&self) -> std::collections::hash_map::Iter<'_, PathBuf, (hash::Sha256Hash, bool)>
{
    self.table.iter()
}
/// Iterate over the cache by mutable reference.
///
/// Yields each cached path together with a mutable borrow of its
/// `(hash, flag)` entry, so entries can be edited in place.
pub fn cache_iter_mut(&mut self) -> std::collections::hash_map::IterMut<'_, PathBuf, (hash::Sha256Hash, bool)>
{
    self.table.iter_mut()
}
/// Cache this path's hash /// Cache this path's hash
/// ///
/// # Returns /// # Returns

@ -72,108 +72,248 @@ mod test {
} }
} }
fn parse_args() -> Result<config::Config, error::Error>
#[inline]
fn absolute(path: impl AsRef<std::path::Path>) -> std::path::PathBuf
{ {
match arg::parse_args()? { std::fs::canonicalize(path).expect("Invalid path internal")
arg::Output::Normal(conf) => Ok(conf),
_ => arg::usage(),
}
} }
#[cfg_attr(feature="threads", tokio::main)]
#[cfg(feature="threads")] #[cfg(feature="threads")]
async fn main() -> Result<(), Box<dyn std::error::Error>> async fn rebase_one_async(path: impl AsRef<std::path::Path>, hash: hash::Sha256Hash) -> Result<Option<(std::path::PathBuf, hash::Sha256Hash)>, error::Error>
{ {
use std::{
convert::TryInto,
};
use tokio::{ use tokio::{
fs::{ fs::{
OpenOptions, OpenOptions,
}, },
sync::{
Mutex
},
}; };
use std::{ let path = path.as_ref();
path::Path, let mut file = OpenOptions::new()
sync::Arc, .read(true)
}; .open(path).await?;
let args = parse_args().into_string()?; let sz: usize = file.metadata().await?.len().try_into().or(Err(error::Error::Arch(Some("Filesize is too large to be known. you have likely compiled the binary for 32-bit architecture or less. This shouldn't happen on 64-bit systems."))))?;
let lmode = &args.mode.logging_mode; let mut result = hash::Sha256Hash::default();
error::check_size(sz, hash::compute_async(&mut file, &mut result).await?)?;
log!(Debug, lmode => "Args parsed: {:?}", args);
let mut children = Vec::new(); println!("Computed {:?}", path);
if hash != result {
Ok(Some((path.to_owned(), result)))
} else {
Ok(None)
}
}
#[cfg(feature="threads")]
async fn rebase(config: config::Rebase) -> Result<(), Box<dyn std::error::Error>>
{
use std::{
path::{
Path,
},
};
use tokio::{
fs::{
OpenOptions,
},
};
let mut hashes = container::DupeMap::new(); let mut hashes = container::DupeMap::new();
for (transient, load) in config.load.iter().map(|x| (false, x)).chain(config.save.iter().map(|x| (true, x)))
// Load hashes
for (transient, load) in args.load.iter().map(|x| (false, x)).chain(args.save.iter().map(|x| (true, x)))
{ {
let load = Path::new(load); let load = Path::new(load);
if load.exists() { if load.exists() {
if load.is_file() { if load.is_file() {
if let Some(mut file) = OpenOptions::new() if let Ok(mut file) = OpenOptions::new()
.read(true) .read(true)
.open(load).await.log_and_forget(lmode, log::Level::Warning)? .open(load).await
{ {
log!(Info, lmode => "Hashes loading from {:?}", load); match hashes.load_async(&mut file, transient).await {
args.mode.error_mode.handle(hashes.load_async(&mut file, transient).await).log_and_forget(lmode, if transient {log::Level::Info} else {log::Level::Warning})?; Err(e) if !transient=> return Err(format!("Failed to load required {:?}: {}", file, e))?,
_ => (),
};
} }
} else {
log!(Warning, lmode => "Exclusing directory from load path {:?}", load);
} }
} else {
log!(Info, lmode => "Ignoring non-existant load path {:?}", load);
} }
} }
log!(Debug, lmode => "Loaded hashes: {}", hashes); let mut remove = Vec::new();
log!(Info, lmode => "Starting checks (threaded)"); let mut children = Vec::with_capacity(hashes.cache_len());
let hashes = Arc::new(Mutex::new(hashes)); for (path, (hash, trans)) in hashes.cache_iter()
for path in args.paths.iter()
{ {
let path = Path::new(path); if !trans { //Don't rebuild transient ones, this is desired I think? Maybe not... Dunno.
if path.is_dir() { if path.exists() && path.is_file() {
log!(Debug, lmode => "Spawning for {:?}", path); //Getting hash
let mode = args.mode.clone(); let path = path.clone();
let path = path.to_owned(); let hash = *hash;
let hashes= Arc::clone(&hashes); children.push(tokio::task::spawn(async move {
children.push(tokio::task::spawn(async move { rebase_one_async(path, hash).await
log!(Debug, mode.logging_mode => " + {:?}", path); }));
let res = mode.error_mode.handle(proc::do_dir_async(path.clone(), 0, hashes, mode.clone()).await).log_and_forget(&mode.logging_mode, log::Level::Error); } else {
log!(Info, mode.logging_mode => " - {:?}", path); remove.push(path.clone());
res }
}));
} }
} }
log!(Info, lmode => "Waiting on children");
let mut done = proc::DupeCount::default(); let (mut changed, mut removed) = (0usize, 0usize);
for child in children.into_iter() for child in children.into_iter()
{ {
done += args.mode.error_mode.handle(child.await?)?.unwrap_or_default().unwrap_or_default().unwrap_or_default(); if let Some((path, hash)) = child.await.expect("Child panic")?
{
println!("Updating {:?} -> {}", path, hash);
hashes.cache_force(path, hash, false);
changed +=1;
}
}
for remove in remove.into_iter()
{
println!("Removing {:?}", remove);
hashes.uncache(remove);
removed +=1;
} }
log!(Info, lmode => "Found: {:?}", done);
let hashes = hashes.lock().await; println!("Updated. {} changed, {} removed.", changed, removed);
log!(Debug, lmode => "New hashes: {}", hashes);
for save in args.save.iter() for save in config.save.iter()
{ {
let save = Path::new(save); let save = Path::new(save);
log!(Info, lmode => "Saving hashes to {:?}", save); let mut file = match OpenOptions::new()
if let Some(mut file) = OpenOptions::new()
.create(true) .create(true)
//.append(true)
.truncate(true) .truncate(true)
.write(true) .write(true)
.open(save).await.log_and_forget(lmode, log::Level::Warning)? .open(&save).await {
{ Ok(v) => v,
args.mode.error_mode.handle(hashes.save_async(&mut file).await).log_and_forget(lmode, log::Level::Warning)?; Err(e) => {println!("Warning: Failed to open output {:?}, ignoring: {}", save, e); continue;},
} };
match hashes.save_async(&mut file).await {
Err(e) => println!("Warning: Failed to write to output {:?}: ignoring: {}", file, e),
_ => (),
};
} }
Ok(()) Ok(())
} }
/// Rebase entry point for builds compiled without the `threads` feature.
///
/// Rebasing is not implemented for this build configuration yet, so instead
/// of panicking this reports the situation through the normal error path.
///
/// # Errors
/// Always returns an error describing the missing functionality.
#[cfg(not(feature="threads"))]
fn rebase(config: config::Rebase) -> Result<(), Box<dyn std::error::Error>>
{
    // The configuration is accepted to keep the signature identical to the
    // threaded implementation; there is nothing to do with it here yet.
    let _ = config;
    Err("`--rebase` is not implemented in builds without the `threads` feature".into())
}
/// Parse the process arguments into an `arg::Output`.
///
/// A request for help is handled here by calling `arg::usage()`, which does
/// not return; every other outcome is handed back to the caller unchanged.
fn parse_args() -> Result<arg::Output, error::Error>
{
    let parsed = arg::parse_args()?;
    if let arg::Output::Help = parsed {
        // Diverges: `usage()` has return type `!`.
        arg::usage()
    }
    Ok(parsed)
}
/// Program entry point for builds with the `threads` feature (runs on the tokio runtime).
///
/// Dispatches on the parsed arguments:
/// * `Rebase` hands the configuration to `rebase` and returns its result.
/// * `Normal` loads known hashes, spawns one task per directory argument,
///   waits for all of them, then writes the hash table to every `save` path.
#[cfg_attr(feature="threads", tokio::main)]
#[cfg(feature="threads")]
async fn main() -> Result<(), Box<dyn std::error::Error>>
{
use tokio::{
fs::{
OpenOptions,
},
sync::{
Mutex
},
};
use std::{
path::Path,
sync::Arc,
};
// NOTE(review): `into_string()` is a project helper; presumably it converts the error into a string form - confirm.
match parse_args().into_string()? {
arg::Output::Rebase(r) => {
// Rebase mode is self-contained; nothing below runs for it.
return rebase(r).await;
},
arg::Output::Normal(args) => {
let lmode = &args.mode.logging_mode;
log!(Debug, lmode => "Args parsed: {:?}", args);
let mut children = Vec::new();
let mut hashes = container::DupeMap::new();
// Load hashes
// `load` paths are read with transient=false, `save` paths with transient=true.
for (transient, load) in args.load.iter().map(|x| (false, x)).chain(args.save.iter().map(|x| (true, x)))
{
let load = Path::new(load);
// Only existing regular files are read; missing paths and directories are logged and skipped.
if load.exists() {
if load.is_file() {
// NOTE(review): `log_and_forget` appears to log a failure and yield `None` so the loop continues - confirm.
if let Some(mut file) = OpenOptions::new()
.read(true)
.open(load).await.log_and_forget(lmode, log::Level::Warning)?
{
log!(Info, lmode => "Hashes loading from {:?}", load);
// Load failures are logged at Info for transient sources and at Warning otherwise.
args.mode.error_mode.handle(hashes.load_async(&mut file, transient).await).log_and_forget(lmode, if transient {log::Level::Info} else {log::Level::Warning})?;
}
} else {
log!(Warning, lmode => "Exclusing directory from load path {:?}", load);
}
} else {
log!(Info, lmode => "Ignoring non-existant load path {:?}", load);
}
}
log!(Debug, lmode => "Loaded hashes: {}", hashes);
log!(Info, lmode => "Starting checks (threaded)");
// The table is shared between the spawned tasks from here on.
let hashes = Arc::new(Mutex::new(hashes));
for path in args.paths.iter()
{
let path = Path::new(path);
// Arguments that are not directories are skipped without a message.
if path.is_dir() {
log!(Debug, lmode => "Spawning for {:?}", path);
let mode = args.mode.clone();
// Canonicalise the directory before handing it to the task.
let path = absolute(&path);
let hashes= Arc::clone(&hashes);
children.push(tokio::task::spawn(async move {
log!(Debug, mode.logging_mode => " + {:?}", path);
let res = mode.error_mode.handle(proc::do_dir_async(path.clone(), 0, hashes, mode.clone()).await).log_and_forget(&mode.logging_mode, log::Level::Error);
log!(Info, mode.logging_mode => " - {:?}", path);
res
}));
}
}
log!(Info, lmode => "Waiting on children");
let mut done = proc::DupeCount::default();
// Await every task and accumulate its count; a failed join (`child.await?`) aborts `main`.
for child in children.into_iter()
{
done += args.mode.error_mode.handle(child.await?)?.unwrap_or_default().unwrap_or_default().unwrap_or_default();
}
log!(Info, lmode => "Found: {:?}", done);
let hashes = hashes.lock().await;
log!(Debug, lmode => "New hashes: {}", hashes);
// Write the table to every `save` path, creating or truncating the file.
for save in args.save.iter()
{
let save = Path::new(save);
log!(Info, lmode => "Saving hashes to {:?}", save);
if let Some(mut file) = OpenOptions::new()
.create(true)
//.append(true)
.truncate(true)
.write(true)
.open(save).await.log_and_forget(lmode, log::Level::Warning)?
{
args.mode.error_mode.handle(hashes.save_async(&mut file).await).log_and_forget(lmode, log::Level::Warning)?;
}
}
},
// `parse_args` diverts `Help` to `arg::usage()`, which never returns, so no other variant reaches this point.
_ => unreachable!(),
};
Ok(())
}
#[cfg(not(feature="threads"))] #[cfg(not(feature="threads"))]
fn main() -> Result<(), Box<dyn std::error::Error>> fn main() -> Result<(), Box<dyn std::error::Error>>
{ {

Loading…
Cancel
Save