diff --git a/Cargo.lock b/Cargo.lock index 207b240..26af54a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -102,7 +102,9 @@ dependencies = [ "color-eyre", "futures", "jemallocator", + "lazy_static", "log", + "num_cpus", "pretty_env_logger", "tokio", "tokio-stream", @@ -277,6 +279,12 @@ dependencies = [ "libc", ] +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + [[package]] name = "libc" version = "0.2.137" diff --git a/Cargo.toml b/Cargo.toml index bb9d83a..0c01502 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,9 @@ strip = false color-eyre = { version = "0.6.2", default-features = false } futures = "0.3.25" jemallocator = "0.5.0" +lazy_static = "1.4.0" log = "0.4.17" +num_cpus = "1.14.0" pretty_env_logger = "0.4.0" tokio = { version = "1.22.0", features = ["full"] } tokio-stream = { version = "0.1.11", features = ["sync", "fs", "io-util", "net", "signal", "tokio-util"] } diff --git a/src/args.rs b/src/args.rs index d868583..1233ce5 100644 --- a/src/args.rs +++ b/src/args.rs @@ -6,6 +6,7 @@ use std::{ Path, PathBuf, }, borrow::Cow, + fmt, }; use tokio::{ sync::{ @@ -17,9 +18,11 @@ use futures::{ }; /// Parsed command-line args -#[derive(Debug)] +#[derive(Debug, Default, Clone)] pub struct Args { + walker: walk::Config, + worker: work::Config, paths: Option>, } @@ -83,11 +86,278 @@ impl Args } } +#[derive(Debug, Clone)] +pub enum Mode +{ + Normal(Args), + Help, +} + #[inline] -pub fn parse_args() -> eyre::Result +pub fn parse_args() -> eyre::Result { //return Ok(Args { paths: None }); - todo!("parse(std::env::args().skip(1))") + parse(std::env::args().skip(1)) + .with_context(|| format!("{:?}", std::env::args().collect::>()).header("ARGV was")) +} + +/// The executable name, if readable from argv as a valid UTF8 string. +/// +/// If not readable, the project name will be returned. +#[inline] +pub fn prog_name() -> &'static str +{ + lazy_static! { + static ref PROG_NAME: &'static str = std::env::args().next().map(|x| &*Box::leak(x.into_boxed_str())).unwrap_or(env!("CARGO_PKG_NAME")); + } + *PROG_NAME +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Copy)] +enum Arg<'a> +{ + Long(&'a str), + Short(&'a [u8]), + ShortSingle(u8), +} + +impl<'a> Arg<'a> +{ + #[inline] + pub fn as_long(&self) -> Option<&'a str> + { + match self { + Self::Long(l) => Some(l), + _ => None, + } + } + #[inline] + pub fn as_short_ascii(&self) -> Option<&'a [u8]> + { + match self { + Self::Short(l) => Some(l), + //Self::ShortSingle(s) => Some(&[*s]), + _ => None, + } + } + #[inline] + pub fn split_short(&self) -> Option + 'a> + { + self.as_short_ascii().map(|x| std::str::from_utf8(x).ok() /* XXX: Silent failure is not a good idea.. We should return an error (or maybe just panic? if there's invalid utf8 here, it shouldn't happen)*/).flatten().map(|s| s.chars()) + } + + #[inline] + pub fn split_short_ascii(&self) -> Option + 'a> + { + self.as_short_ascii().map(|opt| opt.into_iter().copied()) + } + + #[inline] + pub fn explode(self) -> impl Iterator> + 'a + { + std::iter::once(self) + .chain(std::iter::once(if let Self::Short(short) = self { Some(short.into_iter().copied().map(|x| Arg::ShortSingle(x))) } else { None }) + .flat_map(std::convert::identity).flatten()) + } + + #[inline] + pub fn is_any<'b: 'a, I: 'b, A>(&self, these: I) -> bool + where I: IntoIterator, + A: Into> + 'b + { + let iter: Vec<_> = these.into_iter().map(Into::into).map(|x| x.explode()).flatten().collect(); + for split in self.explode() { + if iter.iter().any(|arg| arg == &split) { + return true; + } + } + false + } + + #[inline(always)] + pub fn is_long(&self) -> bool + { + self.as_long().is_some() + } + + #[inline(always)] + pub fn is_short(&self) -> bool + { + !self.is_long() + } +} + +impl<'a> fmt::Display for Arg<'a> +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + match self { + Self::Long(s) => write!(f, "--{s}"), + Self::Short(short) => write!(f, "-{}", std::str::from_utf8(short).unwrap()), + Self::ShortSingle(one) => write!(f, "-{}", *one as char), + } + } +} + +impl<'a> From<&'a [u8]> for Arg<'a> +{ + #[inline] + fn from(from: &'a [u8]) -> Self + { + Self::Short(from) + } +} + +impl<'a, const N: usize> From<&'a [u8; N]> for Arg<'a> +{ + #[inline] + fn from(from: &'a [u8; N]) -> Self + { + Self::Short(&from[..]) + } +} + + +impl<'a> From<&'a str> for Arg<'a> +{ + #[inline] + fn from(from: &'a str) -> Self + { + Self::Long(from) + } +} + +impl From for Arg<'static> +{ + #[inline] + fn from(from: u8) -> Self + { + Self::ShortSingle(from) + } +} + +#[inline] +fn parse_single<'a, I: ?Sized + 'a>(input: Arg<'a>, args: &mut I, output: &mut Args) -> eyre::Result> +where I: Iterator +{ + macro_rules! take { + ($fmt:literal $(, $ag:expr)*) => { + match args.next() { + Some(n) => n, + None => return Err(eyre!($fmt $(, $ag)*)), + } + }; + () => { + take!("`{}` expects an argument", &input) + } + } + + macro_rules! args { + ($($arg:expr),*) => { + [$(Arg::from($arg)),*] + }; + } + + // -r, --recursive + if input.is_any(args![b'r', "recursive"]) { + output.walker.recursion_depth = if input.is_long() { + let limit = take!(); + let limit: usize = (&limit).parse().wrap_err("`--recursive` expects a positive integer") + .with_section(move || limit.header("Invalid parameter was"))?; + match limit { + 0 => None, + n => Some(n), + } + } else { + None + }; + } + + if input.is_any(args![b'a', "atime"]) { + output.worker.by = work::OrderBy::AccessTime; + } + if input.is_any(args![b'c', "ctime"]) { + output.worker.by = work::OrderBy::CreationTime; + } + if input.is_any(args![b'm', "mtime"]) { + output.worker.by = work::OrderBy::ModifiedTime; + } + + // -P, -p, --parallel cpus| + // -1 + if input.is_any(args![b'P', b'p', "parallel"]) { + if input.is_long() { + let mut num = take!(); + if let Ok(n) = num.parse() { + output.walker.max_walkers = std::num::NonZeroUsize::new(n); + } else { + num.make_ascii_lowercase(); + match &num[..] { + "cpus" => output.walker.max_walkers = std::num::NonZeroUsize::new(*walk::NUM_CPUS), + _ => return Err(eyre!("`--parallel` expects a positive integer or the string 'cpus'")).with_context(move || num.header("Invalid parameter was")), + } + } + } else { + output.walker.max_walkers = if input.is_any(*b"P") { + None + } else { + std::num::NonZeroUsize::new(*walk::NUM_CPUS) + }; + } + } else if input.is_any(args![b'1']) { + output.walker.max_walkers = std::num::NonZeroUsize::new(1); + } + + Ok(None) +} + +fn parse(args: impl IntoIterator) -> eyre::Result +{ + let mut output = Args::default(); + let mut args = args.into_iter().fuse(); + + let mut rest = Vec::new(); + while let Some(current) = args.next() + { + + macro_rules! single { + ($input:expr) => { + { + let input = Arg::from($input); + if let Some(mode) = parse_single($input, &mut args, &mut output) + .wrap_err(eyre!("Parsing error for argument '{}'", &input)) + .with_section(|| current.clone().header("Current arg was"))? + { + return Ok(mode); + } + } + }; + } + match current.as_bytes() { + b"-" | + b"--" => break, + [b'-', b'-', ..] => { + // Long opt + single!(Arg::Long(¤t[2..])); + }, + [b'-', short @ ..] => { + // Short opts + single!(Arg::Short(short)) + }, + _ => { + // Not an opt, a path. + rest.push(PathBuf::from(current)); + break; + }, + } + } + rest.extend(args.map(Into::into)); + output.paths = match rest { + empty if empty.is_empty() => None, + rest => Some(rest), + }; + + Ok(Mode::Normal(output)) } //TODO: fn parse(args: impl IntoIterator) -> eyre::Result diff --git a/src/main.rs b/src/main.rs index a41e211..5d4ce94 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,6 @@ #[macro_use] extern crate log; - +#[macro_use] extern crate lazy_static; use color_eyre::{ eyre::{ self, @@ -57,6 +57,54 @@ where I: futures::stream::Stream, Ok(children.into_iter().sum()) } + +fn print_help(to: &mut W) -> std::io::Result<()> + where W: std::io::Write, +{ + let execp = args::prog_name(); + writeln!(to, "{} v{} - {}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"), env!("CARGO_PKG_DESCRIPTION"))?; + writeln!(to, " GPL'd with <3 by {}", env!("CARGO_PKG_AUTHORS"))?; + writeln!(to, "\nUsage:")?; + writeln!(to, "{execp} [OPTIONS] [--] []")?; + writeln!(to, "\tAccording to OPTIONS, given input file paths `files...` (or, if empty, paths read from `stdin`), write them to `stdout` ordered by their metadata's timecodes")?; + writeln!(to, "{execp} --help")?; + writeln!(to, "\tPrint this message to `stderr`, then exit with code 0.")?; + writeln!(to, "")?; + writeln!(to, "OPTIONS:")?; + macro_rules! write_opt { + ($($name:literal),+ => $explain:literal $(, $format:expr)*) => { + { + let names = [$($name),+].into_iter().fold(String::default(), |prev, n| format!("{prev}, {n}")); + writeln!(to, concat!(" {}\t\t", $explain), names $(, $format)*) + } + }; + } + + write_opt!("-r", "--recursive " => "Recursively sort input files, up to `` (set to 0 for infniite); if limit is not specified, recursion is infinite")?; + write_opt!("-a", "--atime" => "Sort by atime")?; + write_opt!("-c", "--ctime" => "Sort by ctime (default)")?; + write_opt!("-m", "--mtime" => "Sort by mtime")?; + write_opt!("-p", "--parallel cpus|" => "Run tasks in parallel, with a max number of tasks being equal ``, or, if 0, to infinity (see `-P`), if 'cpus', to the number of logical CPU cores ({}, default)", *walk::NUM_CPUS)?; + write_opt!("-P", "--parallel 0" => "Run tasks with unbounded parallelism, no limit to the number of walker tasks running at once (note: the physical thread pool will always be the same size regardless of these flags)")?; + write_opt!("-1", "--parallel 1" => "Only let one directory be processed at once")?; + write_opt!("-", "--" => "Stop parsing arguments, treat the rest as input paths")?; + + //TODO: Allow controlling failure modes (currently it's hardcoded when walking will fail and why and also kind arbitary; it being controllable would be better). + + writeln!(to, "")?; + + writeln!(to, "ENV VARS:")?; + writeln!(to, "`RUST_LOG` - Control the logging (to stderr) level.")?; + writeln!(to, r#" "none" - No output. + "error" - Errors only. + "warn" - Warnings and above. + "info" - Information and above. + "debug" - Debug information and above. + "trace" - All recorded information."#)?; + + Ok(()) +} + #[tokio::main] async fn main() -> eyre::Result<()> { init_logging().wrap_err("Failed to set logging handlers")?; @@ -65,9 +113,14 @@ async fn main() -> eyre::Result<()> { //TODO: Read main config from args - let args = args::parse_args() + let args = match args::parse_args() .wrap_err("Failed to parse command line args") - .with_suggestion(|| "Try `--help`")?; + .with_suggestion(|| "Try `--help`")? + { + args::Mode::Normal(n) => n, + args::Mode::Help => return print_help(&mut std::io::stderr().lock()).wrap_err("Failed to write help to stderr"), + }; + debug!("Parsed args: {:?}", args); let worker_cfg = { //TODO: Read worker config from main config diff --git a/src/walk.rs b/src/walk.rs index 9feb0ee..6c1478b 100644 --- a/src/walk.rs +++ b/src/walk.rs @@ -19,15 +19,41 @@ use futures::future::{ BoxFuture, }; -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Default)] + +lazy_static! { + pub(super) static ref NUM_CPUS: usize = num_cpus::get(); +} + +/// Default number of max walkers allowed to work at once on the thread pool. +/// See `Config`. +#[inline] +pub fn default_max_walkers() -> Option +{ + NonZeroUsize::new(*NUM_CPUS) +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct Config { // None: unlimited. 0: no recursion pub recursion_depth: Option, - // None: Unlimited tasks. + // None: Unlimited tasks. pub max_walkers: Option, } +impl Default for Config +{ + #[inline] + fn default() -> Self + { + Self { + recursion_depth: Some(0), + max_walkers: default_max_walkers(), + } + } +} + + #[derive(Debug, Clone)] struct UniqueState {