From 227abc0d7db06e4b1cd58d8f2bd8a79a4eea533d Mon Sep 17 00:00:00 2001 From: Avril Date: Sat, 13 Aug 2022 00:21:10 +0100 Subject: [PATCH] args: Added visitor-pattern argument parsing. Fixed bug in `try_parse_for!()` which `continue`d on the wrong branch. Improved specific and general error messages regarding arguments. Parsing for `-exec/{}` works! A correct `Options` struct is produced, edge-and error- cases are handled correctly with informative messages (TODO: Implement `--help`.) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TODO: Should `-exec{} {}` be valid? That is, should we allow the use of `fexecve()` instead of `execve()` in the special case where the command itself of a positional exec is the positional replacement string? This may be useful, but also dangerous and perhaps not possible with a memfd or with a pipe. We"d also need to `ioctl()` set to permissions to RX instead of RO, and any subsequent instances of the replacement string *should probably* be `dup()`"d file-descriptors with -X permission set. Note for later. For now, a warning is ommitted if this is present, explaining to the user that executing the input is not (yet??) supported. Fortune for collect's current commit: Future small blessing − 末小吉 --- src/args.rs | 138 ++++++++++++++++++++++++++++++++++++++++++++-------- src/main.rs | 35 +++++++++++-- 2 files changed, 147 insertions(+), 26 deletions(-) diff --git a/src/args.rs b/src/args.rs index fa82477..4c6ec2f 100644 --- a/src/args.rs +++ b/src/args.rs @@ -9,6 +9,7 @@ use std::{ fmt, error, borrow::Cow, }; +use std::any::type_name; //TODO: When added, the `args` comptime feature will need to enable `lazy_static`. use ::lazy_static::lazy_static; @@ -341,13 +342,32 @@ pub fn program_name() -> Cow<'static, str> /// Parse the program's arguments into an `Options` array. /// If parsing fails, an `ArgParseError` is returned detailing why it failed. #[inline] -#[cfg_attr(feature="logging", instrument(err(Display)))] +#[cfg_attr(feature="logging", instrument(err(Debug)))] pub fn parse_args() -> Result { - parse_from(std::env::args_os().skip(1)) + let iter = std::env::args_os(); + if_trace!(trace!("argc == {}, argv == {iter:?}", iter.len())); + + parse_from(iter.skip(1)) } -#[cfg_attr(feature="logging", instrument(level="debug", skip_all, fields(args = ?std::any::type_name::())))] +#[inline(always)] +pub fn type_name_short() -> &'static str +{ + let mut s = std::any::type_name::(); + if let Some(idx) = memchr::memrchr(b':', s.as_bytes()) { + s = &s[idx.saturating_sub(1)..]; + if s.len() >= 2 && &s[..2] == "::" { + s = &s[2..]; + } + } + if s.len() > 0 && (s.as_bytes()[s.len()-1] == b'>' && s.as_bytes()[0] != b'<') { + s = &s[..(s.len()-1)]; + } + s +} + +#[cfg_attr(feature="logging", instrument(level="debug", skip_all, fields(args = ?type_name_short::())))] fn parse_from(args: I) -> Result where I: IntoIterator, T: Into @@ -368,24 +388,28 @@ where I: IntoIterator, _assert_is_parser::<$parser>(); }; }; - (try $parser:path => $then:expr$(, $or:expr)?) => { + ($parser:path => $then:expr) => { { try_parse_for!(@ assert_parser_okay $parser); //_assert_is_closure(&$then); //XXX: There isn't a good way to tell without having to specify some bound on return type... if let Some(result) = parsers::try_parse_with::<$parser>(&mut arg, &mut args) { - $(let result = result.map_err($or);)? - result.map($then) - } else { - continue + + // Result succeeded on visitation, use this parser for this argument and then continue to the next one. + $then(result?); + continue; } + // Result failed on visitation, so continue the control flow to the next `try_parse_for!()` visitation attempt. } }; - ($parser:path => $then:expr) => { + /*($parser:path => $then:expr) => { $then(try_parse_for!(try $parser => std::convert::identity)?) - } + }*/ } - try_parse_for!(try parsers::ExecMode => |result| output.exec.push(result))?; + try_parse_for!(parsers::ExecMode => |result| output.exec.push(result)); + //TODO: try_parse_for!(parsers::SomeOtherOption => |result| output.some_other_option.set(result.something)), etc, for any newly added arguments. + if_trace!(debug!("reached end of parser visitation for argument #{idx} {arg:?}! Failing now with `UnknownOption`")); + return Err(ArgParseError::UnknownOption(arg)); } Ok(()) }; @@ -403,6 +427,8 @@ pub enum ArgParseError UnknownOption(OsString), /// Returned when the argument, `argument`, is passed in an invalid context by the user. InvalidUsage { argument: String, message: String, inner: Option> }, + //VisitationFailed, + } trait ArgParseErrorExt: Sized @@ -442,7 +468,11 @@ impl fmt::Display for ArgParseError { match self { Self::WithIndex(index, inner) => write!(f, "Argument #{index}: {inner}"), - Self::UnknownOption(opt) => f.write_str(String::from_utf8_lossy(opt.as_bytes()).as_ref()), + Self::UnknownOption(opt) => { + f.write_str("Invalid/unknown argument: `")?; + f.write_str(String::from_utf8_lossy(opt.as_bytes()).as_ref())?; + f.write_str("`") + }, Self::InvalidUsage { argument, message, .. } => write!(f, "Invalid usage for argument `{argument}`: {message}") } } @@ -484,20 +514,57 @@ impl From for ArgParseError } } +#[inline(always)] +fn extract_last_pathspec<'a>(s: &'a str) -> &'a str +{ + //#[cfg_attr(feature="logging", feature(instrument(ret)))] + #[allow(dead_code)] + fn string_diff<'a>(a: &'a str, b: &'a str) -> usize + { + #[cold] + #[inline(never)] + fn _panic_non_inclusive(swap: bool) -> ! + { + let a = swap.then(|| "b").unwrap_or("a"); + let b = swap.then(|| "a").unwrap_or("b"); + panic!("String {a} was not inside string {b}") + } + let a_addr = a.as_ptr() as usize; + let b_addr = b.as_ptr() as usize; + let (a_addr, b_addr, sw) = + if !(a_addr + a.len() > b_addr + b.len() && b_addr + b.len() < a_addr + a.len()) { + (b_addr, a_addr, true) + } else { + (a_addr, a_addr, false) + }; + + if b_addr < a_addr /*XXX || (b_addr + b.len()) > (a_addr + a.len())*/ { + _panic_non_inclusive(sw) + } + return a_addr.abs_diff(b_addr); + } + s.rsplit_once("::") + .map(|(_a, b)| /*XXX: This doesn't work...match _a.rsplit_once("::") { + Some((_, last)) => &s[string_diff(s, last)..], + _ => b + }*/ b) + .unwrap_or(s) +} + mod parsers { use super::*; #[inline(always)] - #[cfg_attr(feature="logging", instrument(level="trace", skip(rest), fields(parser = ?std::any::type_name::

())))] + #[cfg_attr(feature="logging", instrument(level="debug", skip(rest), fields(parser = %extract_last_pathspec(type_name::

()))))] pub(super) fn try_parse_with

(arg: &mut OsString, rest: &mut impl Iterator) -> Option> where P: TryParse { #[cfg(feature="logging")] - let _span = tracing::debug_span!("parse"); + let _span = tracing::warn_span!("parse", parser= %extract_last_pathspec(type_name::

()), ?arg); P::visit(arg.as_os_str()).map(move |parser| { #[cfg(feature="logging")] let _in = _span.enter(); - parser.parse(std::mem::replace(arg, OsString::default()), rest).map_err(Into::into) + parser.parse(/*if_trace!{true arg.clone(); */std::mem::replace(arg, OsString::default()) /*}*/, rest).map_err(Into::into) //This clone is not needed, the argument is captured by `try_parse_with` (in debug) and `parse` (in warning) already. }).map(|res| { #[cfg(feature="logging")] match res.as_ref() { @@ -509,7 +576,7 @@ mod parsers { res }).or_else(|| { #[cfg(feature="logging")] - ::tracing::event!(::tracing::Level::TRACE, "no match"); + ::tracing::event!(::tracing::Level::TRACE, "no match for this parser with this arg, continuing visitation."); None }) } @@ -586,28 +653,54 @@ mod parsers { use super::*; /// Issue a warning when `-exec{}` is provided as an argument, but no positional arguments (`{}`) are specified in the argument list to the command. #[cold] - #[cfg_attr(feature="logging", inline(never), instrument(level="trace"))] + #[cfg_attr(feature="logging", inline(never))] #[cfg_attr(not(feature="logging"), inline(always))] pub fn execp_no_positional_replacements() { - if_trace!(warn!("-exec{{}} provided with no positional arguments ({}), there will be no replacement with the data. Did you mean `-exec`?", POSITIONAL_ARG_STRING)); + if_trace!(warn!("-exec{{}} provided with no positional arguments `{}`, there will be no replacement with the data. Did you mean `-exec`?", POSITIONAL_ARG_STRING)); } /// Issue a warning if the user apparently meant to specify two `-exec/{}` arguments to `collect`, but seemingly is accidentally is passing the `-exec/{}` string as an argument to the first. #[cold] - #[cfg_attr(feature="logging", inline(never), instrument(level="trace"))] + #[cfg_attr(feature="logging", inline(never))] #[cfg_attr(not(feature="logging"), inline(always))] pub fn exec_apparent_missing_terminator(first_is_positional: bool, second_is_positional: bool, command: &OsStr, argument_number: usize) { if_trace! { warn!("{} provided, but argument to command {command:?} number #{argument_number} is `{}`. Are you missing the terminator '{}' before this argument?", if first_is_positional {"-exec{}"} else {"-exec"}, if second_is_positional {"-exec{}"} else {"-exec"}, EXEC_MODE_STRING_TERMINATOR) } - } + } + + /// Issue a warning if the user apparently missed a command to `-exec{}`, and has typed `-exec{} {}`... + #[cold] + #[cfg_attr(feature="logging", inline(never))] + #[cfg_attr(not(feature="logging"), inline(always))] + //TODO: Do we make this a feature in the future? Being able to `fexecve()` the input received? + pub fn execp_command_not_substituted() + { + if_trace! { + warn!("-exec{{}} provided with a command as the positional replacement string `{}`. Commands are not substituted. Are you missing a command? (Note: Currently, `fexecve()`ing the input is not supported.)", POSITIONAL_ARG_STRING) + } + } + + /// Issue a warning if the user apparently attempted to terminate a `-exec/{}` argument, but instead made the command of the `-exec/{}` itself the terminator. + #[cold] + #[cfg_attr(feature="logging", inline(never))] + #[cfg_attr(not(feature="logging"), inline(always))] + pub fn exec_terminator_as_command(exec_arg_str: &str) + { + if_trace! { + warn!("{exec_arg_str} provided with a command that is the -exec/-exec{{}} terminator character `{}`. The sequence is not terminated, and instead the terminator character itself is taken as the command to execute. Did you miss a command before the terminator?", EXEC_MODE_STRING_TERMINATOR) + } + } } let command = rest.next().ok_or_else(|| ExecModeParseError(self))?; + if command == EXEC_MODE_STRING_TERMINATOR { + warnings::exec_terminator_as_command(self.command_string()); + } let test_warn_missing_term = |(idx , string) : (usize, OsString)| { if let Some(val) = Self::visit(&string) { - warnings::exec_apparent_missing_terminator(self.is_positional(), val.is_positional(), &command, idx); + warnings::exec_apparent_missing_terminator(self.is_positional(), val.is_positional(), &command, idx + 1); } string }; @@ -623,6 +716,9 @@ mod parsers { }, Self::Postional => { let mut repl_warn = true; + if command == POSITIONAL_ARG_STRING { + warnings::execp_command_not_substituted(); + } let res = super::ExecMode::Positional { args: rest .take_while(|argument| argument.as_bytes() != EXEC_MODE_STRING_TERMINATOR.as_bytes()) diff --git a/src/main.rs b/src/main.rs index 70c9cb6..02dc898 100644 --- a/src/main.rs +++ b/src/main.rs @@ -22,6 +22,21 @@ macro_rules! if_trace { } } }; + (true $yes:expr$(; $no:expr)?) => { + { + #[allow(unused_variables)] + { + let val = cfg!(feature="logging"); + #[cfg(feature="logging")] + let val = { $yes }; + $( + #[cfg(not(feature="logging"))] + let val = { $no }; + )? + val + } + } + }; } #[cfg(feature="jemalloc")] @@ -159,7 +174,7 @@ mod work { #[inline] pub(super) fn buffered() -> eyre::Result<()> { - if_trace!(trace!("strategy: allocated buffer")); + if_trace!(info!("strategy: allocated buffer")); let (bytes, read) = { let stdin = io::stdin(); @@ -210,7 +225,7 @@ mod work { } }; - if_trace!(trace!("strategy: mapped memory file")); + if_trace!(info!("strategy: mapped memory file")); use std::borrow::Borrow; @@ -504,10 +519,12 @@ fn parse_args() -> eyre::Result args::parse_args() .wrap_err("Parsing arguments failed") .with_section(|| std::env::args_os().skip(1) - .map(|x| std::borrow::Cow::Owned(String::from_utf8_lossy(&x.into_vec()).into_owned())) + .map(|x| std::borrow::Cow::Owned(format!("{x:?}"))) .join_by_clone(std::borrow::Cow::Borrowed(" ")) //XXX: this can be replaced by `flat_map() -> [x, " "]` really... Dunno which will be faster... .collect::() - .header("The program arguments were")) + .header("Program arguments (argv+1) were")) + .with_section(|| args::program_name().header("Program name (*argv) was")) + .with_section(|| std::env::args_os().len().header("Total numer of arguments, including program name (argc) was")) .with_suggestion(|| "Try passing `--help`") } @@ -518,7 +535,15 @@ fn main() -> eyre::Result<()> { if_trace!(debug!("initialised")); //TODO: How to cleanly feature-gate `args`? - let opt = parse_args()?; + let opt = { + #[cfg(feature="logging")] + let _span = debug_span!("args"); + #[cfg(feature="logging")] + let _in_span = _span.enter(); + let parsed = parse_args()?; + if_trace!(debug!("Parsed arguments: {parsed:?}")); + parsed + }; //TODO: maybe look into fd SEALing? Maybe we can prevent a consumer process from reading from stdout until we've finished the transfer. The name SEAL sounds like it might have something to do with that? cfg_if!{