diff --git a/src/args.rs b/src/args.rs
new file mode 100644
index 0000000..3199edf
--- /dev/null
+++ b/src/args.rs
@@ -0,0 +1,290 @@
+//! Arguments and Cli-parsing
+use super::*;
+use std::{
+    str,
+    error, fmt,
+    borrow::{
+        Borrow, Cow, ToOwned,
+    },
+    path::{
+        Path, PathBuf,
+    },
+    //collections::BTreeSet as Set,
+};
+use clap::{
+    Parser,
+    Args,
+    Subcommand,
+    ValueEnum,
+};
+
+/// A value that may be provided, or may be deferred to be provided by `stdin` (/ written to `stdout`.)
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)]
+pub enum MaybeValue<T>
+{
+    Stdio, // No explicit value: spelled `STDIO_SYMBOL` (`-`) in textual form.
+    Value(T), // An explicitly provided value.
+}
+
+impl<T> MaybeValue<T>
+{
+    pub const STDIO_SYMBOL: &'static str = "-"; // Textual spelling of `Self::Stdio`.
+
+    #[inline]
+    pub const fn is_stdio(&self) -> bool
+    {
+        match self {
+            Self::Stdio => true,
+            _ => false,
+        }
+    }
+    #[inline]
+    pub const fn value(&self) -> Option<&T>
+    {
+        match self {
+            Self::Value(v) => Some(v), // `v` is already `&T` here; no extra borrow needed.
+            _ => None
+        }
+    }
+    #[inline(always)]
+    pub const fn has_value(&self) -> bool
+    {
+        self.value().is_some()
+    }
+
+    /// Convert the value type to `U` (if there is one.)
+    ///
+    /// e.g. to convert: `let _: MaybeValue<String> = MaybeString::map_into(value);`
+    #[inline]
+    pub fn map_into<U: From<T>>(self) -> MaybeValue<U>
+    {
+        match self {
+            Self::Value(v) => MaybeValue::Value(v.into()),
+            Self::Stdio => MaybeValue::Stdio,
+        }
+    }
+
+    /// Consume into the `Value(T)` if possible, if not, return `Err(Self)`.
+    #[inline]
+    #[must_use]
+    pub fn try_into_value(self) -> Result<T, Self>
+    {
+        match self {
+            x @ Self::Stdio => Err(x),
+            Self::Value(v) => Ok(v),
+        }
+    }
+}
+
+impl<T: AsRef<str>> AsRef<str> for MaybeValue<T>
+{
+    #[inline]
+    fn as_ref(&self) -> &str
+    {
+        match self {
+            Self::Stdio => Self::STDIO_SYMBOL,
+            Self::Value(v) => v.as_ref(),
+        }
+    }
+}
+
+impl<T: Borrow<str>> Borrow<str> for MaybeValue<T>
+{
+    #[inline]
+    fn borrow(&self) -> &str
+    {
+        match self {
+            Self::Stdio => Self::STDIO_SYMBOL,
+            Self::Value(v) => v.borrow(),
+        }
+    }
+}
+
+
+impl<T> Default for MaybeValue<T>
+{
+    #[inline]
+    fn default() -> Self
+    {
+        Self::Stdio
+    }
+}
+
+impl<T: Into<String>> MaybeValue<T>
+{
+    #[inline]
+    pub fn into_string(self) -> Cow<'static, str>
+    {
+        match self {
+            Self::Value(v) => Cow::Owned(v.into()),
+            Self::Stdio => Cow::Borrowed(Self::STDIO_SYMBOL),
+        }
+    }
+}
+
+impl<T: Into<PathBuf>> MaybeValue<T>
+{
+    #[inline]
+    pub fn into_path(self) -> Cow<'static, Path>
+    {
+        match self {
+            Self::Value(v) => Cow::Owned(v.into()),
+            Self::Stdio => Cow::Borrowed(Path::new(Self::STDIO_SYMBOL)),
+        }
+    }
+}
+
+impl<T: Into<PathBuf>> From<MaybeValue<T>> for Box<Path>
+{
+    #[inline]
+    fn from(value: MaybeValue<T>) -> Self {
+        value.into_path().into_owned().into_boxed_path()
+    }
+}
+impl<T: Into<String>> From<MaybeValue<T>> for Box<str>
+{
+    #[inline]
+    fn from(value: MaybeValue<T>) -> Self {
+        value.into_string().into_owned().into_boxed_str()
+    }
+}
+
+impl<T: Into<String>> From<MaybeValue<T>> for Cow<'static, str>
+{
+    fn from(from: MaybeValue<T>) -> Self
+    {
+        from.into_string()
+    }
+}
+
+
+impl<T: From<String>> From<String> for MaybeValue<T>
+{
+    #[inline]
+    fn from(from: String) -> Self
+    {
+        match &from[..]
+        {
+            Self::STDIO_SYMBOL => Self::Stdio,
+            _ => Self::Value(from.into()),
+        }
+    }
+}
+
+impl<T> str::FromStr for MaybeValue<T>
+where T: str::FromStr {
+    type Err = T::Err;
+
+    #[inline]
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            Self::STDIO_SYMBOL => Ok(Self::Stdio),
+            s => T::from_str(s).map(Self::Value)
+        }
+    }
+}
+
+/// User-provided configuration of how the program should behave here
+#[derive(Debug, Args)]
+pub struct Config
+{
+    /// Use the PCRE (JS-like) extended regular expression compiler
+    ///
+    /// # Feature difference
+    /// By default, the expression syntax does not support things like negative lookahead and other backtrack-requiring regex features.
+    ///
+    /// ## Efficiency
+    /// Note that non-PCRE expressions are more efficient in general, and can also enable parallel processing of strings where there are many (e.g. a long list of lines from `stdin` can be matched against in parallel.)
+    ///
+    /// It is ill-advised to enable PCRE on large inputs unless those features are required.
+    //TODO: Should we have PCRE on by default or not...? I think we should maybe have it on by default if the feature is enabled... But that will mess with input parallelism... XXX: Perhaps we can auto-detect if to use PCRE or not (e.g. try compiling to regex first, then PCRE if that fails?)
+    #[arg(short, long)]
+    #[cfg(feature="perl")] //XXX: Do we want this option to be feature-gated? Or should we fail with error `if (! cfg!(feature="perl")) && self.extended)`? I think the latter would make things more easily (since the Regex engine gates PCRE-compilation transparently to the API user [see `crate::re::Regex`], we don't need to gate it this way outside of `re`, if we remove this gate we can just use `cfg!()` everywhere here which makes things **MUCH** cleaner..) It also means the user of a non-PCRE build will at least know why their PCRE flag is failing and that it can be built with the "perl" feature, instead of it being *totally* invisible to the user if the feature is off.
+    extended: bool,
+
+    /// Delimit read input/output strings from/to `stdin`/`stdout` by NUL ('\0') characters instead of newlines.
+    ///
+    /// This only affects the output of each string's match groups, not the groups themselves, those will still be delimited by TAB literals in the output.
+    #[arg(short='0', long)]
+    pub zero: bool, //XXX: Add `--field=`/`--ifs` option, put these in same group. Maybe add `--delimit-groups=` to change the group delimiter from `\t` to user-specified value.
+}
+
+impl Config
+{
+    /// Whether it is requested to use PCRE regex instead of regular regex.
+    ///
+    /// # Interaction with feature gating of ~actual~ PCRE support via `feature="perl"`
+    /// Note that if the "perl" feature is not enabled, this may still return `true`.
+    /// If the user requests PCRE where it is not available, the caller should return an error/panic to the user telling her that.
+    #[inline]
+    pub fn use_pcre(&self) -> bool
+    {
+        #![allow(unreachable_code)]
+        #[cfg(feature="perl")] return self.extended; //TODO: See above comment on un-gating `self.extended`
+        false
+    }
+}
+
+/// A string value that may be provided to the CLI, or delegated to `stdio`.
+pub type MaybeString = MaybeValue<Box<str>>; // NOTE(review): inner type restored as `Box<str>` — confirm.
+/// A path that may represent an `stdio` file-descriptor instead of a named file.
+pub type MaybePath = MaybeValue<Box<Path>>; // NOTE(review): inner type restored as `Box<Path>` — confirm.
+
+/// `rematch` is a simple command-line tool for matching & printing capture groups of an input string(s) against a regular expression.
+///
+/// The input string(s) can be provided in the command-line, or they can be provided as line delimited (by default) stream from `stdin`.
+#[derive(Debug, Parser)]
+#[command(name = env!("CARGO_PKG_NAME"), version, about, long_about)]
+pub struct Cli
+{
+    /// Configuration of the execution
+    #[command(flatten)]
+    pub config: Config,
+
+    //XXX: Should we make these fields public?
+    /// The input string to use, or `-` to read from stdin.
+    //TODO: Support multiple input strings in non-`stdin` case too. (XXX: How should this be handled...?)
+    string: MaybeString,
+    /// The regular expression to match `string` on.
+    regex: String,
+    /// The regex capture group indices to print when matches on `string`.
+    #[arg(required= true, trailing_var_arg = true, allow_hyphen_values = false, num_args=1..)]
+    //TODO: Allow ranges & fallible captures, so lines that match group 1 but not 2 will not cause output failure if given `1 2?` but will if given `1 2` (XXX: Is this actually meaningful/possible? Can we do this at all? I'm pretty sure `/(?:(.))?/` still creates an (empty) group? So perhaps, syntax for failing on *empty* group matches...? like, `1! 2` for "group #1 *required*, group #2 is not requested?")
+    groups: Vec<usize>, // TODO: How to dedup (XXX: Do we want to de-dup? Maybe the user wants group `1` twice? I think it's fine (also we need to preserve user ordering of group indices))
+}
+
+impl Cli {
+    /// Get the input string to match on
+    ///
+    /// If the requested input is `stdin`, `None` is returned.
+    #[inline]
+    pub fn input_string(&self) -> Option<&str>
+    {
+        self.string.value().map(AsRef::as_ref)
+    }
+
+    /// Get the string to build the regular expression from
+    pub fn regex_string(&self) -> &str
+    {
+        &self.regex[..]
+    }
+
+    /// Get the match group(s) to print in the output
+    #[inline]
+    pub fn groups(&self) -> &[usize]
+    {
+        &self.groups[..]
+    }
+
+    /// Get the number of match groups requested.
+    #[inline]
+    pub fn num_groups(&self) -> usize
+    {
+        self.groups.len()
+    }
+}
+
+/// Parse the command-line arguments passed to the program
+pub fn parse_cli() -> Cli
+{
+    clap::Parser::parse()
+}
diff --git a/src/main.rs b/src/main.rs
index 9c2ace6..4fad2dc 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,6 +1,7 @@
 
 mod re;
 mod text;
+mod args;
 
 use color_eyre::{
     eyre::{
@@ -20,6 +21,10 @@ fn initialise() -> eyre::Result<()>
 fn main() -> eyre::Result<()>
 {
     initialise().wrap_err("Fatal: Failed to install panic handle")?;
+    let cli = args::parse_cli();//.wrap_err("Error parsing command-line arguments")?;
+
+    eprintln!("{:#?}", cli);
+    return Ok(());
 
     let args: Vec<String> = std::env::args().collect();