//! Arguments and Cli-parsing use super::*; use std::{ str, error, fmt, borrow::{ Borrow, Cow, ToOwned, }, path::{ Path, PathBuf, }, //collections::BTreeSet as Set, }; use clap::{ Parser, Args, Subcommand, ValueEnum, }; /// A value that may be provided, or may be deferred to be provided by `stdin` (/ written to `stdout`.) #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)] pub enum MaybeValue<T = String> { Stdio, Value(T), } impl<T> MaybeValue<T> { pub const STDIO_SYMBOL: &'static str = "-"; #[inline] pub const fn is_stdio(&self) -> bool { match self { Self::Stdio => true, _ => false, } } #[inline] pub const fn value(&self) -> Option<&T> { match self { Self::Value(v) => Some(&v), _ => None } } #[inline(always)] pub const fn has_value(&self) -> bool { self.value().is_some() } /// Convert the value type to `U` (if there is one.) /// /// e.g. to convert `let _: MaybeValue<PathBuf> = MaybeString::map_into();` #[inline] pub fn map_into<U: From<T>>(self) -> MaybeValue<U> { match self { Self::Value(v) => MaybeValue::Value(v.into()), Self::Stdio => MaybeValue::Stdio, } } /// Consume into the `Value(T)` if possible, if not, return `Err(Self)`. #[inline] #[must_use] pub fn try_into_value(self) -> Result<T, Self> { match self { x @ Self::Stdio => Err(x), Self::Value(v) => Ok(v), } } } impl<T: AsRef<str>> AsRef<str> for MaybeValue<T> { #[inline] fn as_ref(&self) -> &str { match self { Self::Stdio => Self::STDIO_SYMBOL, Self::Value(v) => v.as_ref(), } } } impl<T: Borrow<str>> Borrow<str> for MaybeValue<T> { #[inline] fn borrow(&self) -> &str { match self { Self::Stdio => Self::STDIO_SYMBOL, Self::Value(v) => v.borrow(), } } } impl<T> Default for MaybeValue<T> { #[inline] fn default() -> Self { Self::Stdio } } impl<T: Into<String>> MaybeValue<T> { #[inline] pub fn into_string(self) -> Cow<'static, str> { match self { Self::Value(v) => Cow::Owned(v.into()), Self::Stdio => Cow::Borrowed(Self::STDIO_SYMBOL), } } } impl<T: Into<PathBuf>> MaybeValue<T> { #[inline] pub fn into_path(self) -> Cow<'static, Path> { match self { Self::Value(v) => Cow::Owned(v.into()), Self::Stdio => Cow::Borrowed(Path::new(Self::STDIO_SYMBOL)), } } } impl<T: Into<PathBuf>> From<MaybeValue<T>> for Box<Path> { #[inline] fn from(value: MaybeValue<T>) -> Self { value.into_path().into_owned().into_boxed_path() } } impl<T: Into<String>> From<MaybeValue<T>> for Box<str> { #[inline] fn from(value: MaybeValue<T>) -> Self { value.into_string().into_owned().into_boxed_str() } } impl<T: Into<String>> From<MaybeValue<T>> for Cow<'static, str> { fn from(from: MaybeValue<T>) -> Self { from.into_string() } } impl<T: From<String>> From<String> for MaybeValue<T> { #[inline] fn from(from: String) -> Self { match &from[..] { Self::STDIO_SYMBOL => Self::Stdio, _ => Self::Value(from.into()), } } } impl<T> str::FromStr for MaybeValue<T> where T: str::FromStr { type Err = T::Err; #[inline] fn from_str(s: &str) -> Result<Self, Self::Err> { match s { Self::STDIO_SYMBOL => Ok(Self::Stdio), s => T::from_str(s).map(Self::Value) } } } /// User-provied configuration of how the program should behave here #[derive(Debug, Args)] pub struct Config { /// Use the PCRE (JS-like) extended regular expression compiler. /// /// __NOTE__: The binary must have been compiled with build feature `perl` to use this option. /// /// # Feature difference /// By default, the expression syntax does not support things like negative lookahead and other backtrack-requiring regex features. /// /// ## Efficiency /// Note that non-PCRE expressions are more efficient in general, and can also enable parallel processing of strings where there are many (e.g. a long list of lines from `stdin` can be matched against in parallel.) /// /// It is ill-advised to enable PCRE on large inputs unless those features are required. //TODO: Should we have PCRE on by default or not...? I think we should maybe have it on by default if the feature is enabled... But that will mess with input parallelism... XXX: Perhaps we can auto-detect if to use PCRE or not (e.g. try compiling to regex first, then PCRE if that fails?) #[arg(short, long)] // XXX: Can we add a clap `value_parser!(FeatureOnBool<"perl">)` which fails to parse its `from_str()` impl if the feature is not enabled. Is this possible with what we currently have? We may be able to with macros, e.g expand a macro to `FeatureOnBool<"perl", const { cfg!(feature="perl") }>` or something similar? (NOTE: If `clap` has a better mechanism for this, use that instead of re-inventing it tho.) // #[cfg(feature="perl")] //XXX: Do we want this option to be feature-gated? Or should we fail with error `if (! cfg!(feature="perl")) && self.extended)`? I think the latter would make things more easily (since the Regex engine gates PCRE-compilation transparently to the API user [see `crate::re::Regex`], we don't need to gate it this way outside of `re`, if we remove this gate we can just use `cfg!()` everywhere here which makes things **MUCH** cleaner..) It also means the user of a non-PCRE build will at least know why their PCRE flag is failing and that it can be built with the "perl" feature, instead of it being *totally* invisible to the user if the feature is off. pub extended: bool, /// Delimit read input/output strings from/to `stdin`/`stdout` by NUL ('\0') characters instead of newlines. /// /// This only affects the output of each string's match groups, not the groups themselves, those will still be delimited by TAB literals in the output. #[arg(short='0', long)] pub zero: bool, //XXX: Add `--field=`/`--ifs` option, put these in same group. Maybe add `--delimit-groups=` to change the group delimiter from `\t` to user-specified value. } impl Config { /// Whether it is requested to use PCRE regex instead of regular regex. /// /// # Interaction with feature gating of ~actual~ PCRE support via `feature="perl"` /// Note that if the "perl" feature is not enabled, this may still return `true`. /// If the user requests PCRE where it is not available, the caller should return an error/panic to the user telling her that. #[inline(always)] #[deprecated(note = "Access field `extended` instead.")] //TODO: Make `extended` public and remove this accessor? pub fn use_pcre(&self) -> bool { //#![allow(unreachable_code)] //#[cfg(feature="perl")] return self.extended; //TODO: See above comment on un-gating `self.extended` //false self.extended } } /// A string value that may be provided to the CLI, or delegated to `stdio`. pub type MaybeString = MaybeValue<Box<str>>; /// A path that may represent an `stdio` file-descriptor instead of a named file. pub type MaybePath = MaybeValue<Box<Path>>; /// `rematch` is a simple command-line tool for matching & printing capture groups of an input string(s) against a regular expression. /// /// The input string(s) can be provided in the command-line, or they can be provided as line delimited (by default) stream from `stdin`. #[derive(Debug, Parser)] #[command(name = env!("CARGO_PKG_NAME"), version, about, long_about)] pub struct Cli { /// Configuration of the execution #[command(flatten)] pub config: Config, //XXX: Should we make these fields public? /// The input string to use, or `-` to read from stdin. //TODO: Support multiple input strings in non-`stdin` case too. (XXX: How should this be handled...?) string: MaybeString, /// The regular expression to match `string` on. regex: String, /// The regex capture group indecies to print when matches on `string`. #[arg(required= true, trailing_var_arg = true, allow_hyphen_values = false, num_args=1..)] //TODO: Allow ranges & fallible captures, so lines that match group 1 but not 2 will not cause output failure if given `1 2?` but will if given `1 2` (XXX: Is this actually meaningful/possible? Can we do this at all? I'm pretty sure `/(?:(.))?/` still creates an (empty) group? So perhaps, syntax for failing on *empty* group matches...? like, `1! 2` for "group #1 *required*, group #2 is not requested?") groups: Vec<usize>, // TODO: How to dedup (XXX: Do we want to de-dup? Maybe the user wants group `1` twice? I think it's fine (also we need to preserve user ordering of group indecied)) } impl Cli { /// Get the input string to match on /// /// If the requested input is `stdin`, `None` is returned. #[inline] pub fn input_string(&self) -> Option<&str> { self.string.value().map(AsRef::as_ref) } /// Get the string to build the regular expression from pub fn regex_string(&self) -> &str { &self.regex[..] } /// Get the match group(s) to print in the output #[inline] pub fn groups(&self) -> &[usize] { &self.groups[..] } /// Get the number of match groups requested. #[inline] pub fn num_groups(&self) -> usize { self.groups.len() } } /// Parse the command-line arguments passed to the program pub fn parse_cli() -> Cli { clap::Parser::parse() }