From 007ba8781cd4448f0c7a5b9a696d7367cf882dba Mon Sep 17 00:00:00 2001 From: Avril Date: Tue, 1 Apr 2025 20:29:54 +0100 Subject: [PATCH] Version bump 0.2.0: Refactored use to PCRE2, added `unstable` flag, update deps & edition to 2024. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fortune for rematch's current commit: Curse − 凶 --- Cargo.toml | 2 - src/args.rs | 294 ---------------------------------------------------- src/main.rs | 1 - 3 files changed, 297 deletions(-) delete mode 100644 src/args.rs diff --git a/Cargo.toml b/Cargo.toml index 5729d56..3d93f1d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,7 +25,5 @@ unstable = ["regex/unstable"] [dependencies] pcre2 = { version = "0.2.9", optional = true } -clap = { version = "4.5.35", features = ["derive", "env", "string"] } regex = { version = "1.11.1", features = ["use_std"] } color-eyre = { version = "0.6.3", default-features = false, features = ["track-caller"] } -rayon = "1.10.0" diff --git a/src/args.rs b/src/args.rs deleted file mode 100644 index f86428c..0000000 --- a/src/args.rs +++ /dev/null @@ -1,294 +0,0 @@ -//! Arguments and Cli-parsing -use super::*; -use std::{ - str, - error, fmt, - borrow::{ - Borrow, Cow, ToOwned, - }, - path::{ - Path, PathBuf, - }, - //collections::BTreeSet as Set, -}; -use clap::{ - Parser, - Args, - Subcommand, - ValueEnum, -}; - -/// A value that may be provided, or may be deferred to be provided by `stdin` (/ written to `stdout`.) -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)] -pub enum MaybeValue -{ - Stdio, - Value(T), -} - -impl MaybeValue -{ - pub const STDIO_SYMBOL: &'static str = "-"; - - #[inline] - pub const fn is_stdio(&self) -> bool - { - match self { - Self::Stdio => true, - _ => false, - } - } - #[inline] - pub const fn value(&self) -> Option<&T> - { - match self { - Self::Value(v) => Some(&v), - _ => None - } - } - #[inline(always)] - pub const fn has_value(&self) -> bool - { - self.value().is_some() - } - - /// Convert the value type to `U` (if there is one.) - /// - /// e.g. to convert `let _: MaybeValue = MaybeString::map_into();` - #[inline] - pub fn map_into>(self) -> MaybeValue - { - match self { - Self::Value(v) => MaybeValue::Value(v.into()), - Self::Stdio => MaybeValue::Stdio, - } - } - - /// Consume into the `Value(T)` if possible, if not, return `Err(Self)`. - #[inline] - #[must_use] - pub fn try_into_value(self) -> Result - { - match self { - x @ Self::Stdio => Err(x), - Self::Value(v) => Ok(v), - } - } -} - -impl> AsRef for MaybeValue -{ - #[inline] - fn as_ref(&self) -> &str - { - match self { - Self::Stdio => Self::STDIO_SYMBOL, - Self::Value(v) => v.as_ref(), - } - } -} - -impl> Borrow for MaybeValue -{ - #[inline] - fn borrow(&self) -> &str - { - match self { - Self::Stdio => Self::STDIO_SYMBOL, - Self::Value(v) => v.borrow(), - } - } -} - - -impl Default for MaybeValue -{ - #[inline] - fn default() -> Self - { - Self::Stdio - } -} - -impl> MaybeValue -{ - #[inline] - pub fn into_string(self) -> Cow<'static, str> - { - match self { - Self::Value(v) => Cow::Owned(v.into()), - Self::Stdio => Cow::Borrowed(Self::STDIO_SYMBOL), - } - } -} - -impl> MaybeValue -{ - #[inline] - pub fn into_path(self) -> Cow<'static, Path> - { - match self { - Self::Value(v) => Cow::Owned(v.into()), - Self::Stdio => Cow::Borrowed(Path::new(Self::STDIO_SYMBOL)), - } - } -} - -impl> From> for Box -{ - #[inline] - fn from(value: MaybeValue) -> Self { - value.into_path().into_owned().into_boxed_path() - } -} -impl> From> for Box -{ - #[inline] - fn from(value: MaybeValue) -> Self { - value.into_string().into_owned().into_boxed_str() - } -} - -impl> From> for Cow<'static, str> -{ - fn from(from: MaybeValue) -> Self - { - from.into_string() - } -} - - -impl> From for MaybeValue -{ - #[inline] - fn from(from: String) -> Self - { - match &from[..] { - Self::STDIO_SYMBOL => Self::Stdio, - _ => Self::Value(from.into()), - } - } -} - -impl str::FromStr for MaybeValue -where T: str::FromStr { - type Err = T::Err; - - #[inline] - fn from_str(s: &str) -> Result { - match s { - Self::STDIO_SYMBOL => Ok(Self::Stdio), - s => T::from_str(s).map(Self::Value) - } - } -} - -/// User-provied configuration of how the program should behave here -#[derive(Debug, Args)] -pub struct Config -{ - /// Use the PCRE (JS-like) extended regular expression compiler. - /// - /// __NOTE__: The binary must have been compiled with build feature `perl` to use this option. - /// - /// # Feature difference - /// By default, the expression syntax does not support things like negative lookahead and other backtrack-requiring regex features. - /// - /// ## Efficiency - /// Note that non-PCRE expressions are more efficient in general, and can also enable parallel processing of strings where there are many (e.g. a long list of lines from `stdin` can be matched against in parallel.) - /// - /// It is ill-advised to enable PCRE on large inputs unless those features are required. - //TODO: Should we have PCRE on by default or not...? I think we should maybe have it on by default if the feature is enabled... But that will mess with input parallelism... XXX: Perhaps we can auto-detect if to use PCRE or not (e.g. try compiling to regex first, then PCRE if that fails?) - #[arg(short, long)] // XXX: Can we add a clap `value_parser!(FeatureOnBool<"perl">)` which fails to parse its `from_str()` impl if the feature is not enabled. Is this possible with what we currently have? We may be able to with macros, e.g expand a macro to `FeatureOnBool<"perl", const { cfg!(feature="perl") }>` or something similar? (NOTE: If `clap` has a better mechanism for this, use that instead of re-inventing it tho.) -// #[cfg(feature="perl")] //XXX: Do we want this option to be feature-gated? Or should we fail with error `if (! cfg!(feature="perl")) && self.extended)`? I think the latter would make things more easily (since the Regex engine gates PCRE-compilation transparently to the API user [see `crate::re::Regex`], we don't need to gate it this way outside of `re`, if we remove this gate we can just use `cfg!()` everywhere here which makes things **MUCH** cleaner..) It also means the user of a non-PCRE build will at least know why their PCRE flag is failing and that it can be built with the "perl" feature, instead of it being *totally* invisible to the user if the feature is off. - extended: bool, - - /// Delimit read input/output strings from/to `stdin`/`stdout` by NUL ('\0') characters instead of newlines. - /// - /// This only affects the output of each string's match groups, not the groups themselves, those will still be delimited by TAB literals in the output. - #[arg(short='0', long)] - pub zero: bool, //XXX: Add `--field=`/`--ifs` option, put these in same group. Maybe add `--delimit-groups=` to change the group delimiter from `\t` to user-specified value. -} - -impl Config -{ - /// Whether it is requested to use PCRE regex instead of regular regex. - /// - /// # Interaction with feature gating of ~actual~ PCRE support via `feature="perl"` - /// Note that if the "perl" feature is not enabled, this may still return `true`. - /// If the user requests PCRE where it is not available, the caller should return an error/panic to the user telling her that. - #[inline(always)] - //TODO: Make `extended` public and remove this accessor? - pub fn use_pcre(&self) -> bool - { - //#![allow(unreachable_code)] - //#[cfg(feature="perl")] return self.extended; //TODO: See above comment on un-gating `self.extended` - //false - self.extended - } -} - -/// A string value that may be provided to the CLI, or delegated to `stdio`. -pub type MaybeString = MaybeValue>; -/// A path that may represent an `stdio` file-descriptor instead of a named file. -pub type MaybePath = MaybeValue>; - -/// `rematch` is a simple command-line tool for matching & printing capture groups of an input string(s) against a regular expression. -/// -/// The input string(s) can be provided in the command-line, or they can be provided as line delimited (by default) stream from `stdin`. -#[derive(Debug, Parser)] -#[command(name = env!("CARGO_PKG_NAME"), version, about, long_about)] -pub struct Cli -{ - /// Configuration of the execution - #[command(flatten)] - pub config: Config, - - //XXX: Should we make these fields public? - /// The input string to use, or `-` to read from stdin. - //TODO: Support multiple input strings in non-`stdin` case too. (XXX: How should this be handled...?) - string: MaybeString, - /// The regular expression to match `string` on. - regex: String, - /// The regex capture group indecies to print when matches on `string`. - #[arg(required= true, trailing_var_arg = true, allow_hyphen_values = false, num_args=1..)] - //TODO: Allow ranges & fallible captures, so lines that match group 1 but not 2 will not cause output failure if given `1 2?` but will if given `1 2` (XXX: Is this actually meaningful/possible? Can we do this at all? I'm pretty sure `/(?:(.))?/` still creates an (empty) group? So perhaps, syntax for failing on *empty* group matches...? like, `1! 2` for "group #1 *required*, group #2 is not requested?") - groups: Vec, // TODO: How to dedup (XXX: Do we want to de-dup? Maybe the user wants group `1` twice? I think it's fine (also we need to preserve user ordering of group indecied)) -} - -impl Cli { - /// Get the input string to match on - /// - /// If the requested input is `stdin`, `None` is returned. - #[inline] - pub fn input_string(&self) -> Option<&str> - { - self.string.value().map(AsRef::as_ref) - } - - /// Get the string to build the regular expression from - pub fn regex_string(&self) -> &str - { - &self.regex[..] - } - - /// Get the match group(s) to print in the output - #[inline] - pub fn groups(&self) -> &[usize] - { - &self.groups[..] - } - - /// Get the number of match groups requested. - #[inline] - pub fn num_groups(&self) -> usize - { - self.groups.len() - } -} - -/// Parse the command-line arguments passed to the program -pub fn parse_cli() -> Cli -{ - clap::Parser::parse() -} diff --git a/src/main.rs b/src/main.rs index e54d328..4cbb19f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,7 +2,6 @@ mod re; mod text; -mod args; use color_eyre::{ eyre::{