From e6c07145751320f2e6f5a0410f91e95f66cc7cee Mon Sep 17 00:00:00 2001 From: Avril Date: Sat, 5 Apr 2025 21:15:42 +0100 Subject: [PATCH] Added multi-group printing (to old interface.) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added warnings printed for user-requested non-existent groups. Fortune for rematch's current commit: Small blessing − 小吉 --- Cargo.toml | 2 +- src/args.rs | 3 ++- src/main.rs | 77 ++++++++++++++++++++++++++++++++++++++++++----------- src/re.rs | 4 +-- 4 files changed, 67 insertions(+), 19 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8875b02..203615c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "rematch" -version = "1.0.1" +version = "1.1.0" authors = ["Avril "] edition = "2024" diff --git a/src/args.rs b/src/args.rs index f86428c..385006d 100644 --- a/src/args.rs +++ b/src/args.rs @@ -201,7 +201,7 @@ pub struct Config //TODO: Should we have PCRE on by default or not...? I think we should maybe have it on by default if the feature is enabled... But that will mess with input parallelism... XXX: Perhaps we can auto-detect if to use PCRE or not (e.g. try compiling to regex first, then PCRE if that fails?) #[arg(short, long)] // XXX: Can we add a clap `value_parser!(FeatureOnBool<"perl">)` which fails to parse its `from_str()` impl if the feature is not enabled. Is this possible with what we currently have? We may be able to with macros, e.g expand a macro to `FeatureOnBool<"perl", const { cfg!(feature="perl") }>` or something similar? (NOTE: If `clap` has a better mechanism for this, use that instead of re-inventing it tho.) // #[cfg(feature="perl")] //XXX: Do we want this option to be feature-gated? Or should we fail with error `if (! cfg!(feature="perl")) && self.extended)`? 
I think the latter would make things more easily (since the Regex engine gates PCRE-compilation transparently to the API user [see `crate::re::Regex`], we don't need to gate it this way outside of `re`, if we remove this gate we can just use `cfg!()` everywhere here which makes things **MUCH** cleaner..) It also means the user of a non-PCRE build will at least know why their PCRE flag is failing and that it can be built with the "perl" feature, instead of it being *totally* invisible to the user if the feature is off. - extended: bool, + pub extended: bool, /// Delimit read input/output strings from/to `stdin`/`stdout` by NUL ('\0') characters instead of newlines. /// @@ -218,6 +218,7 @@ impl Config /// Note that if the "perl" feature is not enabled, this may still return `true`. /// If the user requests PCRE where it is not available, the caller should return an error/panic to the user telling her that. #[inline(always)] + #[deprecated(note = "Access field `extended` instead.")] //TODO: Make `extended` public and remove this accessor? pub fn use_pcre(&self) -> bool { diff --git a/src/main.rs b/src/main.rs index 625ce92..9b60b7f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -19,15 +19,43 @@ fn initialise() -> eyre::Result<()> Ok(()) } -fn print_group(to: &mut S, g: G, group: usize) -> std::io::Result<()> +#[inline] +fn print_groups<'a, S: ?Sized, G, T: 'a, I>(to: &mut S, g: G, groups: I) -> std::io::Result<()> where S: std::io::Write, - G: IntoIterator>, - T: std::borrow::Borrow + G: IntoIterator> + Clone + Copy, // NOTE: Copy bound to ensure we're not accidentally doing deep clones of `g`. 
+//G: std::ops::Index, G::Output: std::borrow::Borrow>, + T: std::borrow::Borrow, + I: IntoIterator/*, IntoIter: ExactSizeIterator*/>, { - match g.into_iter().nth(group) { - Some(None) => writeln!(to, ""), - Some(Some(g)) => writeln!(to, "{}", g.borrow()), - None => Ok(()), + use std::borrow::Borrow; + let mut first = true; + for group in groups.into_iter() { + let group = group.borrow(); + // // Moved to into match group (skipping invalid groups.) + // if !first { + // write!(to, "\t")?; + // } + let print_delim = || first.then_some("").unwrap_or("\t"); // If it's not the first iteration, print `\t`. + match g.into_iter().nth(*group) { + Some(None) => write!(to, "{}", print_delim()), + Some(Some(g)) => write!(to, "{}{}", print_delim(), g.borrow()), + + //TODO: What should be the behaviour of a non-existent group index here? (NOTE: This now corresponds to the previous `g.len() > group` check in caller.) // (NOTE: The original behaviour is to just ignore groups that are out of range entirely (i.e. no printing, no delimit char, no error,) maybe treat non-existent groups as non-matched groups and *just* print the delim char?) + // (NOTE: Moved out of branch, see above ^) // None if !first => write!(to, "\t"), + // XXX: Should this do what it does now...? Or should it `break` to prevent the checking for more groups...? Print a warning maybe...? + None => { + eprintln!("Warning: Invalid group index {}!", group); + continue; // Do not set `first = false` if it was an invalid index. + //Ok(()) + }, + }?; + first = false; + } + // If `first == true`, no groups were printed, so we do not print the new-line. 
+ if !first { + to.write_all(b"\n") + } else { + Ok(()) } } @@ -38,27 +66,46 @@ fn main() -> eyre::Result<()> let cli = args::parse_cli();//.wrap_err("Error parsing command-line arguments")?; eprintln!("{:#?}", cli); - return Ok(()); + // return Ok(()); - let args: Vec = std::env::args().collect(); + let args: re::FrozenVec = std::env::args().map(String::into_boxed_str).collect(); if args.len() < 4 { - println!("Usage: {} ", args[0]); + println!("Usage: {} ...", args[0]); println!("Pass `-' as `' to read lines from stdin"); - std::process::exit(1); + std::process::exit(1) } else { let re = re::Regex::compile(&args[2])?; let text = &args[1]; - let group: usize = args[3].parse().expect("Invalid group number."); + + let groups = &args[3..]; + + if groups.len() < 1 { + eprintln!("Warning: No capture groups requested."); + // NOTE: Unexpected branch... + return Ok(()); + } + + let groups = groups.iter().enumerate() + .map(|(i, x)| x.parse() + .with_section(|| format!("{:?}", groups).header("Groups specified were")) + .with_section(|| x.clone().header("Specified capture group index was")) + .with_section(move || i.header("Argument index in provided groups"))) + .collect::, _>>() + .wrap_err("Invalid group index specified")?; + + //TODO: XXX: How to handle multiple groups in `stdin_lines()` case? + //let group = groups[0]; //args[3].parse().expect("Invalid group number."); use std::io::Write; let mut stdout = std::io::stdout(); - if text == "-" { + if &text[..] == "-" { text::stdin_lines(|text| -> eyre::Result { let mut stdout = stdout.lock(); match re.exec(&text)? { - Some(g) if g.len() > group => print_group(&mut stdout, g, group)?, //println!("{}", &g[group]), + Some(g) /*if g.len() > group*/ => // NOTE: This check branch has now been moved into `print_groups()` + print_groups(&mut stdout, &g, &groups)?, //println!("{}", &g[group]), _ => (), } Ok(true) @@ -66,7 +113,7 @@ fn main() -> eyre::Result<()> } else { match re.exec(&text)? 
{ - Some(g) if g.len() > group => print_group(&mut stdout, g, group)?,//println!("{}", &g.nth(group).unwrap().map(|x| x.as_ref()).unwrap_or("")), + Some(g) /*if g.len() > group*/ => print_groups(&mut stdout, &g[..], &groups)?,//println!("{}", &g.nth(group).unwrap().map(|x| x.as_ref()).unwrap_or("")), _ => (), } } diff --git a/src/re.rs b/src/re.rs index 4e1972b..38547bb 100644 --- a/src/re.rs +++ b/src/re.rs @@ -11,7 +11,7 @@ pub type FrozenString = Box; // TODO: to return some kind of `Either<&'s str, impl bytes::Buf + 's>` type, which would use `str` on non-PCRE, but opaque `bytes::Buf` on PCRE?) pub type FrozenBytes = FrozenVec; -pub type Groups = FrozenVec>; +pub type Groups = FrozenVec>; // TODO: See `exec()` comment below about named groups and switching to `BTreeMap<{enum : CaptureGroupIdent::Index, Name}, Option>>` #[derive(Debug, Clone)] pub struct Regex @@ -55,7 +55,7 @@ impl Regex { return Ok(Self{internal: regex::Regex::new(string.as_ref())?}); } - pub fn exec<'s>(&self, string: &'s str) -> Result>>, Error> + pub fn exec<'s>(&self, string: &'s str) -> Result>>, Error> //TODO: Can we also add named groups with a `BTreeMap<{CG::Index(usize) | CG::Name(String)}, Option>>>` (XXX: And maybe also be able to simplify `V` to just `Option<&'s str>` / `Option>`, since the group index is already encoded in `K` (group index / group name mapped to potential match of associated group).) { #[cfg(feature = "perl")] return {