Added multi-group printing (to the old interface).

Added warnings that are printed when the user requests non-existent groups.

Fortune for rematch's current commit: Small blessing − 小吉
old-interface-extra-help-info
Avril 2 weeks ago
parent bcdbec60ca
commit e6c0714575
Signed by: flanchan
GPG Key ID: 284488987C31F630

@ -1,6 +1,6 @@
[package] [package]
name = "rematch" name = "rematch"
version = "1.0.1" version = "1.1.0"
authors = ["Avril <flanchan@cumallover.me>"] authors = ["Avril <flanchan@cumallover.me>"]
edition = "2024" edition = "2024"

@ -201,7 +201,7 @@ pub struct Config
//TODO: Should we have PCRE on by default or not...? I think we should maybe have it on by default if the feature is enabled... But that will mess with input parallelism... XXX: Perhaps we can auto-detect if to use PCRE or not (e.g. try compiling to regex first, then PCRE if that fails?) //TODO: Should we have PCRE on by default or not...? I think we should maybe have it on by default if the feature is enabled... But that will mess with input parallelism... XXX: Perhaps we can auto-detect if to use PCRE or not (e.g. try compiling to regex first, then PCRE if that fails?)
#[arg(short, long)] // XXX: Can we add a clap `value_parser!(FeatureOnBool<"perl">)` which fails to parse its `from_str()` impl if the feature is not enabled. Is this possible with what we currently have? We may be able to with macros, e.g expand a macro to `FeatureOnBool<"perl", const { cfg!(feature="perl") }>` or something similar? (NOTE: If `clap` has a better mechanism for this, use that instead of re-inventing it tho.) #[arg(short, long)] // XXX: Can we add a clap `value_parser!(FeatureOnBool<"perl">)` which fails to parse its `from_str()` impl if the feature is not enabled. Is this possible with what we currently have? We may be able to with macros, e.g expand a macro to `FeatureOnBool<"perl", const { cfg!(feature="perl") }>` or something similar? (NOTE: If `clap` has a better mechanism for this, use that instead of re-inventing it tho.)
// #[cfg(feature="perl")] //XXX: Do we want this option to be feature-gated? Or should we fail with error `if (! cfg!(feature="perl")) && self.extended)`? I think the latter would make things more easily (since the Regex engine gates PCRE-compilation transparently to the API user [see `crate::re::Regex`], we don't need to gate it this way outside of `re`, if we remove this gate we can just use `cfg!()` everywhere here which makes things **MUCH** cleaner..) It also means the user of a non-PCRE build will at least know why their PCRE flag is failing and that it can be built with the "perl" feature, instead of it being *totally* invisible to the user if the feature is off. // #[cfg(feature="perl")] //XXX: Do we want this option to be feature-gated? Or should we fail with error `if (! cfg!(feature="perl")) && self.extended)`? I think the latter would make things more easily (since the Regex engine gates PCRE-compilation transparently to the API user [see `crate::re::Regex`], we don't need to gate it this way outside of `re`, if we remove this gate we can just use `cfg!()` everywhere here which makes things **MUCH** cleaner..) It also means the user of a non-PCRE build will at least know why their PCRE flag is failing and that it can be built with the "perl" feature, instead of it being *totally* invisible to the user if the feature is off.
extended: bool, pub extended: bool,
/// Delimit read input/output strings from/to `stdin`/`stdout` by NUL ('\0') characters instead of newlines. /// Delimit read input/output strings from/to `stdin`/`stdout` by NUL ('\0') characters instead of newlines.
/// ///
@ -218,6 +218,7 @@ impl Config
/// Note that if the "perl" feature is not enabled, this may still return `true`. /// Note that if the "perl" feature is not enabled, this may still return `true`.
/// If the user requests PCRE where it is not available, the caller should return an error/panic to the user telling her that. /// If the user requests PCRE where it is not available, the caller should return an error/panic to the user telling her that.
#[inline(always)] #[inline(always)]
#[deprecated(note = "Access field `extended` instead.")]
//TODO: Make `extended` public and remove this accessor? //TODO: Make `extended` public and remove this accessor?
pub fn use_pcre(&self) -> bool pub fn use_pcre(&self) -> bool
{ {

@ -19,15 +19,43 @@ fn initialise() -> eyre::Result<()>
Ok(()) Ok(())
} }
fn print_group<S: ?Sized, G, T>(to: &mut S, g: G, group: usize) -> std::io::Result<()> #[inline]
fn print_groups<'a, S: ?Sized, G, T: 'a, I>(to: &mut S, g: G, groups: I) -> std::io::Result<()>
where S: std::io::Write, where S: std::io::Write,
G: IntoIterator<Item = Option<T>>, G: IntoIterator<Item = &'a Option<T>> + Clone + Copy, // NOTE: Copy bound to ensure we're not accidentally doing deep clones of `g`.
T: std::borrow::Borrow<str> //G: std::ops::Index<usize>, G::Output: std::borrow::Borrow<Option<T>>,
T: std::borrow::Borrow<str>,
I: IntoIterator<Item: std::borrow::Borrow<usize>/*, IntoIter: ExactSizeIterator*/>,
{ {
match g.into_iter().nth(group) { use std::borrow::Borrow;
Some(None) => writeln!(to, ""), let mut first = true;
Some(Some(g)) => writeln!(to, "{}", g.borrow()), for group in groups.into_iter() {
None => Ok(()), let group = group.borrow();
// // Moved to into match group (skipping invalid groups.)
// if !first {
// write!(to, "\t")?;
// }
let print_delim = || first.then_some("").unwrap_or("\t"); // If it's not the first iteration, print `\t`.
match g.into_iter().nth(*group) {
Some(None) => write!(to, "{}", print_delim()),
Some(Some(g)) => write!(to, "{}{}", print_delim(), g.borrow()),
//TODO: What should be the behaviour of a non-existent group index here? (NOTE: This now corresponds to the previous `g.len() > group` check in caller.) // (NOTE: The original behaviour is to just ignore groups that are out of range entirely (i.e. no printing, no delimit char, no error,) maybe treat non-existent groups as non-matched groups and *just* print the delim char?)
// (NOTE: Moved out of branch, see above ^) // None if !first => write!(to, "\t"),
// XXX: Should this do what it does now...? Or should it `break` to prevent the checking for more groups...? Print a warning maybe...?
None => {
eprintln!("Warning: Invalid group index {}!", group);
continue; // Do not set `first = false` if it was an invalid index.
//Ok(())
},
}?;
first = false;
}
// If `first == true`, no groups were printed, so we do not print the new-line.
if !first {
to.write_all(b"\n")
} else {
Ok(())
} }
} }
@ -38,27 +66,46 @@ fn main() -> eyre::Result<()>
let cli = args::parse_cli();//.wrap_err("Error parsing command-line arguments")?; let cli = args::parse_cli();//.wrap_err("Error parsing command-line arguments")?;
eprintln!("{:#?}", cli); eprintln!("{:#?}", cli);
return Ok(()); // return Ok(());
let args: Vec<String> = std::env::args().collect(); let args: re::FrozenVec<re::FrozenString> = std::env::args().map(String::into_boxed_str).collect();
if args.len() < 4 { if args.len() < 4 {
println!("Usage: {} <str> <regex> <group>", args[0]); println!("Usage: {} <str> <regex> <group>...", args[0]);
println!("Pass `-' as `<str>' to read lines from stdin"); println!("Pass `-' as `<str>' to read lines from stdin");
std::process::exit(1); std::process::exit(1)
} else { } else {
let re = re::Regex::compile(&args[2])?; let re = re::Regex::compile(&args[2])?;
let text = &args[1]; let text = &args[1];
let group: usize = args[3].parse().expect("Invalid group number.");
let groups = &args[3..];
if groups.len() < 1 {
eprintln!("Warning: No capture groups requested.");
// NOTE: Unexpected branch...
return Ok(());
}
let groups = groups.iter().enumerate()
.map(|(i, x)| x.parse()
.with_section(|| format!("{:?}", groups).header("Groups specified were"))
.with_section(|| x.clone().header("Specified capture group index was"))
.with_section(move || i.header("Argument index in provided groups")))
.collect::<Result<Box<[usize]>, _>>()
.wrap_err("Invalid group index specified")?;
//TODO: XXX: How to handle multiple groups in `stdin_lines()` case?
//let group = groups[0]; //args[3].parse().expect("Invalid group number.");
use std::io::Write; use std::io::Write;
let mut stdout = std::io::stdout(); let mut stdout = std::io::stdout();
if text == "-" { if &text[..] == "-" {
text::stdin_lines(|text| -> eyre::Result<bool> { text::stdin_lines(|text| -> eyre::Result<bool> {
let mut stdout = stdout.lock(); let mut stdout = stdout.lock();
match re.exec(&text)? { match re.exec(&text)? {
Some(g) if g.len() > group => print_group(&mut stdout, g, group)?, //println!("{}", &g[group]), Some(g) /*if g.len() > group*/ => // NOTE: This check branch has now been moved into `print_groups()`
print_groups(&mut stdout, &g, &groups)?, //println!("{}", &g[group]),
_ => (), _ => (),
} }
Ok(true) Ok(true)
@ -66,7 +113,7 @@ fn main() -> eyre::Result<()>
} else { } else {
match re.exec(&text)? { match re.exec(&text)? {
Some(g) if g.len() > group => print_group(&mut stdout, g, group)?,//println!("{}", &g.nth(group).unwrap().map(|x| x.as_ref()).unwrap_or("")), Some(g) /*if g.len() > group*/ => print_groups(&mut stdout, &g[..], &groups)?,//println!("{}", &g.nth(group).unwrap().map(|x| x.as_ref()).unwrap_or("")),
_ => (), _ => (),
} }
} }

@ -11,7 +11,7 @@ pub type FrozenString = Box<str>;
// TODO: to return some kind of `Either<&'s str, impl bytes::Buf + 's>` type, which would use `str` on non-PCRE, but opaque `bytes::Buf` on PCRE?) // TODO: to return some kind of `Either<&'s str, impl bytes::Buf + 's>` type, which would use `str` on non-PCRE, but opaque `bytes::Buf` on PCRE?)
pub type FrozenBytes = FrozenVec<u8>; pub type FrozenBytes = FrozenVec<u8>;
pub type Groups<String = FrozenString> = FrozenVec<Option<String>>; pub type Groups<String = FrozenString> = FrozenVec<Option<String>>; // TODO: See `exec()` comment below about named groups and switching to `BTreeMap<{enum : CaptureGroupIdent::Index, Name}, Option<Cow<'s str>>>`
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Regex pub struct Regex
@ -55,7 +55,7 @@ impl Regex {
return Ok(Self{internal: regex::Regex::new(string.as_ref())?}); return Ok(Self{internal: regex::Regex::new(string.as_ref())?});
} }
pub fn exec<'s>(&self, string: &'s str) -> Result<Option<Groups<Cow<'s, str>>>, Error> pub fn exec<'s>(&self, string: &'s str) -> Result<Option<Groups<Cow<'s, str>>>, Error> //TODO: Can we also add named groups with a `BTreeMap<{CG::Index(usize) | CG:Name(String)}, Option<Groups<Cow<'s, str>>>>` (XXX: And maybe also be able to simplefy `V` to just `Option<&'s str>` / `Option<Cow<'s, str>>`, since the group index is already encoded in `K` (group index / group name mapped to potential match of associated group).)
{ {
#[cfg(feature = "perl")] #[cfg(feature = "perl")]
return { return {

Loading…
Cancel
Save