Compare commits

...

3 Commits

Author SHA1 Message Date
Avril 5def7d668c
re: Started generic (not dispatched) interface `RegexEngine` for `NonPCRERegex` & `Regex`.
2 days ago
Avril 9fdf0817ae
Added some basic command-line parsing.
3 days ago
Avril 0b04b94d0c
Started more robust, efficient, and extensive re-write in 2024 Rust with CLI flags.
3 days ago

@ -1,20 +1,33 @@
[package]
name = "rematch"
version = "0.1.0"
version = "1.0.0"
authors = ["Avril <flanchan@cumallover.me>"]
edition = "2018"
edition = "2024"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[profile.release]
opt-level = 3
lto = "fat"
lto = true
codegen-units = 1
panic = "unwind"
strip = true
[profile.symbols]
inherits = "release"
strip = false
[features]
perl = ["pcre"]
default = ["perl"]
# Enable the use of PCRE (extended matching) instead of Rust regexes.
perl = ["dep:pcre"]
unstable = ["regex/unstable"]
[dependencies]
regex = "1"
clap = { version = "4.5.34", features = ["derive", "env", "string"] }
color-eyre = { version = "0.6.3", default-features = false, features = ["track-caller"] }
pcre = { version = "0.2.3", optional = true }
rayon = "1.10.0"
regex = { version = "1.11.1", features = ["use_std"] }

@ -0,0 +1,294 @@
//! Arguments and Cli-parsing
use super::*;
use std::{
str,
error, fmt,
borrow::{
Borrow, Cow, ToOwned,
},
path::{
Path, PathBuf,
},
//collections::BTreeSet as Set,
};
use clap::{
Parser,
Args,
Subcommand,
ValueEnum,
};
/// A value that may be provided, or may be deferred to be provided by `stdin` (/ written to `stdout`.)
///
/// The textual form of the deferred case is the single token `-` (see `MaybeValue::STDIO_SYMBOL`).
///
/// NOTE: The derived `PartialOrd`/`Ord` follow declaration order, so `Stdio` sorts before every `Value(_)`.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)]
pub enum MaybeValue<T = String>
{
/// No explicit value was given; the data is to come from / go to the standard streams.
Stdio,
/// An explicitly provided value.
Value(T),
}
impl<T> MaybeValue<T>
{
    /// The token that stands for "use the standard stream" in textual form.
    pub const STDIO_SYMBOL: &'static str = "-";

    /// Is this the `Stdio` variant?
    #[inline]
    pub const fn is_stdio(&self) -> bool
    {
        matches!(self, Self::Stdio)
    }

    /// Borrow the contained value, or `None` if this is `Stdio`.
    #[inline]
    pub const fn value(&self) -> Option<&T>
    {
        if let Self::Value(inner) = self {
            Some(inner)
        } else {
            None
        }
    }

    /// Is this the `Value(_)` variant?
    #[inline(always)]
    pub const fn has_value(&self) -> bool
    {
        matches!(self, Self::Value(_))
    }

    /// Convert the contained value (if there is one) to `U`; `Stdio` stays `Stdio`.
    ///
    /// e.g. `let p: MaybeValue<PathBuf> = MaybeValue::Value(String::from("a")).map_into();`
    #[inline]
    pub fn map_into<U: From<T>>(self) -> MaybeValue<U>
    {
        match self {
            Self::Stdio => MaybeValue::Stdio,
            Self::Value(inner) => MaybeValue::Value(U::from(inner)),
        }
    }

    /// Consume `self` into the contained value.
    ///
    /// If there is no value (`Stdio`), `self` is handed back unchanged as `Err(self)`.
    #[inline]
    #[must_use]
    pub fn try_into_value(self) -> Result<T, Self>
    {
        match self {
            Self::Value(inner) => Ok(inner),
            stdio => Err(stdio),
        }
    }
}
impl<T: AsRef<str>> AsRef<str> for MaybeValue<T>
{
    /// View as a string slice: the contained value's text, or `STDIO_SYMBOL` for `Stdio`.
    #[inline]
    fn as_ref(&self) -> &str
    {
        if let Self::Value(inner) = self {
            inner.as_ref()
        } else {
            Self::STDIO_SYMBOL
        }
    }
}
// NOTE(review): `MaybeValue` derives `Hash`/`Eq`/`Ord` over the variant as well as the payload, so
// e.g. `Stdio` and `Value("-")` both borrow as "-" yet compare unequal. Avoid relying on this impl
// for `HashMap`/`BTreeMap` key lookups by `&str`.
impl<T: Borrow<str>> Borrow<str> for MaybeValue<T>
{
    /// Borrow as a string slice: the contained value's text, or `STDIO_SYMBOL` for `Stdio`.
    #[inline]
    fn borrow(&self) -> &str
    {
        if let Self::Value(inner) = self {
            inner.borrow()
        } else {
            Self::STDIO_SYMBOL
        }
    }
}
impl<T> Default for MaybeValue<T>
{
#[inline]
fn default() -> Self
{
Self::Stdio
}
}
impl<T: Into<String>> MaybeValue<T>
{
    /// Consume into text.
    ///
    /// A contained value is converted to an owned `String`; `Stdio` yields the borrowed
    /// `STDIO_SYMBOL` without allocating.
    #[inline]
    pub fn into_string(self) -> Cow<'static, str>
    {
        match self {
            Self::Stdio => Cow::Borrowed(Self::STDIO_SYMBOL),
            Self::Value(inner) => {
                let owned: String = inner.into();
                Cow::Owned(owned)
            },
        }
    }
}
impl<T: Into<PathBuf>> MaybeValue<T>
{
    /// Consume into a path.
    ///
    /// A contained value is converted to an owned `PathBuf`; `Stdio` yields a borrowed path
    /// made of `STDIO_SYMBOL`.
    #[inline]
    pub fn into_path(self) -> Cow<'static, Path>
    {
        match self {
            Self::Stdio => Cow::Borrowed(Path::new(Self::STDIO_SYMBOL)),
            Self::Value(inner) => {
                let owned: PathBuf = inner.into();
                Cow::Owned(owned)
            },
        }
    }
}
impl<T: Into<PathBuf>> From<MaybeValue<T>> for Box<Path>
{
    /// Box up the result of `MaybeValue::into_path()` (`Stdio` becomes the `-` path.)
    #[inline]
    fn from(value: MaybeValue<T>) -> Self {
        let path: PathBuf = value.into_path().into_owned();
        path.into_boxed_path()
    }
}
impl<T: Into<String>> From<MaybeValue<T>> for Box<str>
{
    /// Box up the result of `MaybeValue::into_string()` (`Stdio` becomes the `-` string.)
    #[inline]
    fn from(value: MaybeValue<T>) -> Self {
        let text: String = value.into_string().into_owned();
        text.into_boxed_str()
    }
}
impl<T: Into<String>> From<MaybeValue<T>> for Cow<'static, str>
{
fn from(from: MaybeValue<T>) -> Self
{
from.into_string()
}
}
impl<T: From<String>> From<String> for MaybeValue<T>
{
    /// Interpret an owned string: exactly `STDIO_SYMBOL` means `Stdio`, anything else
    /// (including the empty string) is converted into a `Value`.
    #[inline]
    fn from(from: String) -> Self
    {
        if from == Self::STDIO_SYMBOL {
            Self::Stdio
        } else {
            Self::Value(T::from(from))
        }
    }
}
impl<T> str::FromStr for MaybeValue<T>
where T: str::FromStr {
    type Err = T::Err;

    /// Parse text: exactly `STDIO_SYMBOL` means `Stdio`, anything else is handed to `T`'s own
    /// parser and its error (if any) is returned as-is.
    #[inline]
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        if s == Self::STDIO_SYMBOL {
            Ok(Self::Stdio)
        } else {
            s.parse::<T>().map(Self::Value)
        }
    }
}
/// User-provided configuration of how the program should behave here
// NOTE(review): with clap's derive, the `///` text on each field below presumably becomes that flag's `--help` description -- confirm before re-wording any of it.
#[derive(Debug, Args)]
pub struct Config
{
/// Use the PCRE (JS-like) extended regular expression compiler.
///
/// __NOTE__: The binary must have been compiled with build feature `perl` to use this option.
///
/// # Feature difference
/// By default, the expression syntax does not support things like negative lookahead and other backtrack-requiring regex features.
///
/// ## Efficiency
/// Note that non-PCRE expressions are more efficient in general, and can also enable parallel processing of strings where there are many (e.g. a long list of lines from `stdin` can be matched against in parallel.)
///
/// It is ill-advised to enable PCRE on large inputs unless those features are required.
//TODO: Should we have PCRE on by default or not...? I think we should maybe have it on by default if the feature is enabled... But that will mess with input parallelism... XXX: Perhaps we can auto-detect if to use PCRE or not (e.g. try compiling to regex first, then PCRE if that fails?)
#[arg(short, long)] // XXX: Can we add a clap `value_parser!(FeatureOnBool<"perl">)` which fails to parse its `from_str()` impl if the feature is not enabled. Is this possible with what we currently have? We may be able to with macros, e.g expand a macro to `FeatureOnBool<"perl", const { cfg!(feature="perl") }>` or something similar? (NOTE: If `clap` has a better mechanism for this, use that instead of re-inventing it tho.)
// #[cfg(feature="perl")] //XXX: Do we want this option to be feature-gated? Or should we fail with error `if (! cfg!(feature="perl")) && self.extended)`? I think the latter would make things more easily (since the Regex engine gates PCRE-compilation transparently to the API user [see `crate::re::Regex`], we don't need to gate it this way outside of `re`, if we remove this gate we can just use `cfg!()` everywhere here which makes things **MUCH** cleaner..) It also means the user of a non-PCRE build will at least know why their PCRE flag is failing and that it can be built with the "perl" feature, instead of it being *totally* invisible to the user if the feature is off.
// This field is private; it is read through the `Config::use_pcre()` accessor.
extended: bool,
/// Delimit read input/output strings from/to `stdin`/`stdout` by NUL ('\0') characters instead of newlines.
///
/// This only affects the output of each string's match groups, not the groups themselves, those will still be delimited by TAB literals in the output.
#[arg(short='0', long)]
pub zero: bool, //XXX: Add `--field=`/`--ifs` option, put these in same group. Maybe add `--delimit-groups=` to change the group delimiter from `\t` to user-specified value.
}
impl Config
{
    /// Did the user ask for PCRE (extended) matching rather than the default regex engine?
    ///
    /// # Interaction with feature gating of ~actual~ PCRE support via `feature="perl"`
    /// This reports the *request* only: it can be `true` even in a build without the "perl" feature.
    /// When PCRE is requested but unavailable, it is up to the caller to report that to the user
    /// (error/panic) rather than silently falling back.
    #[inline(always)]
    //TODO: Make `extended` public and remove this accessor?
    pub fn use_pcre(&self) -> bool
    {
        let Self { extended, .. } = self;
        *extended
    }
}
/// A string value that may be provided to the CLI, or delegated to `stdio`.
///
/// The payload is a `Box<str>`: an owned string that is not grown after construction.
pub type MaybeString = MaybeValue<Box<str>>;
/// A path that may represent an `stdio` file-descriptor instead of a named file.
///
/// The payload is a `Box<Path>`: an owned path that is not grown after construction.
pub type MaybePath = MaybeValue<Box<Path>>;
/// `rematch` is a simple command-line tool for matching & printing capture groups of an input string(s) against a regular expression.
///
/// The input string(s) can be provided in the command-line, or they can be provided as line delimited (by default) stream from `stdin`.
// NOTE: The `///` comments in this struct are rendered by clap's derive as user-facing help text, so they are kept terse and spell-checked.
#[derive(Debug, Parser)]
#[command(name = env!("CARGO_PKG_NAME"), version, about, long_about)]
pub struct Cli
{
    /// Configuration of the execution
    #[command(flatten)]
    pub config: Config,

    //XXX: Should we make these fields public?
    // For now they are private and exposed read-only through the accessors in `impl Cli`.

    /// The input string to use, or `-` to read from stdin.
    //TODO: Support multiple input strings in non-`stdin` case too. (XXX: How should this be handled...?)
    string: MaybeString,

    /// The regular expression to match `string` on.
    regex: String,

    /// The regex capture group indices to print when `string` matches.
    #[arg(required= true, trailing_var_arg = true, allow_hyphen_values = false, num_args=1..)]
    //TODO: Allow ranges & fallible captures, so lines that match group 1 but not 2 will not cause output failure if given `1 2?` but will if given `1 2` (XXX: Is this actually meaningful/possible? Can we do this at all? I'm pretty sure `/(?:(.))?/` still creates an (empty) group? So perhaps, syntax for failing on *empty* group matches...? like, `1! 2` for "group #1 *required*, group #2 is not requested?")
    groups: Vec<usize>, // TODO: How to dedup (XXX: Do we want to de-dup? Maybe the user wants group `1` twice? I think it's fine (also we need to preserve user ordering of group indices))
}
impl Cli {
    /// The input string to match on, when one was given on the command line.
    ///
    /// `None` means the input was requested from `stdin`.
    #[inline]
    pub fn input_string(&self) -> Option<&str>
    {
        self.string.value().map(|text| &**text)
    }

    /// The source text of the regular expression to compile.
    pub fn regex_string(&self) -> &str
    {
        self.regex.as_str()
    }

    /// The capture-group indices to print, in the order the user gave them.
    #[inline]
    pub fn groups(&self) -> &[usize]
    {
        self.groups.as_slice()
    }

    /// How many capture-group indices were requested.
    #[inline]
    pub fn num_groups(&self) -> usize
    {
        self.groups().len()
    }
}
/// Parse the process's command-line arguments into a [`Cli`].
///
/// Parsing is delegated entirely to clap's `Parser::parse()`; no `Result` is surfaced to the caller.
pub fn parse_cli() -> Cli
{
    <Cli as clap::Parser>::parse()
}

@ -1,10 +1,32 @@
#![allow(dead_code)]
#![cfg_attr(feature="unstable", feature(impl_trait_in_assoc_type))] // XXX: Re-work `re::RegexEngine` to be able to remove this if we can, so we can use non-allocating `try_exec()` on stable...
mod re;
mod text;
mod args;
fn main() -> Result<(), Box<dyn std::error::Error>>
use color_eyre::{
eyre::{
self,
eyre,
WrapErr as _,
},
SectionExt as _, Help as _,
};
/// One-time process setup: installs `color_eyre`'s hooks.
///
/// Any failure from `color_eyre::install()` is returned to the caller unchanged.
fn initialise() -> eyre::Result<()>
{
    color_eyre::install()
}
fn main() -> eyre::Result<()>
{
initialise().wrap_err("Fatal: Failed to install panic handle")?;
let cli = args::parse_cli();//.wrap_err("Error parsing command-line arguments")?;
eprintln!("{:#?}", cli);
return Ok(());
let args: Vec<String> = std::env::args().collect();
if args.len() < 4 {

@ -9,19 +9,316 @@ use std::{
sync::{
Arc,
Mutex,
}
},
num::NonZeroUsize,
convert::Infallible,
borrow::{
Cow,
Borrow,
},
};
pub type Groups = Vec<String>;
pub type FrozenVector<T> = Box<[T]>;
pub type FrozenString = Box<str>;
//TODO: Re-work this to allow non-matched groups (i.e. `Option<Cow<'static, str>>` or something...) to be communicated without `"".into()`.
pub type Groups = FrozenVector<FrozenString>;
//TODO: We need to provide a `NonPCRERegex` that we can runtime-polymorphicly use in the case PCRE is disabled/enabled by the user's Cli options (see `args::Config::extended`.)
// This `NonPCRERegex` can be written agnostic to the `perl` feature being enabled, as `Regex` below will use the optionally-included package `pcre` when the feature is enabled, but the `regex` package is *always* available.
//compile_error!("TODO: Remove this trait and refactor this shit. XXX: We don't need all this dynamic dispatch shit, we can just have an `enum` of `regex::Regex` & `Regex` if we need to, dispatching the `exec` call through that; as the compile error type differs & there is no exec error for non-PCRE regex exec. ");
//compile_error!("XXX: TODO: (I don't think we'll even need to do that though, just a helper ext-trait with the same types as the below trait and non-dyn methods -- mostly just `exec() -> Result<Option<Groups>, Self::ExecError>` -- is good enough.)")
/// Callback-driven matching interface (the methods taking `&self`/`&mut self` are usable through `dyn RegexMatcher`).
pub trait RegexMatcher
{
    /// Match this expression against `string`, feeding each resulting group to the `result` callback.
    ///
    /// # Callback protocol
    /// `result` receives `(i, n)`, where `i` is the position of the group in the iterator produced by `try_exec()` and `n` is the group's text.
    /// Groups keep being fed for as long as the callback answers `Ok(true)`. Otherwise:
    ///
    /// * `Err(e)` - feeding stops and `Err(e.into())` is returned.
    /// * `Ok(false)` - feeding stops and `Ok(Some(()))` is returned (this is still a *success*; the remaining groups are simply ignored.)
    ///
    /// Running out of groups without either of those happening also returns `Ok(Some(()))`.
    ///
    /// The callback is never invoked at all when `RegexEngine::try_exec()` itself returns:
    /// - `Err(e)`, which is returned as `Err(e)`.
    /// - `Ok(None)`, which is returned as `Ok(None)`.
    ///
    /// Note that the case that `Output<'_>` is a lazy iterator works best when working through this dynamic interface.
    ///
    /// # Return
    /// `Ok(None)` is returned *only* when `try_exec()` reported no match (and so `result` was never called.)
    /// A match that yields zero groups is still `Ok(Some(()))`.
    ///
    /// Every `Err(_)`, whether from `try_exec()` or from a call to `result(i, n)`, is propagated to the caller via `Err(e.into())`.
    fn try_exec_into<'s>(&self, string: &'s str, result: &mut (dyn FnMut((usize, &str)) -> crate::eyre::Result<bool>)) -> crate::eyre::Result<Option<()>>
    where Self: 's;

    /// Same as `try_exec_into()`, but can rely on being the *sole owner of* self *while invoked*.
    ///
    /// __NOTE__: The provided implementation makes no use of the exclusive access and simply forwards to `try_exec_into()`, so prefer that method unless an implementor supplies a genuinely owning version.
    // (__XXX__: Can we impl this for `Regex` when using PCRE to bypass need to lock mutex?)
    #[inline(always)]
    fn try_owned_exec_into<'s>(&mut self, string: &'s str, result: &mut (dyn FnMut((usize, &str)) -> crate::eyre::Result<bool>)) -> crate::eyre::Result<Option<()>>
    where Self: 's {
        RegexMatcher::try_exec_into(&*self, string, result)
    }

    /// Same as `try_exec_into()`, but can rely on `self` outliving all references within the call.
    ///
    /// This `Arc` reference to `self` is consumed by the call regardless of the outcome.
    ///
    /// __NOTE__: The provided implementation dispatches to `try_owned_exec_into()` when this is the only remaining `Arc` reference, and to `try_exec_into()` otherwise.
    /// But **also note that** the provided `try_owned_exec_into()` forwards to `try_exec_into()` anyway.
    #[inline]
    fn try_shared_exec_into<'s>(self: Arc<Self>, string: &'s str, result: &mut (dyn FnMut((usize, &str)) -> crate::eyre::Result<bool>)) -> crate::eyre::Result<Option<()>>
    where Self: Sized + 's {
        // Unfortunately, we can't go from `Arc<_>` -> `Box<_>` via `try_from()` or `into()`...
        match Arc::try_unwrap(self) {
            Ok(mut owned) => owned.try_owned_exec_into(string, result),
            Err(shared) => shared.try_exec_into(string, result),
        }
    }

    /// Identical purpose to `RegexEngine::prepare_regex()`, provided for parallel dynamic dispatch over `self`.
    fn do_prepare_regex(&mut self, num: Option<NonZeroUsize>);

    /// Identical value to `RegexEngine::should_prefer_run_in_parallel()`, provided for parallel dynamic dispatch over `self`.
    fn prefer_run_in_parallel_p(&self, num: Option<NonZeroUsize>) -> Option<bool>;
}
/// Every `RegexEngine` whose execution error can be turned into an `eyre` report is a `RegexMatcher`.
impl<T: ?Sized> RegexMatcher for T
where T: RegexEngine,
      for <'a> T::ExecError<'a>: Send + Sync + 'static
{
    #[inline]
    fn try_exec_into<'s>(&self, string: &'s str, result: &mut (dyn FnMut((usize, &str)) -> crate::eyre::Result<bool>)) -> crate::eyre::Result<Option<()>> where Self: 's {
        // No match at all: report that without ever touching the callback.
        let groups = match self.try_exec(string)? {
            Some(groups) => groups,
            None => return Ok(None),
        };

        // Feed the groups (with their position) to the callback until it declines more.
        for (index, group) in groups.into_iter().enumerate() {
            if !result((index, group.borrow()))? {
                break;
            }
        }

        Ok(Some(()))
    }

    #[inline]
    fn prefer_run_in_parallel_p(&self, num: Option<NonZeroUsize>) -> Option<bool>
    {
        <T as RegexEngine>::should_prefer_run_in_parallel(self, num)
    }

    #[inline(always)]
    fn do_prepare_regex(&mut self, num: Option<NonZeroUsize>) {
        <T as RegexEngine>::prepare_regex(self, num);
    }
}
/// Erase a concrete boxed engine into a thread-safe boxed `dyn RegexMatcher`.
impl<'a, T: Send + Sync + 'a> From<Box<T>> for Box<dyn RegexMatcher + Send + Sync + 'a>
where T: RegexMatcher + RegexEngine
{
    #[inline]
    fn from(from: Box<T>) -> Self
    {
        // Plain unsizing coercion; the box itself is passed through untouched.
        let erased: Box<dyn RegexMatcher + Send + Sync + 'a> = from;
        erased
    }
}
/// A regular-expression object that is compiled from a string and can then match any number of strings through a shared reference (possibly in parallel, see below.)
///
/// A match operation yields a generic iterator over the match groups wrapped in an `Option<_>`; `None` is returned when the string provided does not match the whole regular expression.
/// (__XXX__: groups that did not participate are currently empty strings, to keep the indices valid. __TODO__: I-it does keep them valid, right??)
pub trait RegexEngine
{
    /// The groups produced by one successful match on a `'string`-lived input.
    type Output<'string>: IntoIterator<Item: Borrow<str>> + 'string
    where Self: 'string;
    /// Error produced when compiling an expression fails.
    type CompileError<'s>: error::Error;
    /// Error produced when running a match fails.
    type ExecError<'s>: error::Error;

    /// Attempt to compile `string` into a new boxed instance of `Self`.
    ///
    /// Useful for dispatching with a dynamic `RegexMatcher` instead of `RegexEngine`.
    fn try_compile_boxed<'s>(string: &'s str) -> Result<Box<Self>, Self::CompileError<'s>>;

    /// Attempt to compile `string` into a new instance of `Self`.
    ///
    /// The provided implementation compiles via `try_compile_boxed()` and moves the value out of the box.
    #[inline(always)]
    fn try_compile<'s>(string: &'s str) -> Result<Self, Self::CompileError<'s>>
    where Self: Sized {
        match Self::try_compile_boxed(string) {
            Ok(boxed) => Ok(*boxed),
            Err(error) => Err(error),
        }
    }

    /// Attempt to run match groups on `string`, returning them as `Self::Output`.
    /// If there are no matches, `Ok(None)` should be returned.
    fn try_exec<'s>(&self, string: &'s str) -> Result<Option<Self::Output<'s>>, Self::ExecError<'s>>;

    /// Should `try_exec()` be ran over an iterator of `string`s in parallel or sequence? Or, does it not matter?
    /// Where `num` is the number of `string`s (if known by caller.)
    ///
    /// We assume 0 `string`s will not cause any execution.
    ///
    /// # Returns
    /// - `Some(true)` - Yes, do prefer run in parallel.
    /// - `Some(false)` - No, do **not** run in parallel if possible.
    /// - ~default~ `None` - Unknown. It is possible to run in parallel, but it either does not matter, or may not cause tangible performance benefits over running in sequence.
    #[inline(always)]
    fn should_prefer_run_in_parallel(&self, _num: Option<NonZeroUsize>) -> Option<bool> { None }

    /// Prepare for the regex to be used. If it is to be used on a known number of `string`s, then that count is passed.
    ///
    /// This is the hook for things like re-compilation / optimisation that could speed up matching when the regex is used more than once.
    /// The provided implementation does nothing.
    #[inline(always)]
    fn prepare_regex(&mut self, _num: Option<NonZeroUsize>) {}
}
// compile_error!("XXX: Can we do this GAT-generically...?");
// pub trait RegexEngineDetachedOutput: RegexEngine
// {
// fn try_exec_detached<'s>(&self, string: &'s str) -> Result<Self::Output<'static>, Self::ExecError<'s>>;
// }
/// `RegexEngine` backed directly by the `regex` crate; groups borrow from the input string.
impl RegexEngine for NonPCRERegex
{
    // With `unstable`, the groups are handed out as a lazy iterator (ITiAT); no allocation needed.
    #[cfg(feature="unstable")]
    type Output<'string> = impl Iterator<Item = Cow<'string, str>> /*+ impl std::iter::ExactSizeIterator*/ + 'string; //XXX: TODO: Rework this.
    // On stable, the groups have to be collected into a boxed slice first.
    #[cfg(not(feature="unstable"))]
    type Output<'string> = Box<[Cow<'string, str>]>; //XXX: TODO: Rework this.
    type CompileError<'s> = regex::Error;
    type ExecError<'s> = Infallible;

    #[inline]
    fn try_compile<'s>(string: &'s str) -> Result<Self, Self::CompileError<'s>> {
        Self::new(string)
    }

    #[inline(always)]
    fn try_compile_boxed<'s>(string: &'s str) -> Result<Box<Self>, Self::CompileError<'s>> {
        Self::new(string).map(Box::new)
    }

    /// Run the expression on `string`; never fails (`ExecError` is `Infallible`).
    ///
    /// Yields one entry per capture group index `0..len`, where a group that did not participate in the match is the empty string.
    #[inline]
    fn try_exec<'s>(&self, string: &'s str) -> Result<Option<Self::Output<'s>>, Self::ExecError<'s>> {
        let Some(caps) = self.captures(string) else {
            return Ok(None);
        };

        let groups = (0..caps.len())
            .map(move |idx| Cow::Borrowed(caps.get(idx).map_or("", |found| found.as_str())));

        // If `unstable` is not enabled, we cannot skip this allocation (for now...)
        // Otherwise the lazy iterator itself is returned.
        #[cfg(not(feature="unstable"))]
        let groups = groups.collect();

        Ok(Some(groups))
    }

    /// Prefer parallel matching, except that a single known string gets no preference (`None`).
    #[inline(always)]
    fn should_prefer_run_in_parallel(&self, num: Option<NonZeroUsize>) -> Option<bool> {
        match num.map(NonZeroUsize::get) {
            Some(1) => None,
            _ => Some(true),
        }
    }
}
/// `RegexEngine` for the feature-switched `Regex` (PCRE when built with `perl`, the `regex` crate otherwise.)
impl RegexEngine for Regex
{
    type Output<'string> = Groups; // XXX: Can we have a ref-only output here...? Maybe for non-PCRE... So keep as this for now.
    type CompileError<'s> = Error;
    type ExecError<'s> = Infallible;

    #[inline]
    fn try_compile<'s>(string: &'s str) -> Result<Self, Self::CompileError<'s>> {
        Self::compile(string)
    }

    #[inline(always)]
    fn try_compile_boxed<'s>(string: &'s str) -> Result<Box<Self>, Self::CompileError<'s>> {
        Self::compile(string).map(Box::new)
    }

    /// Run the expression on `string` via `Regex::exec()`.
    ///
    /// # Panics
    /// If `Regex::exec()` returns an error. `ExecError` is `Infallible`, so such an error cannot be
    /// reported to the caller; it is treated as a broken internal invariant.
    #[inline(always)]
    fn try_exec<'t, 's>(&'t self, string: &'s str) -> Result<Option<Self::Output<'s>>, Self::ExecError<'s>> {
        // The implementation of `Regex::exec()` is *expected* to have no path that returns an error
        // (XXX: Why does it even return `Result` anyway...?), but that is a property of another
        // function which can change independently of this one. Check it rather than assuming it:
        // a panic on a violated assumption is recoverable to diagnose, undefined behaviour is not.
        match Self::exec(self, string) {
            Ok(groups) => Ok(groups),
            Err(_) => panic!("Regex::exec() returned an error, but RegexEngine::ExecError for Regex is Infallible"),
        }
    }

    /// PCRE supports `study()`ing the regular expression, which we might want to do if we have more than a few strings to match on.
    ///
    /// Nothing is done when the count is unknown or at most 2.
    ///
    /// If PCRE is not enabled, and we use the Rust regex `regex::Regex`; it does not require/support additional optimisations, so keep the default noop-impl from the trait if this feature is not enabled.
    #[cfg(feature="perl")]
    fn prepare_regex(&mut self, num: Option<NonZeroUsize>) {
        match num.map(NonZeroUsize::get) {
            Some(1..=2) | None => return,
            _ => (),
        }
        // XXX: Eh.. The `Arc` means we gotta lock here...
        // NOTE: If there is another lock held while *this* method is being invoked, it can *only* make logical sense that it is calling the same method on a different thread. So do not block to call this. (XXX: This is only required because of the locking we have to do here...)
        // A lock that is busy (or poisoned) is simply skipped: studying is an optimisation only.
        if let Ok(mut re) = self.internal.try_lock() {
            re.study();
            self.internal.clear_poison();
            drop(re);
        }
    }

    /// A single known string is never worth parallelising; otherwise prefer parallel only when PCRE is *not* in use.
    #[inline(always)]
    fn should_prefer_run_in_parallel(&self, num: Option<NonZeroUsize>) -> Option<bool> {
        match num.map(NonZeroUsize::get) {
            Some(1) => Some(false),
            _ => Some(! Self::IS_EXTENDED),
        }
    }
}
/// Non-PCRE / non-extended regex (regardless of if the `perl` feature is enabled.)
pub type NonPCRERegex = regex::Regex;
/// PCRE-enabled (if feature is enabled, see [`IS_EXTENDED`]) regex.
#[derive(Debug, Clone)]
pub struct Regex
{
#[cfg(feature="perl")]
internal: Arc<Mutex<pcre::Pcre>>,
internal: Arc<Mutex<pcre::Pcre>>, // XXX: Can we make parallel usage a bit less... expensive? TODO: How expensive is it to clone these into a thread-local cache, for instance?
#[cfg(not(feature = "perl"))]
internal: regex::Regex,
}
impl Regex
{
/// If the implementation uses PCRE instead of default regex.
///
/// This is a compile-time constant: `true` exactly when the crate is built with the `perl` feature.
pub const IS_EXTENDED: bool = cfg!(feature="perl");
}
#[derive(Debug)]
pub enum Error
{
@ -65,9 +362,9 @@ impl Regex {
let len = m.string_count();
let mut output = Vec::with_capacity(len);
for i in 0..len {
output.push(m.group(i).to_owned());
output.push(m.group(i).into());
}
Some(output)
Some(output.into_boxed_slice())
},
None => None,
})
@ -76,14 +373,16 @@ impl Regex {
return {
Ok(match self.internal.captures(string.as_ref()) {
Some(m) => {
let mut output = Vec::with_capacity(m.len());
for i in 0..m.len() {
let ma = m.get(i).unwrap();
let mut op = String::with_capacity(ma.range().len());
write!(op, "{}", ma.as_str())?;
output.push(op);
}
Some(output)
Some((0..m.len()).map(move |i| match m.get(i) { Some(ma) => ma.as_str().into(), None => "".into()} ).collect())
// let mut output = Vec::with_capacity(m.len());
// for i in 0..m.len() {
// let ma = m.get(i).unwrap();
// //let mut op = String::with_capacity(ma.range().len());
// //let op = format!("{}", ma.as_str().into())
// output.push(ma.as_str().into());
// }
// Some(output.into_boxed_slice())
},
None => None,
})
@ -99,7 +398,7 @@ impl From<fmt::Error> for Error
}
}
#[cfg(not(feature = "perl"))]
//#[cfg(not(feature = "perl"))]
impl From<regex::Error> for Error
{
fn from(er: regex::Error) -> Self

Loading…
Cancel
Save