Switched the PCRE backend to the **much** better PCRE2 library. (It allows for concurrent use directly, etc.)

TODO: Port this to the `cli-refactor` branch (XXX: or maybe the other way round: import the `args.rs` module here?)

Fortune for rematch's current commit: Small curse − 小凶
pcre2
Avril 3 days ago
parent d9ad1bafdf
commit 100b3a9afd
Signed by: flanchan
GPG Key ID: 284488987C31F630

@ -1,6 +1,6 @@
[package]
name = "rematch"
version = "0.1.0"
version = "0.1.0+1"
authors = ["Avril <flanchan@cumallover.me>"]
edition = "2018"
@ -8,13 +8,15 @@ edition = "2018"
[profile.release]
opt-level = 3
lto = "fat"
lto = true
codegen-units = 1
panic = "unwind"
strip = true
[features]
perl = ["pcre"]
default= ["perl"]
perl = ["dep:pcre2"]
[dependencies]
regex = "1"
pcre = { version = "0.2.3", optional = true }
pcre2 = { version = "0.2.9", optional = true }

@ -3,6 +3,18 @@
mod re;
mod text;
/// Writes the capture at position `group` of `g` to `to`, followed by a newline.
///
/// * If the item at that position is `Some(text)`, `text` is written as a line.
/// * If the item at that position is `None`, only a newline is written.
/// * If `g` yields `group` items or fewer, nothing is written.
///
/// # Errors
/// Returns any I/O error produced while writing to `to`.
fn print_group<S: ?Sized, G, T>(to: &mut S, g: G, group: usize) -> std::io::Result<()>
where
    S: std::io::Write,
    G: IntoIterator<Item = Option<T>>,
    T: std::borrow::Borrow<str>,
{
    let selected = g.into_iter().nth(group);

    if let Some(capture) = selected {
        match capture {
            Some(text) => {
                let line: &str = text.borrow();
                writeln!(to, "{}", line)
            }
            // The position exists but holds no text: emit an empty line.
            None => writeln!(to),
        }
    } else {
        // Position `group` is past the end of `g`: nothing to print.
        Ok(())
    }
}
fn main() -> Result<(), Box<dyn std::error::Error>>
{
let args: Vec<String> = std::env::args().collect();
@ -16,10 +28,14 @@ fn main() -> Result<(), Box<dyn std::error::Error>>
let text = &args[1];
let group: usize = args[3].parse().expect("Invalid group number.");
use std::io::Write;
let mut stdout = std::io::stdout();
if text == "-" {
text::stdin_lines(|text| -> Result<bool, re::Error> {
text::stdin_lines(|text| -> Result<bool, Box<dyn std::error::Error + Send + Sync + 'static>> {
let mut stdout = stdout.lock();
match re.exec(&text)? {
Some(g) if g.len() > group => println!("{}", &g[group]),
Some(g) if g.len() > group => print_group(&mut stdout, g, group)?, //println!("{}", &g[group]),
_ => (),
}
Ok(true)
@ -27,10 +43,11 @@ fn main() -> Result<(), Box<dyn std::error::Error>>
} else {
match re.exec(&text)? {
Some(g) if g.len() > group => println!("{}", &g[group]),
Some(g) if g.len() > group => print_group(&mut stdout, g, group)?,//println!("{}", &g.nth(group).unwrap().map(|x| x.as_ref()).unwrap_or("")),
_ => (),
}
}
stdout.flush().unwrap();
}
Ok(())
}

@ -1,23 +1,23 @@
#![allow(unused_imports)]
use std::{
error,
fmt::{
self,
Write,
},
sync::{
Arc,
Mutex,
}
fmt,
borrow::Cow,
};
pub type Groups = Vec<String>;
pub type FrozenVec<T> = Box<[T]>;
pub type FrozenString = Box<str>;
// NOTE: Currently unused, as we use `String::from_utf8_lossy()` for PCRE2 `bytes`-matching (XXX: Should we change?)
// TODO: Return some kind of `Either<&'s str, impl bytes::Buf + 's>` type, which would use `str` on non-PCRE, but an opaque `bytes::Buf` on PCRE?
pub type FrozenBytes = FrozenVec<u8>;
pub type Groups<String = FrozenString> = FrozenVec<Option<String>>;
#[derive(Debug, Clone)]
pub struct Regex
{
#[cfg(feature="perl")]
internal: Arc<Mutex<pcre::Pcre>>,
internal: pcre2::bytes::Regex,
#[cfg(not(feature = "perl"))]
internal: regex::Regex,
}
@ -50,24 +50,18 @@ impl Regex {
pub fn compile(string: impl AsRef<str>) -> Result<Self, Error>
{
#[cfg(feature = "perl")]
return Ok(Self{internal: Arc::new(Mutex::new(pcre::Pcre::compile(string.as_ref())?))});
return Ok(Self{internal: pcre2::bytes::RegexBuilder::new().build(string.as_ref())?});
#[cfg(not(feature = "perl"))]
return Ok(Self{internal: regex::Regex::new(string.as_ref())?});
}
pub fn exec(&self, string: impl AsRef<str>) -> Result<Option<Groups>, Error>
pub fn exec<'s>(&self, string: &'s str) -> Result<Option<Groups<Cow<'s, str>>>, Error>
{
#[cfg(feature = "perl")]
return {
let mut re = self.internal.lock().unwrap();
Ok(match re.exec(string.as_ref()) {
Ok(match self.internal.captures(string.as_ref())? {
Some(m) => {
let len = m.string_count();
let mut output = Vec::with_capacity(len);
for i in 0..len {
output.push(m.group(i).to_owned());
}
Some(output)
Some((0..m.len()).map(move |i| m.get(i).map(|x| String::from_utf8_lossy(x.as_bytes()) )).collect())
},
None => None,
})
@ -76,14 +70,7 @@ impl Regex {
return {
Ok(match self.internal.captures(string.as_ref()) {
Some(m) => {
let mut output = Vec::with_capacity(m.len());
for i in 0..m.len() {
let ma = m.get(i).unwrap();
let mut op = String::with_capacity(ma.range().len());
write!(op, "{}", ma.as_str())?;
output.push(op);
}
Some(output)
Some((0..m.len()).map(move |i| m.get(i).map(|x| Cow::Borrowed(x.as_str()) )).collect())
},
None => None,
})
@ -99,7 +86,7 @@ impl From<fmt::Error> for Error
}
}
#[cfg(not(feature = "perl"))]
//#[cfg(not(feature = "perl"))]
impl From<regex::Error> for Error
{
fn from(er: regex::Error) -> Self
@ -109,9 +96,9 @@ impl From<regex::Error> for Error
}
#[cfg(feature = "perl")]
impl From<pcre::CompilationError> for Error
impl From<pcre2::Error> for Error
{
fn from(er: pcre::CompilationError) -> Self
fn from(er: pcre2::Error) -> Self
{
Self::Compile(format!("{}", er))
}

Loading…
Cancel
Save