Switched the PCRE backend to the **much** better PCRE2 library. (Allows for concurrency directly, etc.)

TODO: Port this to the `cli-refactor` branch (XXX: or the other way round, maybe? Import the `args.rs` module into here instead?)

Fortune for rematch's current commit: Small curse − 小凶
pcre2
Avril 1 week ago
parent d9ad1bafdf
commit 100b3a9afd
Signed by: flanchan
GPG Key ID: 284488987C31F630

@ -1,6 +1,6 @@
[package] [package]
name = "rematch" name = "rematch"
version = "0.1.0" version = "0.1.0+1"
authors = ["Avril <flanchan@cumallover.me>"] authors = ["Avril <flanchan@cumallover.me>"]
edition = "2018" edition = "2018"
@ -8,13 +8,15 @@ edition = "2018"
[profile.release] [profile.release]
opt-level = 3 opt-level = 3
lto = "fat" lto = true
codegen-units = 1 codegen-units = 1
panic = "unwind" panic = "unwind"
strip = true
[features] [features]
perl = ["pcre"] default= ["perl"]
perl = ["dep:pcre2"]
[dependencies] [dependencies]
regex = "1" regex = "1"
pcre = { version = "0.2.3", optional = true } pcre2 = { version = "0.2.9", optional = true }

@ -3,6 +3,18 @@
mod re; mod re;
mod text; mod text;
/// Writes capture group number `group` of `g` to `to`, terminated by a newline.
///
/// * If the group exists and holds text, that text is written.
/// * If the group exists but is `None`, an empty line is written.
/// * If `g` yields fewer than `group + 1` items, nothing is written.
///
/// # Errors
/// Propagates any I/O error returned by the writer.
fn print_group<S: ?Sized, G, T>(to: &mut S, g: G, group: usize) -> std::io::Result<()>
where
    S: std::io::Write,
    G: IntoIterator<Item = Option<T>>,
    T: std::borrow::Borrow<str>,
{
    // `nth` walks the iterator up to (and including) the requested index.
    if let Some(capture) = g.into_iter().nth(group) {
        // A present-but-empty group is rendered as an empty line.
        let text: &str = match capture.as_ref() {
            Some(value) => std::borrow::Borrow::borrow(value),
            None => "",
        };
        writeln!(to, "{}", text)
    } else {
        // Index out of range: silently succeed without output.
        Ok(())
    }
}
fn main() -> Result<(), Box<dyn std::error::Error>> fn main() -> Result<(), Box<dyn std::error::Error>>
{ {
let args: Vec<String> = std::env::args().collect(); let args: Vec<String> = std::env::args().collect();
@ -16,10 +28,14 @@ fn main() -> Result<(), Box<dyn std::error::Error>>
let text = &args[1]; let text = &args[1];
let group: usize = args[3].parse().expect("Invalid group number."); let group: usize = args[3].parse().expect("Invalid group number.");
use std::io::Write;
let mut stdout = std::io::stdout();
if text == "-" { if text == "-" {
text::stdin_lines(|text| -> Result<bool, re::Error> { text::stdin_lines(|text| -> Result<bool, Box<dyn std::error::Error + Send + Sync + 'static>> {
let mut stdout = stdout.lock();
match re.exec(&text)? { match re.exec(&text)? {
Some(g) if g.len() > group => println!("{}", &g[group]), Some(g) if g.len() > group => print_group(&mut stdout, g, group)?, //println!("{}", &g[group]),
_ => (), _ => (),
} }
Ok(true) Ok(true)
@ -27,10 +43,11 @@ fn main() -> Result<(), Box<dyn std::error::Error>>
} else { } else {
match re.exec(&text)? { match re.exec(&text)? {
Some(g) if g.len() > group => println!("{}", &g[group]), Some(g) if g.len() > group => print_group(&mut stdout, g, group)?,//println!("{}", &g.nth(group).unwrap().map(|x| x.as_ref()).unwrap_or("")),
_ => (), _ => (),
} }
} }
stdout.flush().unwrap();
} }
Ok(()) Ok(())
} }

@ -1,23 +1,23 @@
#![allow(unused_imports)]
use std::{ use std::{
error, error,
fmt::{ fmt,
self, borrow::Cow,
Write,
},
sync::{
Arc,
Mutex,
}
}; };
pub type Groups = Vec<String>; pub type FrozenVec<T> = Box<[T]>;
pub type FrozenString = Box<str>;
// NOTE: Currently unused, as we use `String::from_utf8_lossy()` for PCRE2 `byte`-matching (XXX: Should we change?)
// TODO: Return some kind of `Either<&'s str, impl bytes::Buf + 's>` type, which would use `str` on non-PCRE, but an opaque `bytes::Buf` on PCRE?
pub type FrozenBytes = FrozenVec<u8>;
pub type Groups<String = FrozenString> = FrozenVec<Option<String>>;
#[derive(Debug, Clone)]
pub struct Regex pub struct Regex
{ {
#[cfg(feature="perl")] #[cfg(feature="perl")]
internal: Arc<Mutex<pcre::Pcre>>, internal: pcre2::bytes::Regex,
#[cfg(not(feature = "perl"))] #[cfg(not(feature = "perl"))]
internal: regex::Regex, internal: regex::Regex,
} }
@ -50,24 +50,18 @@ impl Regex {
pub fn compile(string: impl AsRef<str>) -> Result<Self, Error> pub fn compile(string: impl AsRef<str>) -> Result<Self, Error>
{ {
#[cfg(feature = "perl")] #[cfg(feature = "perl")]
return Ok(Self{internal: Arc::new(Mutex::new(pcre::Pcre::compile(string.as_ref())?))}); return Ok(Self{internal: pcre2::bytes::RegexBuilder::new().build(string.as_ref())?});
#[cfg(not(feature = "perl"))] #[cfg(not(feature = "perl"))]
return Ok(Self{internal: regex::Regex::new(string.as_ref())?}); return Ok(Self{internal: regex::Regex::new(string.as_ref())?});
} }
pub fn exec(&self, string: impl AsRef<str>) -> Result<Option<Groups>, Error> pub fn exec<'s>(&self, string: &'s str) -> Result<Option<Groups<Cow<'s, str>>>, Error>
{ {
#[cfg(feature = "perl")] #[cfg(feature = "perl")]
return { return {
let mut re = self.internal.lock().unwrap(); Ok(match self.internal.captures(string.as_ref())? {
Ok(match re.exec(string.as_ref()) {
Some(m) => { Some(m) => {
let len = m.string_count(); Some((0..m.len()).map(move |i| m.get(i).map(|x| String::from_utf8_lossy(x.as_bytes()) )).collect())
let mut output = Vec::with_capacity(len);
for i in 0..len {
output.push(m.group(i).to_owned());
}
Some(output)
}, },
None => None, None => None,
}) })
@ -76,14 +70,7 @@ impl Regex {
return { return {
Ok(match self.internal.captures(string.as_ref()) { Ok(match self.internal.captures(string.as_ref()) {
Some(m) => { Some(m) => {
let mut output = Vec::with_capacity(m.len()); Some((0..m.len()).map(move |i| m.get(i).map(|x| Cow::Borrowed(x.as_str()) )).collect())
for i in 0..m.len() {
let ma = m.get(i).unwrap();
let mut op = String::with_capacity(ma.range().len());
write!(op, "{}", ma.as_str())?;
output.push(op);
}
Some(output)
}, },
None => None, None => None,
}) })
@ -99,7 +86,7 @@ impl From<fmt::Error> for Error
} }
} }
#[cfg(not(feature = "perl"))] //#[cfg(not(feature = "perl"))]
impl From<regex::Error> for Error impl From<regex::Error> for Error
{ {
fn from(er: regex::Error) -> Self fn from(er: regex::Error) -> Self
@ -109,9 +96,9 @@ impl From<regex::Error> for Error
} }
#[cfg(feature = "perl")] #[cfg(feature = "perl")]
impl From<pcre::CompilationError> for Error impl From<pcre2::Error> for Error
{ {
fn from(er: pcre::CompilationError) -> Self fn from(er: pcre2::Error) -> Self
{ {
Self::Compile(format!("{}", er)) Self::Compile(format!("{}", er))
} }

Loading…
Cancel
Save