Compare commits
4 Commits
Author | SHA1 | Date |
---|---|---|
|
937e3f6579 | 2 weeks ago |
|
9c1a627ee0 | 4 years ago |
|
a43a7360c9 | 4 years ago |
|
012e15e1bc | 4 years ago |
@ -1,30 +1,21 @@
|
||||
[package]
|
||||
name = "rematch"
|
||||
version = "0.3.2"
|
||||
version = "2.0.0"
|
||||
authors = ["Avril <flanchan@cumallover.me>"]
|
||||
edition = "2024"
|
||||
edition = "2018"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[profile.release]
|
||||
# We have multiple "crates" in this project, so fat LTO and single codegen for better inlining and cross-crate optimisation.
|
||||
opt-level = 3
|
||||
lto = true
|
||||
lto = "fat"
|
||||
codegen-units = 1
|
||||
panic = "unwind"
|
||||
strip = true
|
||||
|
||||
[profile.symbols]
|
||||
inherits = "release"
|
||||
strip = false
|
||||
|
||||
[features]
|
||||
default= ["perl", "unstable"]
|
||||
|
||||
perl = ["dep:pcre2"]
|
||||
unstable = ["regex/unstable"]
|
||||
perl = ["pcre"]
|
||||
|
||||
[dependencies]
|
||||
pcre2 = { version = "0.2.9", optional = true }
|
||||
regex = { version = "1.11.1", features = ["use_std"] }
|
||||
color-eyre = { version = "0.6.3", default-features = false, features = ["track-caller"] }
|
||||
owo-colors = { version = "3.5.0", features = ["alloc", "supports-colors"] }
|
||||
regex = "1"
|
||||
pcre = { version = "0.2.3", optional = true }
|
||||
|
@ -0,0 +1,35 @@
|
||||
#![allow(dead_code)]
|
||||
|
||||
use rematch::*;
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>>
|
||||
{
|
||||
let args: Vec<String> = std::env::args().collect();
|
||||
|
||||
if args.len() < 4 {
|
||||
println!("Usage: {} <str> <regex> <group>", args[0]);
|
||||
println!("Pass `-' as `<str>' to read lines from stdin");
|
||||
std::process::exit(1);
|
||||
} else {
|
||||
let re = re::Regex::compile(&args[2])?;
|
||||
let text = &args[1];
|
||||
let group: usize = args[3].parse().expect("Invalid group number.");
|
||||
|
||||
if text == "-" {
|
||||
text::stdin_lines(|text| -> Result<bool, re::Error> {
|
||||
match re.exec(&text)? {
|
||||
Some(g) if g.len() > group => println!("{}", &g[group]),
|
||||
_ => (),
|
||||
}
|
||||
Ok(true)
|
||||
})?;
|
||||
} else {
|
||||
|
||||
match re.exec(&text)? {
|
||||
Some(g) if g.len() > group => println!("{}", &g[group]),
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
@ -0,0 +1,9 @@
|
||||
use rematch::*;
|
||||
|
||||
pub mod op;
|
||||
|
||||
/// Placeholder entry point: prints a greeting line to stdout.
fn main()
{
    let greeting = "Hello world!";
    println!("{}", greeting);
}
|
@ -0,0 +1,211 @@
|
||||
//! Operations traits and definitions
|
||||
use super::*;
|
||||
use std::marker::{
|
||||
Send, Sync,
|
||||
};
|
||||
use std::io;
|
||||
use std::borrow::Cow;
|
||||
use std::any::Any;
|
||||
|
||||
/// The parsed shape of one operation string.
///
/// Every string field borrows from the input string the operation was given in; the fields
/// still need to be extracted from that input by whoever builds the value.
///
/// The input follows the convention `<opcode><sep><find><sep><replace><sep><flags>`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Format<'a>
{
    // TODO: `_line` fields represent an array of char opcode/flags. There should be methods to handle these outside of the actual slice
    /// The `<opcode>` part of the input.
    opcode_line: &'a str,
    /// The `<sep>` character that delimits the parts of the input.
    pub seperator: char,
    /// The `<find>` part of the input.
    pub find: &'a str,
    /// The `<replace>` part of the input.
    pub replace: &'a str,
    /// The `<flags>` part of the input.
    flags_line: &'a str,
}
|
||||
|
||||
/// An `OperationSource` defines and handles an operation on a specific `Format`.
|
||||
///
|
||||
/// The source should produce an `Operation` that handles the `input` format the source it is given.
|
||||
/// Setup can include things like compiling the regex, preparing the replacement pattern from the `replace` string in the `Format`, etc.
|
||||
pub trait OperationSource
|
||||
{
|
||||
const OPCODE: Option<char>;
|
||||
|
||||
type Error: std::error::Error;
|
||||
type Operation: Operation;
|
||||
|
||||
/// Test `Format` to see if operation matches.
|
||||
///
|
||||
/// This method should check that the opcode(s) and provided by the user are for this operation, and return `Ok(true)` if they are, `Ok(false)` if not. If it/they is/are intended for this operation but are invalid, an error can be returned to the user.
|
||||
///
|
||||
/// This methoud should not validate flags of perform any other validation of `input`. That should be handled by `create_operation()`, as an error returned from this function will still attempt to look for other operations that may be valid.
|
||||
///
|
||||
/// This happens after `OPCODE` is checked, if there is a predefined opcode.
|
||||
/// If not, then this is always called.
|
||||
fn visit_operation(&self, input: &Format<'_>) -> Result<bool, Self::Error>;
|
||||
/// Create an operation of this type for this specific `Format`.
|
||||
///
|
||||
/// The source should handle any setup of the returned operation before the operation is given any input to match/replace on.
|
||||
/// If the format is invalid for this defined operation, an error can be returned to the user.
|
||||
///
|
||||
/// This method will be called only after `visit_operation` returns `Ok(true)`. The opcode can be assumed to be correct (as long as `visit_operation()` is correct).
|
||||
/// This method should validate flags and the input/output format before returning `Ok(...)`.
|
||||
/// An error returned from this function will terminate the program with an error code, as this operation has already been validated as the one the user selected in `visit_operation()`.
|
||||
// TODO: Should this be mutable?
|
||||
fn create_operation(&mut self, input: Format<'_>) -> Result<Self::Operation, Self::Error>;
|
||||
}
|
||||
|
||||
/// The find/replace worker that an `OperationSource` creates for its opcode and the `Format` it was given.
///
/// An operation does the actual find/replace work on the input stream(s) and produces the output stream(s).
/// Its `OperationSource` instantiates it and sets it up for the user's find and replace strings; the operation should not do that itself and should start out ready to work on the stream(s).
///
/// Objects of this trait are used via dynamic dispatch through the wrapper trait `OperationDyn` and should have a `'static` lifetime. They should also be `Send` and `Sync`.
pub trait Operation: Send + Sync
{
    /// A downcastable type holding mutable state that is carried between streams of the *same input* that are processed in sequence.
    /// State is optional: if none is needed between split input streams, set this to the type `NoState`.
    ///
    /// The type must be `Send`, because the operation instance may be moved between threads between sequential runs.
    /// A state is only reused for streams of the same input. Unrelated streams (different inputs) can be processed concurrently, so each new input gets a new state object (see `create_state_for()`.)
    type State: Any + Send + 'static /* = NoState */;

    /// Create the state for an input stream.
    ///
    /// `process_stream()` can be called concurrently and therefore has no mutable receiver for `self`.
    /// State that has to be mutated between the streams of one input (which are processed sequentially with the same state) can be given a downcastable type and constructed here, before the first (and usually only) stream of that input is sent to `process_stream()`.
    ///
    /// The default implementation creates no state and returns `None`.
    #[inline]
    fn create_state_for<T>(&mut self, _stream: T) -> Option<Self::State>
    where T: io::Read
    {
        None
    }

    /// Process one whole input -> output pipeline. May be run concurrently.
    ///
    /// An implementation should tokenise the input as needed (usually according to `flags` and the needs of the operation itself), pass each piece to `process_line()`, and write what that returns to `output`.
    //TODO: Should we handle multiple streams in the function itself or have the program call the function for each stream?
    // Should this be `&mut self`? It would prevent us from calling it concurrently on mutable streams (as objects of this trait must be `Sync`.)
    // Should we even be working on streams at all? Or should this trait be working on already-split input? I don't think it should.
    fn process_stream<I, O>(&self, state: Option<&mut Self::State>, input: I, output: O) -> io::Result<()>
    where I: io::Read,
          O: io::Write;

    /// Process a single line of input; this is where the find+replace work happens.
    ///
    /// `process_stream()` should call this once it has extracted a single span of text to match against.
    /// It may also be called from outside the operation's own `process_stream()` (for single line inputs.)
    fn process_line<'a, T>(&self, state: Option<&mut Self::State>, line: &'a T) -> Cow<'a, str>
    where T: ?Sized + AsRef<str> + 'a;
}
|
||||
|
||||
/// The type to use for `Operation::State` when an operation keeps no state.
///
/// The enum has no variants, so no value of it can be constructed.
#[derive(Debug)]
pub enum NoState {}
|
||||
|
||||
/// An owned, type-erased `Operation::State`.
pub(crate) type BoxState = Box<dyn Any + Send>;
/// A mutable, type-erased borrow of an `Operation::State`, either of a `BoxState`'s contents or of any other state value.
pub(crate) type StateRef<'a> = &'a mut (dyn Any + Send + 'static);
|
||||
|
||||
/// Dynamic dispatch wrapper trait over `Operation`.
|
||||
///
|
||||
/// This is a convenience wrapper that is auto-implemented to allow `Operation` to use generics.
|
||||
pub trait OperationDyn: Send + Sync
|
||||
{
|
||||
fn create_state_for_dyn(&mut self, _stream: &mut dyn io::Read) -> Option<BoxState>;
|
||||
fn process_stream_dyn(&self, state: Option<StateRef<'_>>, input: &mut dyn io::Read, output: &mut dyn io::Write) -> io::Result<()>;
|
||||
fn process_line_dyn<'a>(&self, state: Option<StateRef<'_>>, line: &'a str) -> Cow<'a, str>;
|
||||
}
|
||||
|
||||
impl<Op: ?Sized> OperationDyn for Op
|
||||
where Op: Operation + Send + Sync
|
||||
{
|
||||
#[inline(always)] fn create_state_for_dyn(&mut self, stream: &mut dyn io::Read) -> Option<BoxState> {
|
||||
self.create_state_for(stream).map(|x| -> BoxState {Box::new(x)})
|
||||
}
|
||||
#[inline(always)] fn process_stream_dyn(&self, state: Option<StateRef<'_>>, input: &mut dyn io::Read, output: &mut dyn io::Write) -> io::Result<()> {
|
||||
self.process_stream(state.map(|m| m.downcast_mut()).flatten(), input, output)
|
||||
}
|
||||
#[inline(always)] fn process_line_dyn<'a>(&self, state: Option<StateRef<'_>>, line: &'a str) -> Cow<'a, str> {
|
||||
self.process_line(state.map(|m| m.downcast_mut()).flatten(), line)
|
||||
}
|
||||
}
|
||||
|
||||
fn _assert_obj_safe(_: &(dyn OperationDyn + Send + Sync + 'static), _: BoxOperation<'static>){}
|
||||
|
||||
/// A boxed operation. Operations are generated dynamically and so are dispatched on dynamically.
|
||||
pub type BoxOperation<'bx> = Box<dyn OperationDyn + Send + Sync + 'bx>;
|
||||
|
||||
/// Generic processing functions
|
||||
pub mod proc {
|
||||
use super::*;
|
||||
use io::BufRead;
|
||||
|
||||
pub fn proc_split_stream<Op: Operation,
|
||||
I: io::Read,
|
||||
O: io::Write,
|
||||
P: AsRef<str>>
|
||||
(op: &Op,
|
||||
mut input: I,
|
||||
pa: P,
|
||||
mut output: O) -> io::Result<()>
|
||||
{
|
||||
let pa = pa.as_ref();
|
||||
let mut buf = io::BufReader::new(input);
|
||||
|
||||
|
||||
//TODO: Turn this into an iterator type that takes a stream, reads into a growing buffer until `P` is found, then returns the buffer up to, but not including, `P`, removes that part from the buffer, and continues.
|
||||
todo!()
|
||||
}
|
||||
|
||||
/// An iterator that grows an internal buffer from an input stream until a substring of bytes in that buffer is found.
/// The iterator then yields the part of the buffer containing the pre-substring section, removes that section and the substring from itself, and continues.
///
/// NOTE: The `Iterator` implementation is still a `todo!()` stub; only the buffer-growing helper is implemented.
#[derive(Debug)]
pub struct StreamSplitIter<'a, T: ?Sized>
{
    /// The stream that bytes are read from.
    stream: &'a mut T,
    /// Bytes read from `stream` so far.
    buffer: Vec<u8>,
    /// How many bytes the buffer is grown by for a single read attempt.
    orig_cap: usize,
    /// The substring to split on (not read by any code yet).
    split_on: &'a str,
}

impl<'a, T: ?Sized + 'a> StreamSplitIter<'a, T>
where T: io::Read
{
    /// Perform one `read()` of at most `orig_cap` bytes, appending what was read to the buffer.
    ///
    /// Returns the newly appended bytes. The returned slice is empty when the read reports 0
    /// bytes, which is also what happens when `orig_cap` is 0. Bytes that were already in the
    /// buffer are never modified.
    ///
    /// # Errors
    /// Passes on the error of the underlying `read()`; the buffer is restored to the length it
    /// had before the call.
    fn try_extend_buffer_once(&mut self) -> io::Result<&[u8]>
    {
        let start = self.buffer.len();
        // Make zeroed room for the read to fill.
        self.buffer.resize(start + self.orig_cap, 0);

        match self.stream.read(&mut self.buffer[start..]) {
            Ok(read) => {
                // Keep the old contents plus exactly the bytes that were read. The length has
                // to be `start + read`: using just `read` would cut into the bytes that were
                // already buffered and put the returned range out of bounds.
                self.buffer.truncate(start + read);
                Ok(&self.buffer[start..])
            },
            Err(err) => {
                // Drop the room that was never filled.
                self.buffer.truncate(start);
                Err(err)
            },
        }
    }
}

impl<'a, T: ?Sized + 'a> Iterator for StreamSplitIter<'a, T>
where T: io::Read
{
    type Item = io::Result<Vec<u8>>;

    /// Not implemented yet; calling this panics.
    fn next(&mut self) -> Option<Self::Item>
    {
        todo!()
    }
}
|
||||
|
||||
}
|
@ -1,127 +0,0 @@
|
||||
//! Extensions
|
||||
use super::*;
|
||||
use std::{
|
||||
fmt,
|
||||
};
|
||||
|
||||
/// Run an expression on a named value of type `Result<T, U>`, whichever variant it holds.
/// `T` and `U` must have *the same API surface* for everything the provided expression does with the value.
///
/// # Example
/// If there is a value `let mut value: Result<T, U>`, where `T: Write` & `U: BufWrite`;
/// the expression `value.flush()` is valid for both `T` and `U`.
/// Therefore, it can be simplified to be called as so: `unwrap_either!(mut value => value.flush())`.
///
/// # Capture modes
/// The way the identified value is bound inside the expression is selected by what precedes the ident.
/// In every mode the ident names the *inner* value inside the expression, shadowing the `Result`.
///
/// Identifier capture table:
/// - **none** - Capture by mutable ref; identical to `ref mut`.
/// - `ref mut` - Capture by mutable ref, value is mutable (`&mut value`) in expression. (__NOTE__: `value` must be defined as mutable to take a mutable reference of it.)
/// - `ref` - Capture by ref, value is immutable (`&value`) in expression.
/// - `mut` - Capture by move, value is mutable in expression.
///
/// Apart from the bare form, these are the same rules as any `match` branch pattern.
///
/// NOTE(review): This table previously described the bare form as capture by move (immutable), but the macro has always expanded it to `ref mut`.
/// The expansion is left as it is so that existing callers keep compiling; confirm which of the two was intended.
macro_rules! unwrap_either {
    ($res:ident => $($rest:tt)+) => {
        match $res {
            Ok(ref mut $res) => $($rest)+,
            Err(ref mut $res) => $($rest)+,
        }
    };
    (ref mut $res:ident => $($rest:tt)+) => {
        match $res {
            Ok(ref mut $res) => $($rest)+,
            Err(ref mut $res) => $($rest)+,
        }
    };
    (ref $res:ident => $($rest:tt)+) => {
        match $res {
            Ok(ref $res) => $($rest)+,
            Err(ref $res) => $($rest)+,
        }
    };
    (mut $res:ident => $($rest:tt)+) => {
        match $res {
            Ok(mut $res) => $($rest)+,
            Err(mut $res) => $($rest)+,
        }
    };
}
|
||||
|
||||
pub(crate) use unwrap_either;
|
||||
|
||||
|
||||
/// A borrowed sequence of `&T` pieces that is displayed as all pieces written one after another.
///
/// This is an unsized, transparent wrapper over `[&'a T]`; it is only handled behind a reference obtained from `new()` or `from_array()`.
#[derive(Debug, PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct DisjointString<'a, T: ?Sized>([&'a T]);

/// Build a `&DisjointString` from a comma-separated list of expressions, borrowed as a single array.
macro_rules! disjoint {
    [$($piece:expr),+] => {
        $crate::ext::DisjointString::from_array(& [$($piece),+])
    };
}

impl<'a, T> DisjointString<'a, T>
where T: ?Sized + fmt::Display
{
    /// View a fixed-size array of pieces as a `DisjointString`.
    #[inline]
    pub const fn from_array<'o: 'a, const N: usize>(strings: &'o [&'a T; N]) -> &'o Self
    {
        let pieces: &'o [&'a T] = strings;
        Self::new(pieces)
    }

    /// View a slice of pieces as a `DisjointString`.
    #[inline]
    pub const fn new<'o: 'a>(strings: &'o [&'a T]) -> &'o Self
    {
        let pieces: *const [&'a T] = strings;
        // SAFETY: `Self` is a `#[repr(transparent)]` newtype over `[&'a T]`, so both pointers
        // have the same layout and metadata, and the pointee stays borrowed for `'o`.
        unsafe { &*(pieces as *const Self) }
    }
}

impl<'a, T: ?Sized> DisjointString<'a, T>
{
    /// Number of pieces (not the length of the displayed text).
    #[inline]
    pub const fn len(&self) -> usize
    {
        self.0.len()
    }

    /// Iterate over the pieces, borrowed for as long as `self` is.
    #[inline]
    pub fn iter(&self) -> impl Iterator<Item = &T> + ExactSizeIterator + std::iter::FusedIterator + std::iter::DoubleEndedIterator
    {
        self.0.iter().copied()
    }

    /// Iterate over the pieces with their full `'a` lifetime.
    #[inline]
    pub fn into_iter<'o: 'a>(&'o self) -> impl Iterator<Item = &'a T> + ExactSizeIterator + std::iter::FusedIterator + std::iter::DoubleEndedIterator + 'o
    {
        self.0.iter().copied()
    }
}

impl<'a, T: ?Sized> AsRef<[&'a T]> for DisjointString<'a, T>
{
    /// Borrow the underlying slice of pieces.
    #[inline]
    fn as_ref(&self) -> &[&'a T]
    {
        let Self(pieces) = self;
        pieces
    }
}

impl<'a, T: ?Sized> fmt::Display for DisjointString<'a, T>
where T: fmt::Display
{
    /// Write every piece in order with nothing in between, stopping at the first formatting error.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
    {
        self.0.iter().try_for_each(|piece| fmt::Display::fmt(*piece, f))
    }
}
|
||||
|
||||
pub(crate) use disjoint;
|
@ -0,0 +1,5 @@
|
||||
#![allow(dead_code)]
|
||||
|
||||
pub mod re;
|
||||
pub mod text;
|
||||
|
@ -1,153 +0,0 @@
|
||||
#![allow(dead_code)]
|
||||
|
||||
mod re;
|
||||
mod text;
|
||||
mod ext; use ext::*;
|
||||
|
||||
use color_eyre::{
|
||||
eyre::{
|
||||
self,
|
||||
eyre,
|
||||
WrapErr as _,
|
||||
},
|
||||
SectionExt as _, Help as _,
|
||||
};
|
||||
|
||||
fn initialise() -> eyre::Result<()>
|
||||
{
|
||||
color_eyre::install()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Write the requested capture groups of one match to `to` as a single tab-separated line.
///
/// * `to` - where the line is written.
/// * `g` - the capture groups of the match; a `None` entry is printed as an empty field.
/// * `groups` - the indices into `g` to print, in output order.
///
/// An index with no entry in `g` prints a warning to stderr and is skipped entirely (no field, no delimiter).
/// The terminating new-line is only written if at least one index was valid; otherwise nothing is written at all.
///
/// # Errors
/// Returns the first error reported while writing to `to`.
#[inline]
fn print_groups<'a, S: ?Sized, G, T: 'a, I>(to: &mut S, g: G, groups: I) -> std::io::Result<()>
where S: std::io::Write,
      G: IntoIterator<Item = &'a Option<T>> + Clone + Copy, // NOTE: Copy bound to ensure we're not accidentally doing deep clones of `g`.
      T: std::borrow::Borrow<str>,
      I: IntoIterator<Item: std::borrow::Borrow<usize>>,
{
    use std::borrow::Borrow;

    let mut printed_any = false;
    for requested in groups {
        let index: usize = *Borrow::<usize>::borrow(&requested);

        // `g` is `Copy`, so it can be walked from the start for every requested index.
        let Some(capture) = g.into_iter().nth(index) else {
            //TODO: What should be the behaviour of a non-existent group index here? For now it is ignored apart from the warning, and does not count as a printed field.
            eprintln!("Warning: Invalid group index {}!", index);
            continue;
        };

        // Every field after the first one is preceded by a tab.
        let delim = if printed_any { "\t" } else { "" };
        match capture {
            Some(text) => write!(to, "{}{}", delim, Borrow::<str>::borrow(text))?,
            None => write!(to, "{}", delim)?,
        }
        printed_any = true;
    }

    // No field printed means no line to terminate.
    if printed_any {
        to.write_all(b"\n")?;
    }
    Ok(())
}
|
||||
|
||||
fn main() -> eyre::Result<()>
|
||||
{
|
||||
initialise().wrap_err("Fatal: Failed to install panic handle")?;
|
||||
|
||||
//let cli = args::parse_cli();//.wrap_err("Error parsing command-line arguments")?;
|
||||
|
||||
//eprintln!("{:#?}", cli);
|
||||
// return Ok(());
|
||||
|
||||
let args: re::FrozenVec<re::FrozenString> = std::env::args().map(String::into_boxed_str).collect();
|
||||
|
||||
if args.len() < 4 {
|
||||
use owo_colors::OwoColorize;
|
||||
use owo_colors::Stream;
|
||||
|
||||
macro_rules! colour {
|
||||
(in $name:ident: $fmt:expr => $col:ident) => {
|
||||
$fmt.if_supports_color(Stream::$name, |text| text.$col())
|
||||
};
|
||||
($fmt:expr => $col:ident) => {
|
||||
colour!(in Stdout: $fmt => $col)
|
||||
}
|
||||
}
|
||||
|
||||
println!("rematch v{}: Regular-expression group matcher", env!("CARGO_PKG_VERSION"));
|
||||
println!("");
|
||||
println!("Usage: {} <str> <regex> <group>...", args[0]);
|
||||
println!("Pass `-' as `<str>' to read lines from stdin");
|
||||
println!("");
|
||||
println!("Enabled Features:");
|
||||
if cfg!(feature="perl") {
|
||||
println!("{}\t\t\tEnable PCRE2 (extended) regular-expressions.\n\t\t\tNote that PCRE2 regex engine matches on *bytes*, not *characters*; meaning if a match cuts a vlid UTF8 codepoint into an invalid one, the output will replace the invalid characters with U+FFFD REPLACEMENT CHARACTER.", colour!(disjoint!["+", "perl"] => bright_red));
|
||||
} else {
|
||||
println!("{}\t\t\tPCRE2 (extended) features are disabled; a faster but less featureful regular expression engine (that matches on UTF8 strings instead of raw bytes) is used instead.", colour!(disjoint!["-", "perl"] => blue));
|
||||
}
|
||||
if cfg!(feature="unstable") {
|
||||
println!("{}\t\tUnstable optimisations evailable & enabled for build.", colour!(disjoint!["+", "unstable"] => red));
|
||||
} else {
|
||||
println!("{}\t\tUnstable optimisations disabled / not available for build.", colour!(disjoint!["-", "unstable"] => bright_blue));
|
||||
}
|
||||
std::process::exit(1)
|
||||
} else {
|
||||
let re = re::Regex::compile(&args[2])?;
|
||||
let text = &args[1];
|
||||
|
||||
let groups = &args[3..];
|
||||
|
||||
if groups.len() < 1 {
|
||||
eprintln!("Warning: No capture groups requested.");
|
||||
// NOTE: Unexpected branch...
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let groups = groups.iter().enumerate()
|
||||
.map(|(i, x)| x.parse()
|
||||
.with_section(|| format!("{:?}", groups).header("Groups specified were"))
|
||||
.with_section(|| x.clone().header("Specified capture group index was"))
|
||||
.with_section(move || i.header("Argument index in provided groups")))
|
||||
.collect::<Result<Box<[usize]>, _>>()
|
||||
.wrap_err("Invalid group index specified")?;
|
||||
|
||||
//TODO: XXX: How to handle multiple groups in `stdin_lines()` case?
|
||||
//let group = groups[0]; //args[3].parse().expect("Invalid group number.");
|
||||
|
||||
use std::io::Write;
|
||||
let mut stdout = std::io::stdout();
|
||||
|
||||
let stdout = if &text[..] == "-" {
|
||||
let mut stdout = std::io::BufWriter::new(stdout.lock());
|
||||
text::stdin_lines(|text| -> eyre::Result<bool> {
|
||||
match re.exec(&text)? {
|
||||
Some(g) /*if g.len() > group*/ => // NOTE: This check branch has now been moved into `print_groups()`
|
||||
print_groups(&mut stdout, &g, &groups)?, //println!("{}", &g[group]),
|
||||
_ => (),
|
||||
}
|
||||
Ok(true)
|
||||
})?;
|
||||
|
||||
Some(stdout)
|
||||
} else {
|
||||
|
||||
match re.exec(&text)? {
|
||||
Some(g) /*if g.len() > group*/ => print_groups(&mut stdout, &g[..], &groups)?,//println!("{}", &g.nth(group).unwrap().map(|x| x.as_ref()).unwrap_or("")),
|
||||
_ => (),
|
||||
}
|
||||
|
||||
None
|
||||
}.ok_or_else(move || stdout);
|
||||
unwrap_either!(mut stdout => stdout.flush()).unwrap();
|
||||
}
|
||||
Ok(())
|
||||
}
|
Loading…
Reference in new issue