Compare commits
4 Commits
Author | SHA1 | Date |
---|---|---|
|
937e3f6579 | 2 weeks ago |
|
9c1a627ee0 | 4 years ago |
|
a43a7360c9 | 4 years ago |
|
012e15e1bc | 4 years ago |
@ -1,30 +1,21 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "rematch"
|
name = "rematch"
|
||||||
version = "0.3.2"
|
version = "2.0.0"
|
||||||
authors = ["Avril <flanchan@cumallover.me>"]
|
authors = ["Avril <flanchan@cumallover.me>"]
|
||||||
edition = "2024"
|
edition = "2018"
|
||||||
|
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
[profile.release]
|
[profile.release]
|
||||||
|
# We have multiple "crates" in this project, so fat LTO and single codegen for better inlining and cross-crate optimisation.
|
||||||
opt-level = 3
|
opt-level = 3
|
||||||
lto = true
|
lto = "fat"
|
||||||
codegen-units = 1
|
codegen-units = 1
|
||||||
panic = "unwind"
|
panic = "unwind"
|
||||||
strip = true
|
|
||||||
|
|
||||||
[profile.symbols]
|
|
||||||
inherits = "release"
|
|
||||||
strip = false
|
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default= ["perl", "unstable"]
|
perl = ["pcre"]
|
||||||
|
|
||||||
perl = ["dep:pcre2"]
|
|
||||||
unstable = ["regex/unstable"]
|
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
pcre2 = { version = "0.2.9", optional = true }
|
regex = "1"
|
||||||
regex = { version = "1.11.1", features = ["use_std"] }
|
pcre = { version = "0.2.3", optional = true }
|
||||||
color-eyre = { version = "0.6.3", default-features = false, features = ["track-caller"] }
|
|
||||||
owo-colors = { version = "3.5.0", features = ["alloc", "supports-colors"] }
|
|
||||||
|
@ -0,0 +1,35 @@
|
|||||||
|
#![allow(dead_code)]
|
||||||
|
|
||||||
|
use rematch::*;
|
||||||
|
|
||||||
|
fn main() -> Result<(), Box<dyn std::error::Error>>
|
||||||
|
{
|
||||||
|
let args: Vec<String> = std::env::args().collect();
|
||||||
|
|
||||||
|
if args.len() < 4 {
|
||||||
|
println!("Usage: {} <str> <regex> <group>", args[0]);
|
||||||
|
println!("Pass `-' as `<str>' to read lines from stdin");
|
||||||
|
std::process::exit(1);
|
||||||
|
} else {
|
||||||
|
let re = re::Regex::compile(&args[2])?;
|
||||||
|
let text = &args[1];
|
||||||
|
let group: usize = args[3].parse().expect("Invalid group number.");
|
||||||
|
|
||||||
|
if text == "-" {
|
||||||
|
text::stdin_lines(|text| -> Result<bool, re::Error> {
|
||||||
|
match re.exec(&text)? {
|
||||||
|
Some(g) if g.len() > group => println!("{}", &g[group]),
|
||||||
|
_ => (),
|
||||||
|
}
|
||||||
|
Ok(true)
|
||||||
|
})?;
|
||||||
|
} else {
|
||||||
|
|
||||||
|
match re.exec(&text)? {
|
||||||
|
Some(g) if g.len() > group => println!("{}", &g[group]),
|
||||||
|
_ => (),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
@ -0,0 +1,9 @@
|
|||||||
|
use rematch::*;
|
||||||
|
|
||||||
|
pub mod op;
|
||||||
|
|
||||||
|
fn main()
|
||||||
|
{
|
||||||
|
|
||||||
|
println!("Hello world!");
|
||||||
|
}
|
@ -0,0 +1,211 @@
|
|||||||
|
//! Operations traits and definitions
|
||||||
|
use super::*;
|
||||||
|
use std::marker::{
|
||||||
|
Send, Sync,
|
||||||
|
};
|
||||||
|
use std::io;
|
||||||
|
use std::borrow::Cow;
|
||||||
|
use std::any::Any;
|
||||||
|
|
||||||
|
/// Format of an operation.
|
||||||
|
/// References the input string for fields, which need to be extracted.
|
||||||
|
///
|
||||||
|
/// Follows the convention `<opcode><sep><find><sep><replace><sep><flags>`.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
pub struct Format<'a>
|
||||||
|
{
|
||||||
|
// TODO: `_line` fields represent an array of char opcode/flags. There should be methods to handle these outside of the actual slice
|
||||||
|
opcode_line: &'a str,
|
||||||
|
pub seperator: char,
|
||||||
|
pub find: &'a str,
|
||||||
|
pub replace: &'a str,
|
||||||
|
flags_line: &'a str,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An `OperationSource` defines and handles an operation on a specific `Format`.
|
||||||
|
///
|
||||||
|
/// The source should produce an `Operation` that handles the `input` format the source is given.
|
||||||
|
/// Setup can include things like compiling the regex, preparing the replacement pattern from the `replace` string in the `Format`, etc.
|
||||||
|
pub trait OperationSource
|
||||||
|
{
|
||||||
|
const OPCODE: Option<char>;
|
||||||
|
|
||||||
|
type Error: std::error::Error;
|
||||||
|
type Operation: Operation;
|
||||||
|
|
||||||
|
/// Test `Format` to see if operation matches.
|
||||||
|
///
|
||||||
|
/// This method should check that the opcode(s) provided by the user are for this operation, and return `Ok(true)` if they are, `Ok(false)` if not. If they are intended for this operation but are invalid, an error can be returned to the user.
|
||||||
|
///
|
||||||
|
/// This method should not validate flags or perform any other validation of `input`. That should be handled by `create_operation()`, because even when this function returns an error, the search for other operations that may be valid still continues.
|
||||||
|
///
|
||||||
|
/// This happens after `OPCODE` is checked, if there is a predefined opcode.
|
||||||
|
/// If not, then this is always called.
|
||||||
|
fn visit_operation(&self, input: &Format<'_>) -> Result<bool, Self::Error>;
|
||||||
|
/// Create an operation of this type for this specific `Format`.
|
||||||
|
///
|
||||||
|
/// The source should handle any setup of the returned operation before the operation is given any input to match/replace on.
|
||||||
|
/// If the format is invalid for this defined operation, an error can be returned to the user.
|
||||||
|
///
|
||||||
|
/// This method will be called only after `visit_operation` returns `Ok(true)`. The opcode can be assumed to be correct (as long as `visit_operation()` is correct).
|
||||||
|
/// This method should validate flags and the input/output format before returning `Ok(...)`.
|
||||||
|
/// An error returned from this function will terminate the program with an error code, as this operation has already been validated as the one the user selected in `visit_operation()`.
|
||||||
|
// TODO: Should this be mutable?
|
||||||
|
fn create_operation(&mut self, input: Format<'_>) -> Result<Self::Operation, Self::Error>;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An operation produced from an `OperationSource` for that source's opcode and the input `Format` the source was given to create this.
|
||||||
|
///
|
||||||
|
/// An operation handles the actual find/replace work in the input stream(s) and producing the output stream(s).
|
||||||
|
/// It is instantiated and set up for the find and replace strings provided by the user by its corresponding `OperationSource`.
|
||||||
|
/// An operation should not handle those things itself, it should begin with a state set up for working on the stream(s).
|
||||||
|
///
|
||||||
|
/// Objects of this trait are used via dynamic dispatch through the wrapper trait `OperationDyn` and should have a `'static` lifetime. They should also be `Send` and `Sync`.
|
||||||
|
pub trait Operation: Send + Sync
|
||||||
|
{
|
||||||
|
/// Define a downcastable type that can be used to hold mutable state between processing of streams in sequence of the *same input*.
|
||||||
|
/// This is optional, if state is not needed between split input streams you can set this to the type `NoState`.
|
||||||
|
///
|
||||||
|
/// The type must be `Send`, as it is possible for the operation instance to be moved between threads between sequential runs.
|
||||||
|
/// The state is only reused for input streams from the same input. As processing unrelated streams (different inputs) can happen concurrently, each new input has a new state object created for it (see `create_state_for()`.)
|
||||||
|
type State: Any + Send + 'static /* = NoState */;
|
||||||
|
|
||||||
|
/// Create a state for this input stream.
|
||||||
|
///
|
||||||
|
/// Since `process_stream()` can be called concurrently, it does not have a mutable receiver for `self`.
|
||||||
|
/// To store state that may need to be mutated between input streams of the same input (called sequentially with the same state), you can define a downcastable type for your state and construct it here before the first (and usually only) stream of this input is sent to `process_stream()`.
|
||||||
|
///
|
||||||
|
/// By default, there is no state.
|
||||||
|
#[inline] fn create_state_for<T: io::Read>(&mut self, _stream: T) -> Option<Self::State>
|
||||||
|
{
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This function is responsible for processing a whole input -> output pipeline. It may be run concurrently.
|
||||||
|
///
|
||||||
|
/// In this function, you should tokenise the input as needed (usually according to `flags` and the needs of the operation itself), and then pass it down to `process_line()`, the output of which should then be written to `output`.
|
||||||
|
//TODO: Should we handle multiple streams in the function itself or have the program call the function for each stream?
|
||||||
|
// Should this be `&mut self`? It would prevent us from calling it concurrently on mutable streams (as objects of this trait must be `Sync`.)
|
||||||
|
// Should we even be working on streams at all? Or should this trait be working on already-split input? I don't think it should.
|
||||||
|
fn process_stream<I: io::Read, O: io::Write>(&self, state: Option<&mut Self::State>, input: I, output: O) -> io::Result<()>;
|
||||||
|
|
||||||
|
/// Process a single line of input.
|
||||||
|
/// This should be called from `process_stream()` when it has extracted a single span of text to match against.
|
||||||
|
/// This function should do the find+replace work.
|
||||||
|
///
|
||||||
|
/// This function may be called from outside the operation's own `process_stream()` (for single line inputs.)
|
||||||
|
fn process_line<'a, T: ?Sized + AsRef<str> + 'a>(&self, state: Option<&mut Self::State>, line: &'a T) -> Cow<'a, str>;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Default type for your `Operation::State` when the operation takes no state.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum NoState{}
|
||||||
|
|
||||||
|
/// Boxed state for an `Operation`.
|
||||||
|
pub(crate) type BoxState = Box<dyn Any + Send + 'static>;
|
||||||
|
/// Mutable reference for `BoxState` or as a dynamic reference to any `Operation::State`.
|
||||||
|
pub(crate) type StateRef<'a> = &'a mut (dyn Any + Send + 'static);
|
||||||
|
|
||||||
|
/// Dynamic dispatch wrapper trait over `Operation`.
|
||||||
|
///
|
||||||
|
/// This is a convenience wrapper that is auto-implemented to allow `Operation` to use generics.
|
||||||
|
pub trait OperationDyn: Send + Sync
|
||||||
|
{
|
||||||
|
fn create_state_for_dyn(&mut self, _stream: &mut dyn io::Read) -> Option<BoxState>;
|
||||||
|
fn process_stream_dyn(&self, state: Option<StateRef<'_>>, input: &mut dyn io::Read, output: &mut dyn io::Write) -> io::Result<()>;
|
||||||
|
fn process_line_dyn<'a>(&self, state: Option<StateRef<'_>>, line: &'a str) -> Cow<'a, str>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<Op: ?Sized> OperationDyn for Op
|
||||||
|
where Op: Operation + Send + Sync
|
||||||
|
{
|
||||||
|
#[inline(always)] fn create_state_for_dyn(&mut self, stream: &mut dyn io::Read) -> Option<BoxState> {
|
||||||
|
self.create_state_for(stream).map(|x| -> BoxState {Box::new(x)})
|
||||||
|
}
|
||||||
|
#[inline(always)] fn process_stream_dyn(&self, state: Option<StateRef<'_>>, input: &mut dyn io::Read, output: &mut dyn io::Write) -> io::Result<()> {
|
||||||
|
self.process_stream(state.map(|m| m.downcast_mut()).flatten(), input, output)
|
||||||
|
}
|
||||||
|
#[inline(always)] fn process_line_dyn<'a>(&self, state: Option<StateRef<'_>>, line: &'a str) -> Cow<'a, str> {
|
||||||
|
self.process_line(state.map(|m| m.downcast_mut()).flatten(), line)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn _assert_obj_safe(_: &(dyn OperationDyn + Send + Sync + 'static), _: BoxOperation<'static>){}
|
||||||
|
|
||||||
|
/// A boxed operation. Operations are generated dynamically and so are dispatched on dynamically.
|
||||||
|
pub type BoxOperation<'bx> = Box<dyn OperationDyn + Send + Sync + 'bx>;
|
||||||
|
|
||||||
|
/// Generic processing functions
|
||||||
|
pub mod proc {
|
||||||
|
use super::*;
|
||||||
|
use io::BufRead;
|
||||||
|
|
||||||
|
pub fn proc_split_stream<Op: Operation,
|
||||||
|
I: io::Read,
|
||||||
|
O: io::Write,
|
||||||
|
P: AsRef<str>>
|
||||||
|
(op: &Op,
|
||||||
|
mut input: I,
|
||||||
|
pa: P,
|
||||||
|
mut output: O) -> io::Result<()>
|
||||||
|
{
|
||||||
|
let pa = pa.as_ref();
|
||||||
|
let mut buf = io::BufReader::new(input);
|
||||||
|
|
||||||
|
|
||||||
|
//TODO: Turn this into an iterator type that takes a stream, reads into a growing buffer until `P` is found, then returns the buffer up to, but not including, `P`, removes that part from the buffer, and continues.
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An iterator that grows an internal buffer from an input stream until a substring of bytes in that buffer is found.
///
/// The iterator then yields the part of the buffer preceding the substring, removes that section and the substring from itself, and continues.
#[derive(Debug)]
pub struct StreamSplitIter<'a, T: ?Sized>
{
    /// Stream the buffer is filled from.
    stream: &'a mut T,
    /// Bytes accumulated from `stream` so far.
    buffer: Vec<u8>,
    /// Number of bytes the buffer is grown by (and at most read) per extension.
    orig_cap: usize,
    /// Substring the stream is to be split on.
    split_on: &'a str,
}

impl<'a, T: ?Sized + 'a> StreamSplitIter<'a, T>
where T: io::Read
{
    /// Perform a single `read()` on the stream, appending at most `orig_cap` bytes to the buffer.
    ///
    /// Returns the bytes that were just appended. An empty slice means the read produced no
    /// data (the stream returned `0`, or `orig_cap` is `0`).
    ///
    /// # Errors
    /// Propagates the error returned by the stream's `read()`. The buffer is restored to the
    /// length it had before the call, so previously read data is never lost.
    fn try_extend_buffer_once(&mut self) -> io::Result<&[u8]>
    {
        let start = self.buffer.len();
        // Make zeroed room at the end of the buffer for the read to fill.
        self.buffer.resize(start + self.orig_cap, 0);

        let outcome = self.stream.read(&mut self.buffer[start..]);
        match outcome {
            Ok(n) => {
                // Keep everything before `start` plus only the `n` bytes actually read.
                // The length must be `start + n`: shrinking to `n` alone would throw away
                // earlier data and leave `start..` out of bounds.
                self.buffer.truncate(start + n);
                Ok(&self.buffer[start..])
            },
            Err(err) => {
                // Drop the unused zero padding again.
                self.buffer.truncate(start);
                Err(err)
            },
        }
    }
}
|
||||||
|
|
||||||
|
impl<'a, T: ?Sized + 'a> Iterator for StreamSplitIter<'a, T>
|
||||||
|
where T: io::Read
|
||||||
|
{
|
||||||
|
type Item = io::Result<Vec<u8>>;
|
||||||
|
fn next(&mut self) -> Option<Self::Item>
|
||||||
|
{
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -1,127 +0,0 @@
|
|||||||
//! Extensions
|
|
||||||
use super::*;
|
|
||||||
use std::{
|
|
||||||
fmt,
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Run an expression on a named value of type `Result<T, U>`,
/// where `T` and `U` have *the same API surface* for the duration of the provided expression.
///
/// The value is matched, and the expression is evaluated in both the `Ok` and the `Err` arm
/// with the identifier rebound to the contained value.
///
/// # Example
/// If there is a value `let mut value: Result<T, U>`, where `T: Write` & `U: BufWrite`;
/// the expression `value.flush()` is valid for both `T` and `U`.
/// Therefore, it can be simplified to be called as so: `unwrap_either!(mut value => value.flush())`.
///
/// # Reference capture vs. `move` capture.
/// Note that by default, the identified value is **moved** *into* the expression.
/// The kind of capture can be controlled by prepending `ref`, `mut`, or `ref mut` to the ident.
///
/// Identifier capture table:
/// - **none** ~default~ - Capture by move, value is immutable in expression.
/// - `mut` - Capture by move, value is mutable in expression.
/// - `ref` - Capture by ref, value is immutable (`&value`) in expression.
/// - `ref mut` - Capture by mutable ref, value is mutable (`&mut value`) in expression. (__NOTE__: `value` must be defined as mutable to take a mutable reference of it.)
///
/// Essentially the same rules as any `match` branch pattern.
macro_rules! unwrap_either {
    // Default: by move, immutable. (This arm must bind by value to match the table above;
    // binding `ref mut` here would demand a `mut` binding and never move the value.)
    ($res:ident => $($rest:tt)+) => {
        match $res {
            Ok($res) => $($rest)+,
            Err($res) => $($rest)+,
        }
    };
    // By mutable reference.
    (ref mut $res:ident => $($rest:tt)+) => {
        match $res {
            Ok(ref mut $res) => $($rest)+,
            Err(ref mut $res) => $($rest)+,
        }
    };
    // By shared reference.
    (ref $res:ident => $($rest:tt)+) => {
        match $res {
            Ok(ref $res) => $($rest)+,
            Err(ref $res) => $($rest)+,
        }
    };
    // By move, mutable.
    (mut $res:ident => $($rest:tt)+) => {
        match $res {
            Ok(mut $res) => $($rest)+,
            Err(mut $res) => $($rest)+,
        }
    };
}
|
|
||||||
|
|
||||||
pub(crate) use unwrap_either;
|
|
||||||
|
|
||||||
|
|
||||||
/// A borrowed list of fragments that is formatted as one string.
///
/// Its `Display` output is each fragment written in order with nothing in between.
/// The type is an unsized, transparent wrapper over `[&'a T]` and is only used behind a reference.
#[derive(Debug, PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct DisjointString<'a, T: ?Sized>([&'a T]);

/// Build a `&DisjointString` from a comma-separated list of fragment expressions.
macro_rules! disjoint {
    [$($ex:expr),+] => {
        $crate::ext::DisjointString::from_array(& [$($ex),+])
    };
}

impl<'a, T: ?Sized> DisjointString<'a, T>
where T: fmt::Display
{
    /// View a fixed-size array of fragments as a `DisjointString`.
    #[inline]
    pub const fn from_array<'o: 'a, const N: usize>(strings: &'o [&'a T; N]) -> &'o Self
    {
        Self::new(strings.as_slice())
    }

    /// View a slice of fragments as a `DisjointString`.
    #[inline]
    pub const fn new<'o: 'a>(strings: &'o [&'a T]) -> &'o Self
    {
        // SAFETY: `Self` is a `#[repr(transparent)]` newtype over `[&'a T]`, so the two
        // pointer types share layout and slice-length metadata, and the returned reference
        // carries the same lifetime as the input one.
        unsafe {
            &*(strings as *const [&'a T] as *const Self)
        }
    }
}

impl<'a, T: ?Sized> DisjointString<'a, T>
{
    /// Number of fragments (not the length of the formatted text).
    #[inline]
    pub const fn len(&self) -> usize
    {
        self.0.len()
    }

    /// Iterate over the fragments in order, borrowed for as long as `self`.
    #[inline]
    pub fn iter(&self) -> impl Iterator<Item = &T> + ExactSizeIterator + std::iter::FusedIterator + std::iter::DoubleEndedIterator
    {
        self.0.iter().copied()
    }

    /// Iterate over the fragments in order, yielding them with their original lifetime `'a`.
    #[inline]
    pub fn into_iter<'o: 'a>(&'o self) -> impl Iterator<Item = &'a T> + ExactSizeIterator + std::iter::FusedIterator + std::iter::DoubleEndedIterator + 'o
    {
        self.0.iter().copied()
    }
}

impl<'a, T: ?Sized> AsRef<[&'a T]> for DisjointString<'a, T>
{
    /// Borrow the underlying slice of fragments.
    #[inline]
    fn as_ref(&self) -> &[&'a T]
    {
        &self.0
    }
}

impl<'a, T: ?Sized> fmt::Display for DisjointString<'a, T>
where T: fmt::Display
{
    /// Write every fragment to `f` in order, stopping at the first formatting error.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
    {
        self.0.iter().try_for_each(|s| fmt::Display::fmt(*s, f))
    }
}
|
|
||||||
|
|
||||||
pub(crate) use disjoint;
|
|
@ -0,0 +1,5 @@
|
|||||||
|
#![allow(dead_code)]
|
||||||
|
|
||||||
|
pub mod re;
|
||||||
|
pub mod text;
|
||||||
|
|
@ -1,153 +0,0 @@
|
|||||||
#![allow(dead_code)]
|
|
||||||
|
|
||||||
mod re;
|
|
||||||
mod text;
|
|
||||||
mod ext; use ext::*;
|
|
||||||
|
|
||||||
use color_eyre::{
|
|
||||||
eyre::{
|
|
||||||
self,
|
|
||||||
eyre,
|
|
||||||
WrapErr as _,
|
|
||||||
},
|
|
||||||
SectionExt as _, Help as _,
|
|
||||||
};
|
|
||||||
|
|
||||||
fn initialise() -> eyre::Result<()>
|
|
||||||
{
|
|
||||||
color_eyre::install()?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Write the requested capture groups of one match to `to` as a single tab-separated line.
///
/// * `to` - sink the line is written to.
/// * `g` - the captures of the match, addressed by position; it is re-iterated once per
///   requested index, hence the `Copy` bound.
/// * `groups` - the capture indices to print, in output order.
///
/// A capture that is `None` is printed as an empty field. An index that is out of range for
/// `g` produces a warning on stderr and is skipped entirely (no field, no delimiter).
/// The terminating newline is only written if at least one field was printed.
///
/// # Errors
/// Returns the first I/O error raised while writing to `to`.
#[inline]
fn print_groups<'a, S: ?Sized, G, T: 'a, I>(to: &mut S, g: G, groups: I) -> std::io::Result<()>
where S: std::io::Write,
      G: IntoIterator<Item = &'a Option<T>> + Clone + Copy, // NOTE: Copy bound to ensure we're not accidentally doing deep clones of `g`.
      T: std::borrow::Borrow<str>,
      I: IntoIterator,
      I::Item: std::borrow::Borrow<usize>,
{
    use std::borrow::Borrow;

    // `true` until the first field has been written; decides delimiter and final newline.
    let mut first = true;
    for group in groups {
        let index: usize = *Borrow::<usize>::borrow(&group);

        let capture = match g.into_iter().nth(index) {
            Some(capture) => capture,
            None => {
                eprintln!("Warning: Invalid group index {}!", index);
                // An invalid index does not count as a printed field.
                continue;
            },
        };

        // Every field after the first is preceded by a tab.
        if !first {
            to.write_all(b"\t")?;
        }
        // A group that did not match contributes an empty field.
        if let Some(text) = capture {
            to.write_all(Borrow::<str>::borrow(text).as_bytes())?;
        }
        first = false;
    }

    if first {
        Ok(())
    } else {
        to.write_all(b"\n")
    }
}
|
|
||||||
|
|
||||||
fn main() -> eyre::Result<()>
|
|
||||||
{
|
|
||||||
initialise().wrap_err("Fatal: Failed to install panic handle")?;
|
|
||||||
|
|
||||||
//let cli = args::parse_cli();//.wrap_err("Error parsing command-line arguments")?;
|
|
||||||
|
|
||||||
//eprintln!("{:#?}", cli);
|
|
||||||
// return Ok(());
|
|
||||||
|
|
||||||
let args: re::FrozenVec<re::FrozenString> = std::env::args().map(String::into_boxed_str).collect();
|
|
||||||
|
|
||||||
if args.len() < 4 {
|
|
||||||
use owo_colors::OwoColorize;
|
|
||||||
use owo_colors::Stream;
|
|
||||||
|
|
||||||
macro_rules! colour {
|
|
||||||
(in $name:ident: $fmt:expr => $col:ident) => {
|
|
||||||
$fmt.if_supports_color(Stream::$name, |text| text.$col())
|
|
||||||
};
|
|
||||||
($fmt:expr => $col:ident) => {
|
|
||||||
colour!(in Stdout: $fmt => $col)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
println!("rematch v{}: Regular-expression group matcher", env!("CARGO_PKG_VERSION"));
|
|
||||||
println!("");
|
|
||||||
println!("Usage: {} <str> <regex> <group>...", args[0]);
|
|
||||||
println!("Pass `-' as `<str>' to read lines from stdin");
|
|
||||||
println!("");
|
|
||||||
println!("Enabled Features:");
|
|
||||||
if cfg!(feature="perl") {
|
|
||||||
println!("{}\t\t\tEnable PCRE2 (extended) regular-expressions.\n\t\t\tNote that PCRE2 regex engine matches on *bytes*, not *characters*; meaning if a match cuts a vlid UTF8 codepoint into an invalid one, the output will replace the invalid characters with U+FFFD REPLACEMENT CHARACTER.", colour!(disjoint!["+", "perl"] => bright_red));
|
|
||||||
} else {
|
|
||||||
println!("{}\t\t\tPCRE2 (extended) features are disabled; a faster but less featureful regular expression engine (that matches on UTF8 strings instead of raw bytes) is used instead.", colour!(disjoint!["-", "perl"] => blue));
|
|
||||||
}
|
|
||||||
if cfg!(feature="unstable") {
|
|
||||||
println!("{}\t\tUnstable optimisations evailable & enabled for build.", colour!(disjoint!["+", "unstable"] => red));
|
|
||||||
} else {
|
|
||||||
println!("{}\t\tUnstable optimisations disabled / not available for build.", colour!(disjoint!["-", "unstable"] => bright_blue));
|
|
||||||
}
|
|
||||||
std::process::exit(1)
|
|
||||||
} else {
|
|
||||||
let re = re::Regex::compile(&args[2])?;
|
|
||||||
let text = &args[1];
|
|
||||||
|
|
||||||
let groups = &args[3..];
|
|
||||||
|
|
||||||
if groups.len() < 1 {
|
|
||||||
eprintln!("Warning: No capture groups requested.");
|
|
||||||
// NOTE: Unexpected branch...
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
let groups = groups.iter().enumerate()
|
|
||||||
.map(|(i, x)| x.parse()
|
|
||||||
.with_section(|| format!("{:?}", groups).header("Groups specified were"))
|
|
||||||
.with_section(|| x.clone().header("Specified capture group index was"))
|
|
||||||
.with_section(move || i.header("Argument index in provided groups")))
|
|
||||||
.collect::<Result<Box<[usize]>, _>>()
|
|
||||||
.wrap_err("Invalid group index specified")?;
|
|
||||||
|
|
||||||
//TODO: XXX: How to handle multiple groups in `stdin_lines()` case?
|
|
||||||
//let group = groups[0]; //args[3].parse().expect("Invalid group number.");
|
|
||||||
|
|
||||||
use std::io::Write;
|
|
||||||
let mut stdout = std::io::stdout();
|
|
||||||
|
|
||||||
let stdout = if &text[..] == "-" {
|
|
||||||
let mut stdout = std::io::BufWriter::new(stdout.lock());
|
|
||||||
text::stdin_lines(|text| -> eyre::Result<bool> {
|
|
||||||
match re.exec(&text)? {
|
|
||||||
Some(g) /*if g.len() > group*/ => // NOTE: This check branch has now been moved into `print_groups()`
|
|
||||||
print_groups(&mut stdout, &g, &groups)?, //println!("{}", &g[group]),
|
|
||||||
_ => (),
|
|
||||||
}
|
|
||||||
Ok(true)
|
|
||||||
})?;
|
|
||||||
|
|
||||||
Some(stdout)
|
|
||||||
} else {
|
|
||||||
|
|
||||||
match re.exec(&text)? {
|
|
||||||
Some(g) /*if g.len() > group*/ => print_groups(&mut stdout, &g[..], &groups)?,//println!("{}", &g.nth(group).unwrap().map(|x| x.as_ref()).unwrap_or("")),
|
|
||||||
_ => (),
|
|
||||||
}
|
|
||||||
|
|
||||||
None
|
|
||||||
}.ok_or_else(move || stdout);
|
|
||||||
unwrap_either!(mut stdout => stdout.flush()).unwrap();
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
Loading…
Reference in new issue