[For feature `perl`]: Started re-work of `re` to allow dynamic dispatch, so the user can select whether PCRE extensions are enabled. Fortune for rematch's current commit: Half blessing − 半吉 cli-refactor
parent
0b04b94d0c
commit
9fdf0817ae
@ -0,0 +1,290 @@
|
|||||||
|
//! Command-line argument definitions and CLI parsing.
|
||||||
|
use super::*;
|
||||||
|
use std::{
|
||||||
|
str,
|
||||||
|
error, fmt,
|
||||||
|
borrow::{
|
||||||
|
Borrow, Cow, ToOwned,
|
||||||
|
},
|
||||||
|
path::{
|
||||||
|
Path, PathBuf,
|
||||||
|
},
|
||||||
|
//collections::BTreeSet as Set,
|
||||||
|
};
|
||||||
|
use clap::{
|
||||||
|
Parser,
|
||||||
|
Args,
|
||||||
|
Subcommand,
|
||||||
|
ValueEnum,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// A value that is either given explicitly, or deferred to a standard stream
/// (read from `stdin` / written to `stdout`).
///
/// The variant order matters for the derived `PartialOrd`/`Ord`: `Stdio`
/// sorts before every `Value(_)`.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)]
pub enum MaybeValue<T = String> {
    /// No explicit value: use the standard stream instead.
    Stdio,
    /// An explicitly provided value.
    Value(T),
}

impl<T> MaybeValue<T> {
    /// Textual placeholder that stands for the [`Stdio`](Self::Stdio) variant.
    pub const STDIO_SYMBOL: &'static str = "-";

    /// Returns `true` when this is the [`Stdio`](Self::Stdio) variant.
    #[inline]
    pub const fn is_stdio(&self) -> bool {
        matches!(self, Self::Stdio)
    }

    /// Borrows the explicit value, or returns `None` for [`Stdio`](Self::Stdio).
    #[inline]
    pub const fn value(&self) -> Option<&T> {
        match self {
            Self::Stdio => None,
            Self::Value(inner) => Some(inner),
        }
    }

    /// Returns `true` when an explicit value is present.
    #[inline(always)]
    pub const fn has_value(&self) -> bool {
        matches!(self, Self::Value(_))
    }

    /// Convert the value type to `U` (if there is one.)
    ///
    /// `Stdio` stays `Stdio`; `Value(v)` becomes `Value(U::from(v))`.
    ///
    /// e.g. `let _: MaybeValue<PathBuf> = MaybeValue::Value(String::new()).map_into();`
    #[inline]
    pub fn map_into<U: From<T>>(self) -> MaybeValue<U> {
        match self {
            Self::Stdio => MaybeValue::Stdio,
            Self::Value(inner) => MaybeValue::Value(U::from(inner)),
        }
    }

    /// Consume into the inner `T` if there is one; otherwise hand the
    /// (`Stdio`) value back as `Err(Self)`.
    #[inline]
    #[must_use]
    pub fn try_into_value(self) -> Result<T, Self> {
        match self {
            Self::Value(inner) => Ok(inner),
            Self::Stdio => Err(Self::Stdio),
        }
    }
}
|
||||||
|
|
||||||
|
impl<T: AsRef<str>> AsRef<str> for MaybeValue<T>
|
||||||
|
{
|
||||||
|
#[inline]
|
||||||
|
fn as_ref(&self) -> &str
|
||||||
|
{
|
||||||
|
match self {
|
||||||
|
Self::Stdio => Self::STDIO_SYMBOL,
|
||||||
|
Self::Value(v) => v.as_ref(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Borrow<str>> Borrow<str> for MaybeValue<T>
|
||||||
|
{
|
||||||
|
#[inline]
|
||||||
|
fn borrow(&self) -> &str
|
||||||
|
{
|
||||||
|
match self {
|
||||||
|
Self::Stdio => Self::STDIO_SYMBOL,
|
||||||
|
Self::Value(v) => v.borrow(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
impl<T> Default for MaybeValue<T>
|
||||||
|
{
|
||||||
|
#[inline]
|
||||||
|
fn default() -> Self
|
||||||
|
{
|
||||||
|
Self::Stdio
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Into<String>> MaybeValue<T>
|
||||||
|
{
|
||||||
|
#[inline]
|
||||||
|
pub fn into_string(self) -> Cow<'static, str>
|
||||||
|
{
|
||||||
|
match self {
|
||||||
|
Self::Value(v) => Cow::Owned(v.into()),
|
||||||
|
Self::Stdio => Cow::Borrowed(Self::STDIO_SYMBOL),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Into<PathBuf>> MaybeValue<T>
|
||||||
|
{
|
||||||
|
#[inline]
|
||||||
|
pub fn into_path(self) -> Cow<'static, Path>
|
||||||
|
{
|
||||||
|
match self {
|
||||||
|
Self::Value(v) => Cow::Owned(v.into()),
|
||||||
|
Self::Stdio => Cow::Borrowed(Path::new(Self::STDIO_SYMBOL)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Into<PathBuf>> From<MaybeValue<T>> for Box<Path>
|
||||||
|
{
|
||||||
|
#[inline]
|
||||||
|
fn from(value: MaybeValue<T>) -> Self {
|
||||||
|
value.into_path().into_owned().into_boxed_path()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl<T: Into<String>> From<MaybeValue<T>> for Box<str>
|
||||||
|
{
|
||||||
|
#[inline]
|
||||||
|
fn from(value: MaybeValue<T>) -> Self {
|
||||||
|
value.into_string().into_owned().into_boxed_str()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Into<String>> From<MaybeValue<T>> for Cow<'static, str>
|
||||||
|
{
|
||||||
|
fn from(from: MaybeValue<T>) -> Self
|
||||||
|
{
|
||||||
|
from.into_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
impl<T: From<String>> From<String> for MaybeValue<T>
|
||||||
|
{
|
||||||
|
#[inline]
|
||||||
|
fn from(from: String) -> Self
|
||||||
|
{
|
||||||
|
match &from[..] {
|
||||||
|
Self::STDIO_SYMBOL => Self::Stdio,
|
||||||
|
_ => Self::Value(from.into()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> str::FromStr for MaybeValue<T>
|
||||||
|
where T: str::FromStr {
|
||||||
|
type Err = T::Err;
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||||
|
match s {
|
||||||
|
Self::STDIO_SYMBOL => Ok(Self::Stdio),
|
||||||
|
s => T::from_str(s).map(Self::Value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// User-provied configuration of how the program should behave here
|
||||||
|
#[derive(Debug, Args)]
|
||||||
|
pub struct Config
|
||||||
|
{
|
||||||
|
/// Use the PCRE (JS-like) extended regular expression compiler
|
||||||
|
///
|
||||||
|
/// # Feature difference
|
||||||
|
/// By default, the expression syntax does not support things like negative lookahead and other backtrack-requiring regex features.
|
||||||
|
///
|
||||||
|
/// ## Efficiency
|
||||||
|
/// Note that non-PCRE expressions are more efficient in general, and can also enable parallel processing of strings where there are many (e.g. a long list of lines from `stdin` can be matched against in parallel.)
|
||||||
|
///
|
||||||
|
/// It is ill-advised to enable PCRE on large inputs unless those features are required.
|
||||||
|
//TODO: Should we have PCRE on by default or not...? I think we should maybe have it on by default if the feature is enabled... But that will mess with input parallelism... XXX: Perhaps we can auto-detect if to use PCRE or not (e.g. try compiling to regex first, then PCRE if that fails?)
|
||||||
|
#[arg(short, long)]
|
||||||
|
#[cfg(feature="perl")] //XXX: Do we want this option to be feature-gated? Or should we fail with error `if (! cfg!(feature="perl")) && self.extended)`? I think the latter would make things more easily (since the Regex engine gates PCRE-compilation transparently to the API user [see `crate::re::Regex`], we don't need to gate it this way outside of `re`, if we remove this gate we can just use `cfg!()` everywhere here which makes things **MUCH** cleaner..) It also means the user of a non-PCRE build will at least know why their PCRE flag is failing and that it can be built with the "perl" feature, instead of it being *totally* invisible to the user if the feature is off.
|
||||||
|
extended: bool,
|
||||||
|
|
||||||
|
/// Delimit read input/output strings from/to `stdin`/`stdout` by NUL ('\0') characters instead of newlines.
|
||||||
|
///
|
||||||
|
/// This only affects the output of each string's match groups, not the groups themselves, those will still be delimited by TAB literals in the output.
|
||||||
|
#[arg(short='0', long)]
|
||||||
|
pub zero: bool, //XXX: Add `--field=`/`--ifs` option, put these in same group. Maybe add `--delimit-groups=` to change the group delimiter from `\t` to user-specified value.
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Config
|
||||||
|
{
|
||||||
|
/// Whether it is requested to use PCRE regex instead of regular regex.
|
||||||
|
///
|
||||||
|
/// # Interaction with feature gating of ~actual~ PCRE support via `feature="perl"`
|
||||||
|
/// Note that if the "perl" feature is not enabled, this may still return `true`.
|
||||||
|
/// If the user requests PCRE where it is not available, the caller should return an error/panic to the user telling her that.
|
||||||
|
#[inline]
|
||||||
|
pub fn use_pcre(&self) -> bool
|
||||||
|
{
|
||||||
|
#![allow(unreachable_code)]
|
||||||
|
#[cfg(feature="perl")] return self.extended; //TODO: See above comment on un-gating `self.extended`
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A string value that may be provided to the CLI, or delegated to `stdio`.
|
||||||
|
pub type MaybeString = MaybeValue<Box<str>>;
|
||||||
|
/// A path that may represent an `stdio` file-descriptor instead of a named file.
|
||||||
|
pub type MaybePath = MaybeValue<Box<Path>>;
|
||||||
|
|
||||||
|
/// `rematch` is a simple command-line tool for matching & printing capture groups of an input string(s) against a regular expression.
|
||||||
|
///
|
||||||
|
/// The input string(s) can be provided in the command-line, or they can be provided as line delimited (by default) stream from `stdin`.
|
||||||
|
#[derive(Debug, Parser)]
|
||||||
|
#[command(name = env!("CARGO_PKG_NAME"), version, about, long_about)]
|
||||||
|
pub struct Cli
|
||||||
|
{
|
||||||
|
/// Configuration of the execution
|
||||||
|
#[command(flatten)]
|
||||||
|
pub config: Config,
|
||||||
|
|
||||||
|
//XXX: Should we make these fields public?
|
||||||
|
/// The input string to use, or `-` to read from stdin.
|
||||||
|
//TODO: Support multiple input strings in non-`stdin` case too. (XXX: How should this be handled...?)
|
||||||
|
string: MaybeString,
|
||||||
|
/// The regular expression to match `string` on.
|
||||||
|
regex: String,
|
||||||
|
/// The regex capture group indecies to print when matches on `string`.
|
||||||
|
#[arg(required= true, trailing_var_arg = true, allow_hyphen_values = false, num_args=1..)]
|
||||||
|
//TODO: Allow ranges & fallible captures, so lines that match group 1 but not 2 will not cause output failure if given `1 2?` but will if given `1 2` (XXX: Is this actually meaningful/possible? Can we do this at all? I'm pretty sure `/(?:(.))?/` still creates an (empty) group? So perhaps, syntax for failing on *empty* group matches...? like, `1! 2` for "group #1 *required*, group #2 is not requested?")
|
||||||
|
groups: Vec<usize>, // TODO: How to dedup (XXX: Do we want to de-dup? Maybe the user wants group `1` twice? I think it's fine (also we need to preserve user ordering of group indecied))
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Cli {
|
||||||
|
/// Get the input string to match on
|
||||||
|
///
|
||||||
|
/// If the requested input is `stdin`, `None` is returned.
|
||||||
|
#[inline]
|
||||||
|
pub fn input_string(&self) -> Option<&str>
|
||||||
|
{
|
||||||
|
self.string.value().map(AsRef::as_ref)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the string to build the regular expression from
|
||||||
|
pub fn regex_string(&self) -> &str
|
||||||
|
{
|
||||||
|
&self.regex[..]
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the match group(s) to print in the output
|
||||||
|
#[inline]
|
||||||
|
pub fn groups(&self) -> &[usize]
|
||||||
|
{
|
||||||
|
&self.groups[..]
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the number of match groups requested.
|
||||||
|
#[inline]
|
||||||
|
pub fn num_groups(&self) -> usize
|
||||||
|
{
|
||||||
|
self.groups.len()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse the command-line arguments passed to the program
|
||||||
|
pub fn parse_cli() -> Cli
|
||||||
|
{
|
||||||
|
clap::Parser::parse()
|
||||||
|
}
|
Loading…
Reference in new issue