From a43a7360c9082ff74c138e2e8802dbed5df973c6 Mon Sep 17 00:00:00 2001 From: Avril Date: Tue, 24 Aug 2021 10:08:24 +0100 Subject: [PATCH] Added Operation and related traits for defining processes to operate on inputs. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Moved source structure into subdirectory. Fortune for rematch's current commit: Middle blessing − 中吉 --- src/bin/repatch.rs | 6 -- src/bin/repatch/main.rs | 9 +++ src/bin/repatch/op.rs | 136 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 145 insertions(+), 6 deletions(-) delete mode 100644 src/bin/repatch.rs create mode 100644 src/bin/repatch/main.rs create mode 100644 src/bin/repatch/op.rs diff --git a/src/bin/repatch.rs b/src/bin/repatch.rs deleted file mode 100644 index 95bebe5..0000000 --- a/src/bin/repatch.rs +++ /dev/null @@ -1,6 +0,0 @@ -use rematch::*; - -fn main() -{ - println!("Hello world!"); -} diff --git a/src/bin/repatch/main.rs b/src/bin/repatch/main.rs new file mode 100644 index 0000000..20c505a --- /dev/null +++ b/src/bin/repatch/main.rs @@ -0,0 +1,9 @@ +use rematch::*; + +pub mod op; + +fn main() +{ + + println!("Hello world!"); +} diff --git a/src/bin/repatch/op.rs b/src/bin/repatch/op.rs new file mode 100644 index 0000000..0a9f54f --- /dev/null +++ b/src/bin/repatch/op.rs @@ -0,0 +1,136 @@ +//! Operations traits and definitions +use super::*; +use std::marker::{ + Send, Sync, +}; +use std::io; +use std::borrow::Cow; +use std::any::Any; + +/// Format of an operation. +/// References the input string for fields, which need to be extracted. +/// +/// Follows the convention ``. NOTE(review): the convention text appears to be missing from the backticks; presumably it mirrors the field order below (opcode, separator, find, separator, replace, separator, flags) - confirm. +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Format<'a> +{ + // TODO: `_line` fields represent an array of char opcode/flags. 
There should be methods to handle these outside of the actual slice + opcode_line: &'a str, + pub seperator: char, + pub find: &'a str, + pub replace: &'a str, + flags_line: &'a str, +} + +/// An `OperationSource` defines and handles an operation on a specific `Format`. +/// +/// The source should produce an `Operation` that handles the `input` format the source is given. +/// Setup can include things like compiling the regex, preparing the replacement pattern from the `replace` string in the `Format`, etc. +pub trait OperationSource +{ + const OPCODE: Option; + + type Error: std::error::Error; + type Operation: Operation; + + /// Test `Format` to see if operation matches. + /// + /// This method should check that the opcode(s) provided by the user are for this operation, and return `Ok(true)` if they are, `Ok(false)` if not. If it/they is/are intended for this operation but are invalid, an error can be returned to the user. + /// + /// This method should not validate flags or perform any other validation of `input`. That should be handled by `create_operation()`, as the program will still attempt to look for other operations that may be valid after an error is returned from this function. + /// + /// This happens after `OPCODE` is checked, if there is a predefined opcode. + /// If not, then this is always called. + fn visit_operation(&self, input: &Format<'_>) -> Result; + /// Create an operation of this type for this specific `Format`. + /// + /// The source should handle any setup of the returned operation before the operation is given any input to match/replace on. + /// If the format is invalid for this defined operation, an error can be returned to the user. + /// + /// This method will be called only after `visit_operation` returns `Ok(true)`. The opcode can be assumed to be correct (as long as `visit_operation()` is correct). + /// This method should validate flags and the input/output format before returning `Ok(...)`. 
+ /// An error returned from this function will terminate the program with an error code, as this operation has already been validated as the one the user selected in `visit_operation()`. + // TODO: Should this be mutable? + fn create_operation(&mut self, input: Format<'_>) -> Result; +} + +/// An operation produced from an `OperationSource` for that source's opcode and the input `Format` the source was given to create this. +/// +/// An operation handles the actual find/replace work in the input stream(s) and produces the output stream(s). +/// It is instantiated and set up for the find and replace strings provided by the user by its corresponding `OperationSource`. +/// An operation should not handle those things itself; it should begin with a state set up for working on the stream(s). +/// +/// Objects of this trait are used via dynamic dispatch through the wrapper trait `OperationDyn` and should have a `'static` lifetime. They should also be `Send` and `Sync`. +pub trait Operation: Send + Sync +{ + /// Define a downcastable type that can be used to hold mutable state between processing of streams in sequence of the *same input*. + /// This is optional: if state is not needed between split input streams you can set this to the type `NoState`. + /// + /// The type must be `Send`, as it is possible for the operation instance to be moved between threads between sequential runs. + /// The state is only reused for input streams from the same input. As processing unrelated streams (different inputs) can happen concurrently, each new input has a new state object created for it (see `create_state_for()`.) + type State: Any + Send + 'static; + + /// Create a state for this input stream. + /// + /// Since `process_stream()` can be called concurrently, it does not have a mutable receiver for `self`. 
+ /// To store state that may need to be mutated between input streams of the same input (called sequentially with the same state), you can define a downcastable type for your state and construct it here before the first (and usually only) stream of this input is sent to `process_stream()`. + /// + /// By default, there is no state. + #[inline] fn create_state_for(&mut self, _stream: T) -> Option + { + None + } + + /// This function is responsible for processing a whole input -> output pipeline. It may be run concurrently. + /// + /// In this function, you should tokenise the input as needed (usually according to `flags` and the needs of the operation itself), and then pass it down to `process_line()`, the output of which should then be written to `output`. + //TODO: Should we handle multiple streams in the function itself or have the program call the function for each stream? + // Should this be `&mut self`? It would prevent us from calling it concurrently on mutable streams (as objects of this trait must be `Sync`.) + // Should we even be working on streams at all? Or should this trait be working on already-split input? I don't think it should. + fn process_stream(&self, state: Option<&mut Self::State>, input: I, output: O) -> io::Result<()>; + + /// Process a single line of input. + /// This should be called from `process_stream()` when it has extracted a single span of text to match against. + /// This function should do the find+replace work. + /// + /// This function may be called from outside the operation's own `process_stream()` (for single line inputs.) + fn process_line<'a, T: ?Sized + AsRef + 'a>(&self, state: Option<&mut Self::State>, line: &'a T) -> Cow<'a, str>; +} + +/// Default type for your `Operation::State` when the operation takes no state. +#[derive(Debug)] +pub enum NoState{} + +/// Boxed state for an `Operation`. +pub(crate) type BoxState = Box; +/// Mutable reference for `BoxState` or as a dynamic reference to any `Operation::State`. 
+pub(crate) type StateRef<'a> = &'a mut (dyn Any + Send + 'static); + +/// Dynamic dispatch wrapper trait over `Operation`. +/// +/// This is a convenience wrapper that is auto-implemented to allow `Operation` to use generics. +pub trait OperationDyn: Send + Sync +{ + fn create_state_for_dyn(&mut self, _stream: &mut dyn io::Read) -> Option; + fn process_stream_dyn(&self, state: Option>, input: &mut dyn io::Read, output: &mut dyn io::Write) -> io::Result<()>; + fn process_line_dyn<'a>(&self, state: Option>, line: &'a str) -> Cow<'a, str>; +} + +impl OperationDyn for Op +where Op: Operation + Send + Sync +{ + #[inline(always)] fn create_state_for_dyn(&mut self, stream: &mut dyn io::Read) -> Option { + self.create_state_for(stream).map(|x| -> BoxState {Box::new(x)}) + } + #[inline(always)] fn process_stream_dyn(&self, state: Option>, input: &mut dyn io::Read, output: &mut dyn io::Write) -> io::Result<()> { + self.process_stream(state.map(|m| m.downcast_mut()).flatten(), input, output) + } + #[inline(always)] fn process_line_dyn<'a>(&self, state: Option>, line: &'a str) -> Cow<'a, str> { + self.process_line(state.map(|m| m.downcast_mut()).flatten(), line) + } +} + +fn _assert_obj_safe(_: &(dyn OperationDyn + Send + Sync + 'static), _: BoxOperation<'static>){} + +/// A boxed operation. Operations are generated dynamically and so are dispatched on dynamically. +pub type BoxOperation<'bx> = Box;