diff --git a/Cargo.toml b/Cargo.toml index 0ad5f3b..d7b82b2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "markov" -version = "0.6.0" +version = "0.6.1" description = "Generate string of text from Markov chain fed by stdin" authors = ["Avril "] edition = "2018" diff --git a/markov.toml b/markov.toml index e2064b8..df5fde0 100644 --- a/markov.toml +++ b/markov.toml @@ -6,4 +6,5 @@ max_gen_size = 256 trust_x_forwarded_for = false [filter] -exclude = "<>)([]/" +inbound = "<>)([]/" +outbound = "*" diff --git a/src/config.rs b/src/config.rs index ebae65e..a820eb8 100644 --- a/src/config.rs +++ b/src/config.rs @@ -32,18 +32,28 @@ pub struct Config #[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Hash, Serialize, Deserialize)] pub struct FilterConfig { - exclude: String, - + inbound: String, + #[serde(default)] + outbound: String, } impl FilterConfig { - pub fn get_filter(&self) -> sanitise::filter::Filter + pub fn get_inbound_filter(&self) -> sanitise::filter::Filter + { + let filt: sanitise::filter::Filter = self.inbound.parse().unwrap(); + if !filt.is_empty() + { + info!("Loaded inbound filter: {:?}", filt.iter().collect::()); + } + filt + } + pub fn get_outbound_filter(&self) -> sanitise::filter::Filter { - let filt: sanitise::filter::Filter = self.exclude.parse().unwrap(); + let filt: sanitise::filter::Filter = self.outbound.parse().unwrap(); if !filt.is_empty() { - warn!("Loaded exclude filter: {:?}", filt.iter().collect::()); + info!("Loaded outbound filter: {:?}", filt.iter().collect::()); } filt } diff --git a/src/ext.rs b/src/ext.rs index eb9b5ce..bc44889 100644 --- a/src/ext.rs +++ b/src/ext.rs @@ -1,6 +1,7 @@ //! 
// This span of SOURCE is part of a `git diff` that was collapsed onto one physical line. The two
// comments tagged "diff context" carry the neighbouring hunk text verbatim so nothing is lost; the
// items in between are the lines this hunk adds to src/ext.rs, reformatted. Generic parameter
// lists that were missing from the collapsed text (`Range<usize>`, `T: AsRef<str>`,
// `R: RangeBounds<usize>`) are reconstructed from how the bodies use them -- TODO confirm against
// the real file.
//
// diff context (preceding): Extensions use std::{ iter, + ops::Range, }; pub trait StringJoinExt: Sized @@ -25,3 +26,71 @@ where I: IntoIterator, string } }

/// Finds where a borrowed sub-slice sits inside `self`.
pub trait FindSliceBounds {
    /// The borrowed slice type that is searched for.
    type SliceType: ?Sized;

    /// Returns the range of `self` that `from` occupies.
    fn slice_bounds(&self, from: &Self::SliceType) -> std::ops::Range<usize>;
}

impl<T: AsRef<str>> FindSliceBounds for T {
    type SliceType = str;

    /// Returns the byte range of `self` covered by `from`, computed by comparing the start and
    /// end addresses of the two string slices.
    ///
    /// # Panics
    ///
    /// Panics if the start or the end address of `from` lies outside the address range of `self`.
    fn slice_bounds(&self, from: &Self::SliceType) -> std::ops::Range<usize> {
        let this: &str = self.as_ref();

        // Only the addresses are compared; nothing is dereferenced, so no `unsafe` is needed.
        let self_start = this.as_ptr() as usize;
        let self_end = self_start + this.len();
        let from_start = from.as_ptr() as usize;
        let from_end = from_start + from.len();

        assert!(
            from_start >= self_start && from_start <= self_end,
            "Start index of slice is outside the bounds of self"
        );
        assert!(
            from_end >= self_start && from_end <= self_end,
            "End index of slice is outside the bounds of self"
        );

        (from_start - self_start)..(from_end - self_start)
    }
}

/// Shrinks a container in place to one of its sub-ranges.
pub trait SliceInPlace {
    /// Removes everything *outside* `slice`, keeping only the elements inside it.
    fn drain_inverse<R: std::ops::RangeBounds<usize>>(&mut self, slice: R);
}

impl SliceInPlace for String {
    /// Removes every byte outside the byte range `slice`, so that afterwards the string holds
    /// exactly what `&self[slice]` referred to before the call.
    ///
    /// # Panics
    ///
    /// Panics under the same conditions as [`String::drain`]: when a bound is past the end of the
    /// string or does not fall on a `char` boundary.
    fn drain_inverse<R: std::ops::RangeBounds<usize>>(&mut self, slice: R) {
        use std::ops::Bound;

        // Cut the tail first: removing it does not shift the bytes in front of it, so the start
        // bound is still a valid index for the second step.
        match slice.end_bound() {
            Bound::Excluded(&end) => drop(self.drain(end..)),
            Bound::Included(&last) => drop(self.drain(last + 1..)),
            Bound::Unbounded => (),
        }
        match slice.start_bound() {
            Bound::Included(&first) => drop(self.drain(..first)),
            // An excluded start keeps bytes from `before + 1` on, so `..=before` has to go.
            Bound::Excluded(&before) => drop(self.drain(..before + 1)),
            Bound::Unbounded => (),
        }
    }
}

/// Whitespace trimming that reuses the existing buffer.
pub trait TrimInPlace {
    /// Trims `self` in place and returns `self` for chaining.
    fn trim_in_place(&mut self) -> &mut Self;
}

impl TrimInPlace for String {
    /// Reduces the string to what [`str::trim`] would return, without building a new `String`.
    fn trim_in_place(&mut self) -> &mut Self {
        // `trim()` yields a sub-slice of `self`; turn it into offsets so the shared borrow ends
        // before the string is mutated.
        let bounds = self.slice_bounds(self.trim());
        self.drain_inverse(bounds);
        self
    }
}

// diff context (following): diff --git a/src/feed.rs b/src/feed.rs index 48150c2..2ac1031 100644 --- a/src/feed.rs +++ b/src/feed.rs @@ -69,7 +69,7 @@ pub async fn full(who: &IpAddr, state: State, body: impl Unpin + Stream {:?}", who, buffer); let mut chain = state.chain().write().await; cfg_if!
{ @@ -92,7 +92,7 @@ pub async fn full(who: &IpAddr, state: State, body: impl Unpin + Stream, mut output: mpsc::Sender) -> Result<(), GenBodyError> { let chain = state.chain().read().await; - if !chain.is_empty() { + if !chain.is_empty() { + let filter = state.outbound_filter(); match num { Some(num) if num < state.config().max_gen_size => { //This could DoS `full_body` and writes, potentially. for string in chain.str_iter_for(num) { - output.send(string).await.map_err(|e| GenBodyError(e.0))?; + output.send(filter.filter_owned(string)).await.map_err(|e| GenBodyError(e.0))?; } }, - _ => output.send(chain.generate_str()).await.map_err(|e| GenBodyError(e.0))?, + _ => output.send(filter.filter_owned(chain.generate_str())).await.map_err(|e| GenBodyError(e.0))?, } } Ok(()) diff --git a/src/main.rs b/src/main.rs index 46ad529..6472f53 100644 --- a/src/main.rs +++ b/src/main.rs @@ -195,9 +195,14 @@ async fn main() { async move { let (tx, rx) = mpsc::channel(state.config().max_gen_size); tokio::spawn(gen::body(state, num, tx)); - Ok::<_, std::convert::Infallible>(Response::new(Body::wrap_stream(rx.map(move |x| { - info!("{} <- {:?}", host, x); - Ok::<_, std::convert::Infallible>(x) + Ok::<_, std::convert::Infallible>(Response::new(Body::wrap_stream(rx.filter_map(move |mut x| { + if x.trim_in_place().len() != 0 { + info!("{} <- {:?}", host, x); + x.push('\n'); + Some(Ok::<_, std::convert::Infallible>(x)) + } else { + None + } })))) } }) @@ -213,16 +218,14 @@ async fn main() { async move { let (tx, rx) = mpsc::channel(state.config().max_gen_size); tokio::spawn(sentance::body(state, num, tx)); - Ok::<_, std::convert::Infallible>(Response::new(Body::wrap_stream(rx.map(move |mut x| { - info!("{} (sentance) <- {:?}", host, x); - // match x.chars().last() { - // Some(chr) if sanitise::is_sentance_boundary(chr) => { - // x.push(' '); - // }, - // _ => (), - // } - x.push(' '); - Ok::<_, std::convert::Infallible>(x) + Ok::<_, 
std::convert::Infallible>(Response::new(Body::wrap_stream(rx.filter_map(move |mut x| { + if x.trim_in_place().len() != 0 { + info!("{} (sentance) <- {:?}", host, x); + x.push(' '); + Some(Ok::<_, std::convert::Infallible>(x)) + } else { + None + } })))) } }) diff --git a/src/sanitise/filter.rs b/src/sanitise/filter.rs index 686b1b5..5288d08 100644 --- a/src/sanitise/filter.rs +++ b/src/sanitise/filter.rs @@ -133,7 +133,23 @@ impl Filter self.0.get(&chr).is_some() } - pub fn filter<'a, I: IntoIterator>(&'a self, from_iter: I) -> FilterIter<'a, I::IntoIter> + pub fn filter_owned(&self, input: impl Into) -> String + { + let mut input = input.into(); + self.filter(&mut input); + input + } + + pub fn filter<'a>(&self, output: &'a mut String) -> &'a mut String + { + if self.is_empty() { + return output; + } + output.retain(|chr| !self.check(chr)); + output + } + + pub fn filter_iter<'a, I: IntoIterator>(&'a self, from_iter: I) -> FilterIter<'a, I::IntoIter> where I::IntoIter: 'a { FilterIter(&self, from_iter.into_iter().fuse()) diff --git a/src/sentance.rs b/src/sentance.rs index b7aee69..5ffeacb 100644 --- a/src/sentance.rs +++ b/src/sentance.rs @@ -16,9 +16,10 @@ pub async fn body(state: State, num: Option, mut output: mpsc::Sender, //to avoid cloning config - exclude: Arc, + exclude: Arc<(sanitise::filter::Filter, sanitise::filter::Filter)>, chain: Arc>>, save: Arc, @@ -21,16 +21,21 @@ pub struct State impl State { - pub fn filter(&self) -> &sanitise::filter::Filter + pub fn inbound_filter(&self) -> &sanitise::filter::Filter { - &self.exclude + &self.exclude.0 + } + pub fn outbound_filter(&self) -> &sanitise::filter::Filter + { + &self.exclude.1 } pub fn new(config: Config, chain: Arc>>, save: Arc) -> Self { let (shutdown, shutdown_recv) = watch::channel(false); Self { - exclude: Arc::new(config.filter.get_filter()), + exclude: Arc::new((config.filter.get_inbound_filter(), + config.filter.get_outbound_filter())), config: Arc::new(config), chain, save,