range is configurable

feed
Avril 4 years ago
parent 107b34bcbd
commit 1c509031d6
Signed by: flanchan
GPG Key ID: 284488987C31F630

@ -1,6 +1,6 @@
[package] [package]
name = "markov" name = "markov"
version = "0.7.4" version = "0.8.0"
description = "Generate string of text from Markov chain fed by stdin" description = "Generate string of text from Markov chain fed by stdin"
authors = ["Avril <flanchan@cumallover.me>"] authors = ["Avril <flanchan@cumallover.me>"]
edition = "2018" edition = "2018"

@ -1,4 +1,4 @@
FEATURES:="api,always-aggregate" FEATURES:="api,always-aggregate,split-sentance"
VERSION:=`cargo read-manifest | rematch - 'version":"([0-9\.]+)"' 1` VERSION:=`cargo read-manifest | rematch - 'version":"([0-9\.]+)"' 1`
markov: markov:

@ -4,7 +4,8 @@ max_content_length = 4194304
max_gen_size = 256 max_gen_size = 256
save_interval_secs = 2 save_interval_secs = 2
trust_x_forwarded_for = false trust_x_forwarded_for = false
feed_bounds = '2..'
[filter] [filter]
inbound = '' inbound = '<>/\\'
outbound = '' outbound = ''

@ -6,6 +6,8 @@ use std::{
io, io,
borrow::Cow, borrow::Cow,
num::NonZeroU64, num::NonZeroU64,
error,
fmt,
}; };
use tokio::{ use tokio::{
fs::OpenOptions, fs::OpenOptions,
@ -27,6 +29,8 @@ pub struct Config
pub trust_x_forwarded_for: bool, pub trust_x_forwarded_for: bool,
#[serde(default)] #[serde(default)]
pub filter: FilterConfig, pub filter: FilterConfig,
#[serde(default)]
pub feed_bounds: String,
} }
#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Hash, Serialize, Deserialize)] #[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Hash, Serialize, Deserialize)]
@ -39,7 +43,7 @@ pub struct FilterConfig
impl FilterConfig impl FilterConfig
{ {
pub fn get_inbound_filter(&self) -> sanitise::filter::Filter fn get_inbound_filter(&self) -> sanitise::filter::Filter
{ {
let filt: sanitise::filter::Filter = self.inbound.parse().unwrap(); let filt: sanitise::filter::Filter = self.inbound.parse().unwrap();
if !filt.is_empty() if !filt.is_empty()
@ -48,7 +52,7 @@ impl FilterConfig
} }
filt filt
} }
pub fn get_outbound_filter(&self) -> sanitise::filter::Filter fn get_outbound_filter(&self) -> sanitise::filter::Filter
{ {
let filt: sanitise::filter::Filter = self.outbound.parse().unwrap(); let filt: sanitise::filter::Filter = self.outbound.parse().unwrap();
if !filt.is_empty() if !filt.is_empty()
@ -72,12 +76,45 @@ impl Default for Config
save_interval_secs: Some(unsafe{NonZeroU64::new_unchecked(2)}), save_interval_secs: Some(unsafe{NonZeroU64::new_unchecked(2)}),
trust_x_forwarded_for: false, trust_x_forwarded_for: false,
filter: Default::default(), filter: Default::default(),
feed_bounds: "2..".to_owned(),
} }
} }
} }
impl Config impl Config
{ {
/// Build the [`Cache`] of pre-parsed values for this config.
///
/// # Errors
/// Returns an [`InvalidConfigError`] naming `feed_bounds` when that field fails to parse,
/// or when the parsed range contains 0.
pub fn try_gen_cache(&self) -> Result<Cache, InvalidConfigError>
{
    use std::ops::RangeBounds;

    let feed_bounds = self.parse_feed_bounds()
        .map_err(|err| InvalidConfigError("feed_bounds", Box::new(err)))?;

    // A range admitting 0 is rejected up front.
    if feed_bounds.contains(&0) {
        let reason = opaque_error!("Bounds not allowed to contains 0 (they were `{}`)", feed_bounds);
        return Err(InvalidConfigError("feed_bounds", Box::new(reason)));
    }

    Ok(Cache {
        feed_bounds,
        inbound_filter: self.filter.get_inbound_filter(),
        outbound_filter: self.filter.get_outbound_filter(),
    })
}
/// Parse the `feed_bounds` string into a range.
///
/// An empty string yields `feed::DEFAULT_FEED_BOUNDS`; anything else goes through
/// the range parser.
fn parse_feed_bounds(&self) -> Result<range::DynRange<usize>, range::ParseError>
{
    match self.feed_bounds.as_str() {
        "" => Ok(feed::DEFAULT_FEED_BOUNDS.into()),
        spec => spec.parse(),
    }
}
pub fn save_interval(&self) -> Option<Duration> pub fn save_interval(&self) -> Option<Duration>
{ {
self.save_interval_secs.map(|x| Duration::from_secs(x.into())) self.save_interval_secs.map(|x| Duration::from_secs(x.into()))
@ -139,3 +176,52 @@ async fn load_args<I: Iterator<Item=String>>(mut from: I) -> Option<Config>
}, },
} }
} }
/// Error describing a config field that could not be accepted.
///
/// Carries the name of the offending field and the underlying cause.
#[derive(Debug)]
pub struct InvalidConfigError(&'static str, Box<dyn error::Error+ 'static>);

impl InvalidConfigError
{
    /// Name of the config field that was rejected.
    pub fn field(&self) -> &str
    {
        self.0
    }
}

impl error::Error for InvalidConfigError
{
    /// The underlying error; always present for this type.
    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
        let cause: &(dyn error::Error + 'static) = &*self.1;
        Some(cause)
    }
}

impl fmt::Display for InvalidConfigError
{
    /// Writes the field name followed by the cause's own message.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
    {
        let InvalidConfigError(field, cause) = self;
        write!(f,"failed to parse field `{}`: {}", field, cause)
    }
}
/// Caches some parsed config arguments
///
/// Produced by `Config::try_gen_cache`.
#[derive(Clone, PartialEq, Eq)]
pub struct Cache
{
/// Parsed form of the config's `feed_bounds` string.
pub feed_bounds: range::DynRange<usize>,
/// Filter obtained from the config's `filter.get_inbound_filter()`.
pub inbound_filter: sanitise::filter::Filter,
/// Filter obtained from the config's `filter.get_outbound_filter()`.
pub outbound_filter: sanitise::filter::Filter,
}
impl fmt::Debug for Cache
{
    /// Debug-formats the cache, rendering each filter as the `String` collected from its `iter()`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
    {
        let inbound: String = self.inbound_filter.iter().collect();
        let outbound: String = self.outbound_filter.iter().collect();

        let mut out = f.debug_struct("Cache");
        out.field("feed_bounds", &self.feed_bounds);
        out.field("inbound_filter", &inbound);
        out.field("outbound_filter", &outbound);
        out.finish()
    }
}

@ -3,7 +3,7 @@ use super::*;
#[cfg(any(feature="feed-sentance", feature="split-sentance"))] #[cfg(any(feature="feed-sentance", feature="split-sentance"))]
use sanitise::Sentance; use sanitise::Sentance;
const FEED_BOUNDS: std::ops::RangeFrom<usize> = 2..; //TODO: Add to config somehow pub const DEFAULT_FEED_BOUNDS: std::ops::RangeFrom<usize> = 2..; //TODO: Add to config somehow
/// Feed `what` into `chain`, at least `bounds` tokens. /// Feed `what` into `chain`, at least `bounds` tokens.
/// ///
@ -35,10 +35,11 @@ pub fn feed(chain: &mut Chain<String>, what: impl AsRef<str>, bounds: impl std::
debug_assert!(!bounds.contains(&0), "Cannot allow 0 size feeds"); debug_assert!(!bounds.contains(&0), "Cannot allow 0 size feeds");
for map in map {// feed each sentance seperately for map in map {// feed each sentance seperately
if bounds.contains(&map.len()) { if bounds.contains(&map.len()) {
debug!("Feeding chain {} items", map.len());
chain.feed(map); chain.feed(map);
} }
else { else {
debug!("Ignoring feed of invalid length {}", map.len()); debug!("Ignoring feed of invalid length {}: {:?}", map.len(), map);
} }
} }
} else { } else {
@ -49,16 +50,17 @@ pub fn feed(chain: &mut Chain<String>, what: impl AsRef<str>, bounds: impl std::
.flatten() // add all into one buffer .flatten() // add all into one buffer
.map(|s| s.to_owned()).collect::<Vec<_>>(); .map(|s| s.to_owned()).collect::<Vec<_>>();
} else { } else {
let map: Vec<_> = sanitise::Word::new_iter(what.as_ref()).map(ToOwned::to_owned) let map: Vec<_> = sanitise::words(what.as_ref()).map(ToOwned::to_owned)
.collect(); .collect();
} }
} }
debug_assert!(!bounds.contains(&0), "Cannot allow 0 size feeds"); debug_assert!(!bounds.contains(&0), "Cannot allow 0 size feeds");
if bounds.contains(&map.len()) { if bounds.contains(&map.len()) {
debug!("Feeding chain {} items", map.len());
chain.feed(map); chain.feed(map);
} }
else { else {
debug!("Ignoring feed of invalid length {}", map.len()); debug!("Ignoring feed of invalid length {}: {:?}", map.len(), map);
} }
} }
@ -71,11 +73,12 @@ pub async fn full(who: &IpAddr, state: State, body: impl Unpin + Stream<Item = R
if_debug! { if_debug! {
let timer = std::time::Instant::now(); let timer = std::time::Instant::now();
} }
let bounds = &state.config_cache().feed_bounds;
macro_rules! feed { macro_rules! feed {
($chain:expr, $buffer:ident, $bounds:expr) => { ($chain:expr, $buffer:ident) => {
{ {
let buffer = $buffer; let buffer = $buffer;
feed($chain, &buffer, $bounds) feed($chain, &buffer, bounds)
} }
} }
} }
@ -102,10 +105,10 @@ pub async fn full(who: &IpAddr, state: State, body: impl Unpin + Stream<Item = R
cfg_if! { cfg_if! {
if #[cfg(feature="split-newlines")] { if #[cfg(feature="split-newlines")] {
for buffer in buffer.split('\n').filter(|line| !line.trim().is_empty()) { for buffer in buffer.split('\n').filter(|line| !line.trim().is_empty()) {
feed!(&mut chain, buffer, FEED_BOUNDS); feed!(&mut chain, buffer);
} }
} else { } else {
feed!(&mut chain, buffer, FEED_BOUNDS); feed!(&mut chain, buffer);
} }
} }
@ -124,7 +127,7 @@ pub async fn full(who: &IpAddr, state: State, body: impl Unpin + Stream<Item = R
#[cfg(not(feature="hog-buffer"))] #[cfg(not(feature="hog-buffer"))]
let mut chain = state.chain().write().await; // Acquire mutex once per line? Is this right? let mut chain = state.chain().write().await; // Acquire mutex once per line? Is this right?
feed!(&mut chain, line, FEED_BOUNDS); feed!(&mut chain, line);
info!("{} -> {:?}", who, line); info!("{} -> {:?}", who, line);
} }
written+=line.len(); written+=line.len();

@ -63,6 +63,7 @@ macro_rules! status {
mod ext; mod ext;
use ext::*; use ext::*;
mod util; mod util;
mod range;
mod sanitise; mod sanitise;
mod bytes; mod bytes;
mod chunking; mod chunking;
@ -105,8 +106,19 @@ fn init_log()
async fn main() { async fn main() {
init_log(); init_log();
let config = match config::load().await { let (config, ccache) = match config::load().await {
Some(v) => v, Some(v) => {
let cache = match v.try_gen_cache() {
Ok(c) => c,
Err(e) => {
error!("Invalid config, cannot continue");
error!("{}", e);
debug!("{:?}", e);
return;
},
};
(v, cache)
},
_ => { _ => {
let cfg = config::Config::default(); let cfg = config::Config::default();
#[cfg(debug_assertions)] #[cfg(debug_assertions)]
@ -115,10 +127,12 @@ async fn main() {
error!("Failed to create default config file: {}", err); error!("Failed to create default config file: {}", err);
} }
} }
cfg let cache= cfg.try_gen_cache().unwrap();
(cfg, cache)
}, },
}; };
trace!("Using config {:?}", config); debug!("Using config {:?}", config);
trace!("With config cached: {:?}", ccache);
let chain = Arc::new(RwLock::new(match save::load(&config.file).await { let chain = Arc::new(RwLock::new(match save::load(&config.file).await {
Ok(chain) => { Ok(chain) => {
@ -137,6 +151,7 @@ async fn main() {
let save_when = Arc::new(Notify::new()); let save_when = Arc::new(Notify::new());
let state = State::new(config, let state = State::new(config,
ccache,
Arc::clone(&chain), Arc::clone(&chain),
Arc::clone(&save_when)); Arc::clone(&save_when));
let state2 = state.clone(); let state2 = state.clone();

@ -0,0 +1,287 @@
//! Workarounds for ridiculously janky `std::ops::Range*` polymorphism
use super::*;
use std::{
ops::{
Range,
RangeFrom,
RangeInclusive,
RangeTo,
RangeToInclusive,
RangeFull,
Bound,
RangeBounds,
},
str::{
FromStr,
},
fmt,
error,
};
/// A single type able to hold any one of the `std::ops` range kinds.
///
/// Each variant wraps the corresponding standard range type.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum DynRange<T>
{
/// `start..end`
Range(Range<T>),
/// `start..`
From(RangeFrom<T>),
/// `start..=end`
Inclusive(RangeInclusive<T>),
/// `..end`
To(RangeTo<T>),
/// `..=end`
ToInclusive(RangeToInclusive<T>),
/// `..`
Full(RangeFull),
}
/// Implements `From<$range<T>> for DynRange<T>` by wrapping the value in variant `$name`.
///
/// Invoked once per `std::ops` range type directly below the definition.
#[macro_export] macro_rules! impl_from {
// `RangeFull` takes no type parameter, so it cannot use the generic `$range <T>` arm.
(Full, RangeFull) => {
impl<T> From<RangeFull> for DynRange<T>
{
#[inline] fn from(from: RangeFull) -> Self
{
Self::Full(from)
}
}
};
// Every other range type is generic over its endpoint type `T`.
($name:ident, $range:tt) => {
impl<T> From<$range <T>> for DynRange<T>
{
#[inline] fn from(from: $range<T>) -> Self
{
Self::$name(from)
}
}
};
}
// One `From` impl per variant of `DynRange`.
impl_from!(Range, Range);
impl_from!(From, RangeFrom);
impl_from!(Inclusive, RangeInclusive);
impl_from!(To, RangeTo);
impl_from!(ToInclusive, RangeToInclusive);
impl_from!(Full, RangeFull);
// Forwards the `RangeBounds` method named `$bound` (`start_bound` or `end_bound`)
// to whichever std range the `DynRange` in `$self` currently holds.
macro_rules! bounds {
($self:ident, $bound:ident) => {
match $self {
DynRange::Range(from) => from.$bound(),
DynRange::From(from) => from.$bound(),
DynRange::Inclusive(i) => i.$bound(),
DynRange::To(i) => i.$bound(),
DynRange::ToInclusive(i) => i.$bound(),
// The stored `RangeFull` is ignored; a fresh `..` is queried instead.
DynRange::Full(_) => (..).$bound(),
}
};
}
impl<T> RangeBounds<T> for DynRange<T>
{
fn start_bound(&self) -> Bound<&T> {
bounds!(self, start_bound)
}
fn end_bound(&self) -> Bound<&T> {
bounds!(self, end_bound)
}
}
impl<'a, T> RangeBounds<T> for &'a DynRange<T>
{
    /// Lower bound of the referenced range; defers to the impl on `DynRange<T>` itself.
    fn start_bound(&self) -> Bound<&T> {
        <DynRange<T> as RangeBounds<T>>::start_bound(*self)
    }

    /// Upper bound of the referenced range; defers to the impl on `DynRange<T>` itself.
    fn end_bound(&self) -> Bound<&T> {
        <DynRange<T> as RangeBounds<T>>::end_bound(*self)
    }
}
impl<T: fmt::Display> fmt::Display for DynRange<T>
{
    /// Writes the range in Rust range syntax, e.g. `1..5`, `2..`, `..=9` or `..`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
    {
        match self {
            Self::Full(_) => f.write_str(".."),
            Self::To(RangeTo { end }) => write!(f, "..{}", end),
            Self::ToInclusive(RangeToInclusive { end }) => write!(f, "..={}", end),
            Self::From(RangeFrom { start }) => write!(f, "{}..", start),
            Self::Range(Range { start, end }) => write!(f, "{}..{}", start, end),
            // `RangeInclusive` keeps its fields private, so go through the accessors.
            Self::Inclusive(span) => write!(f, "{}..={}", span.start(), span.end()),
        }
    }
}
use std::any::{
Any,
};
// Type-erased access to the std range stored inside a `DynRange`, via `std::any::Any`.
impl<T: 'static> DynRange<T>
{
/// Consume `self`, boxing the wrapped std range as `dyn Any`.
fn into_inner(self) -> Box<dyn Any + 'static>
{
match self {
Self::Range(from) => Box::new(from),
Self::From(from) => Box::new(from),
Self::Inclusive(from) => Box::new(from),
Self::To(from) => Box::new(from),
Self::ToInclusive(from) => Box::new(from),
// The stored `RangeFull` is discarded and a fresh `..` is boxed.
Self::Full(_) => Box::new(..),
}
}
/// Mutably borrow the wrapped std range as `dyn Any`.
fn inner_mut(&mut self) -> &mut dyn Any
{
match self {
Self::Range(from) => from,
Self::From(from) => from,
Self::Inclusive(from) => from,
Self::To(from) => from,
Self::ToInclusive(from) => from,
Self::Full(f) => f,
}
}
/// Borrow the wrapped std range as `dyn Any`.
fn inner_ref(&self) -> &dyn Any
{
match self {
Self::Range(from) => from,
Self::From(from) => from,
Self::Inclusive(from) => from,
Self::To(from) => from,
Self::ToInclusive(from) => from,
// Returns a reference to a fresh `..` value rather than to the stored one.
Self::Full(_) => &(..),
}
}
/// Borrow the wrapped range as the concrete type `R`, or `None` if a different type is held.
pub fn downcast_ref<R: RangeBounds<T> + 'static>(&self) -> Option<&R>
{
self.inner_ref().downcast_ref()
}
/// Mutably borrow the wrapped range as the concrete type `R`, or `None` if a different type is held.
pub fn downcast_mut<R: RangeBounds<T> + 'static>(&mut self) -> Option<&mut R>
{
self.inner_mut().downcast_mut()
}
/// Try to take the wrapped range out as the concrete type `R`, handing `self` back on failure.
pub fn downcast<R: RangeBounds<T> + 'static>(self) -> Result<R, Self>
{
// NOTE(review): `Box<Self>` has no `downcast` method in std (only `Box<dyn Any>` does);
// presumably this resolves to an extension trait brought in by `use super::*` — confirm.
Box::new(self).downcast()
}
}
/// Error returned when a string cannot be parsed into a [`DynRange`].
///
/// The first field records the shape of range that was being attempted, with `()` standing in
/// for the endpoints; the second is the underlying endpoint error, if there was one.
#[derive(Debug)]
pub struct ParseError(DynRange<()>, Option<Box<dyn error::Error+'static>>);

impl ParseError
{
    /// An error for range shape `which`, caused by `err`.
    fn new<R: Into<DynRange<()>>>(which: R, err: impl error::Error + 'static) -> Self
    {
        let cause: Box<dyn error::Error + 'static> = Box::new(err);
        Self(which.into(), Some(cause))
    }

    /// An error for range shape `which` with no underlying cause.
    fn none(which: impl Into<DynRange<()>>) -> Self
    {
        Self(which.into(), None)
    }

    /// Replace the recorded range shape with `to`, keeping the cause.
    fn map<T: Into<DynRange<()>>>(self, to: T) -> Self
    {
        let ParseError(_, cause) = self;
        Self(to.into(), cause)
    }
}

impl error::Error for ParseError
{
    /// The underlying endpoint error, when one was recorded.
    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
        self.1.as_deref()
    }
}

impl fmt::Display for ParseError
{
    /// Writes the attempted range shape, followed by the cause's message when present.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
    {
        write!(f, "failed to parse range in format `{:?}`", self.0)?;
        match &self.1 {
            Some(cause) => write!(f, ": {}", cause),
            None => Ok(()),
        }
    }
}
impl<T: FromStr> FromStr for DynRange<T>
where T::Err: error::Error + 'static
{
    type Err = ParseError;

    /// Parse a range written in Rust range syntax.
    ///
    /// Accepted forms are `..`, `..=b`, `..b`, `a..`, `a..=b` and `a..b`, where each endpoint
    /// is parsed with `T::from_str`. The input is split once, at the first range operator, so
    /// trailing input such as `1..2..3` is rejected rather than silently read as `1..2`.
    ///
    /// # Errors
    /// Returns a [`ParseError`] recording the shape of range that was attempted. It carries
    /// the endpoint's own parse error when an endpoint was malformed, and no cause when the
    /// input contains no range operator at all.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        /// Parse one endpoint, tagging any failure with the `shape` of range being attempted.
        fn endpoint<T: FromStr>(text: &str, shape: impl Into<DynRange<()>>) -> Result<T, ParseError>
        where T::Err: error::Error + 'static
        {
            text.parse().map_err(|err| ParseError::new(shape, err))
        }

        if s == ".." {
            return Ok(Self::Full(..));
        }

        // Open-start forms. `..=` is tested first because `..` is a prefix of it.
        if let Some(end) = s.strip_prefix("..=") {
            let end: T = endpoint(end, ..=())?;
            return Ok(Self::ToInclusive(..=end));
        }
        if let Some(end) = s.strip_prefix("..") {
            let end: T = endpoint(end, ..())?;
            return Ok(Self::To(..end));
        }

        // Open-end form.
        if let Some(start) = s.strip_suffix("..") {
            let start: T = endpoint(start, ()..)?;
            return Ok(Self::From(start..));
        }

        // Two-sided forms: split at the first `..`; a directly following `=` makes it inclusive.
        // Everything after the operator is handed to the endpoint parser whole, so any extra
        // operator further along makes that endpoint fail instead of being ignored.
        if let Some(at) = s.find("..") {
            let (start, rest) = (&s[..at], &s[at + 2..]);
            return match rest.strip_prefix('=') {
                Some(end) => {
                    let start: T = endpoint(start, ()..=())?;
                    let end: T = endpoint(end, ()..=())?;
                    Ok(Self::Inclusive(start..=end))
                },
                None => {
                    let start: T = endpoint(start, ()..())?;
                    let end: T = endpoint(rest, ()..())?;
                    Ok(Self::Range(start..end))
                },
            };
        }

        // No range operator anywhere in the input.
        Err(ParseError::none(()..()))
    }
}

@ -139,3 +139,11 @@ impl AsRef<Word> for Word
self self
} }
} }
/// Iterate over the words of `input`.
///
/// The input is cut with `split_inclusive` on `is_word_boundary`; each piece is trimmed,
/// empty pieces are dropped, and the rest are turned into `Word`s via `new!`.
pub fn words(input: &str) -> impl Iterator<Item=&'_ Word>
{
    input.split_inclusive(is_word_boundary)
        .filter_map(|piece| {
            let piece = piece.trim();
            if piece.is_empty() {
                None
            } else {
                Some(new!(piece))
            }
        })
}

@ -24,8 +24,7 @@ impl fmt::Display for ShutdownError
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct State pub struct State
{ {
config: Arc<Config>, //to avoid cloning config config: Arc<Box<(Config, config::Cache)>>, //to avoid cloning config
exclude: Arc<(sanitise::filter::Filter, sanitise::filter::Filter)>,
chain: Arc<RwLock<Chain<String>>>, chain: Arc<RwLock<Chain<String>>>,
save: Arc<Notify>, save: Arc<Notify>,
begin: Initialiser, begin: Initialiser,
@ -72,20 +71,18 @@ impl State
pub fn inbound_filter(&self) -> &sanitise::filter::Filter pub fn inbound_filter(&self) -> &sanitise::filter::Filter
{ {
&self.exclude.0 &self.config_cache().inbound_filter
} }
pub fn outbound_filter(&self) -> &sanitise::filter::Filter pub fn outbound_filter(&self) -> &sanitise::filter::Filter
{ {
&self.exclude.1 &self.config_cache().outbound_filter
} }
pub fn new(config: Config, chain: Arc<RwLock<Chain<String>>>, save: Arc<Notify>) -> Self pub fn new(config: Config, cache: config::Cache, chain: Arc<RwLock<Chain<String>>>, save: Arc<Notify>) -> Self
{ {
let (shutdown, shutdown_recv) = watch::channel(false); let (shutdown, shutdown_recv) = watch::channel(false);
Self { Self {
exclude: Arc::new((config.filter.get_inbound_filter(), config: Arc::new(Box::new((config, cache))),
config.filter.get_outbound_filter())),
config: Arc::new(config),
chain, chain,
save, save,
begin: Initialiser::new(), begin: Initialiser::new(),
@ -96,7 +93,12 @@ impl State
pub fn config(&self) -> &Config pub fn config(&self) -> &Config
{ {
self.config.as_ref() &self.config.as_ref().0
}
pub fn config_cache(&self) -> &config::Cache
{
&self.config.as_ref().1
} }
pub fn notify_save(&self) pub fn notify_save(&self)

@ -39,3 +39,54 @@ pub fn hint_cap<T: NewCapacity, I: Iterator>(iter: &I) -> T
(_, Some(x)) | (x, _) => T::with_capacity(x) (_, Some(x)) | (x, _) => T::with_capacity(x)
} }
} }
/// Create an anonymous error value carrying only a message.
///
/// Each expansion defines a private `OpaqueError` type implementing `std::error::Error`,
/// `Debug` and `Display`, and evaluates to a value of it.
///
/// * `opaque_error!("text")` — the literal is the whole message.
/// * `opaque_error!("fmt {}", args..)` — the message is formatted immediately into a `String`.
/// * `opaque_error!(yield "fmt {}", args..)` — wraps `format_args!` without allocating. The
///   value borrows its arguments, so it is only usable within the expression that creates it.
#[macro_export] macro_rules! opaque_error {
    ($msg:literal) => {
        {
            #[derive(Debug)]
            struct OpaqueError;
            impl ::std::error::Error for OpaqueError{}
            impl ::std::fmt::Display for OpaqueError
            {
                fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result
                {
                    write!(f, $msg)
                }
            }
            OpaqueError
        }
    };
    ($msg:literal $($tt:tt)*) => {
        {
            #[derive(Debug)]
            struct OpaqueError(String);
            impl ::std::error::Error for OpaqueError{}
            impl ::std::fmt::Display for OpaqueError
            {
                fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result
                {
                    write!(f, "{}", self.0)
                }
            }
            OpaqueError(format!($msg $($tt)*))
        }
    };
    (yield $msg:literal $($tt:tt)*) => {
        {
            // The struct is generic over a lifetime, so every impl must name it; the path to
            // `Arguments` is absolute so the macro does not rely on the caller importing `fmt`.
            #[derive(Debug)]
            struct OpaqueError<'a>(::std::fmt::Arguments<'a>);
            impl<'a> ::std::error::Error for OpaqueError<'a>{}
            impl<'a> ::std::fmt::Display for OpaqueError<'a>
            {
                fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result
                {
                    write!(f, "{}", self.0)
                }
            }
            OpaqueError(format_args!($msg $($tt)*))
        }
    };
}

Loading…
Cancel
Save