document feed() pipeline

feed
Avril 4 years ago
parent 684a6f6aa0
commit 107b34bcbd
Signed by: flanchan
GPG Key ID: 284488987C31F630

2
Cargo.lock generated

@ -616,7 +616,7 @@ dependencies = [
[[package]] [[package]]
name = "markov" name = "markov"
version = "0.7.3" version = "0.7.4"
dependencies = [ dependencies = [
"async-compression", "async-compression",
"bzip2-sys", "bzip2-sys",

@ -5,7 +5,26 @@ use sanitise::Sentance;
const FEED_BOUNDS: std::ops::RangeFrom<usize> = 2..; //TODO: Add to config somehow const FEED_BOUNDS: std::ops::RangeFrom<usize> = 2..; //TODO: Add to config somehow
/// Feed `what` into `chain`, at least `bounds` tokens.
///
/// # Tokenising
/// How the tokens are split within this function that operates on single buffers is determined largely by the features `split-sentance` and `feed-sentance` determining the use of the sentance API.
///
/// ## Pipeline
/// Since this is called on single buffers, it happens after the `split-newlines` tokenising if it's enabled, and thus the sentance API is only able to operate on each seperate line if that feature is enabled, regardless of `always-aggre`, or `feed-sentance` or `split-sentance`.
/// This is the pipeline for just within this function, after splitting through newlines if enabled.
///
/// * `feed-sentance`
/// ** Feed the buffer through the sentance split tokeniser
/// ** Feed the sentances through the word split tokeniser
/// ** Feed each collection of words into the chain seperately
/// * `split-sentance`
/// ** Feed the buffer through the sentance split tokeniser
/// ** Feed the sentances through the word split tokeniser
/// ** Feed the flattened collection into the chain once, concatenated.
/// * Neither
/// ** Feed the buffer through the word split tokeniser
/// ** Feed the collection into the chain
pub fn feed(chain: &mut Chain<String>, what: impl AsRef<str>, bounds: impl std::ops::RangeBounds<usize>) pub fn feed(chain: &mut Chain<String>, what: impl AsRef<str>, bounds: impl std::ops::RangeBounds<usize>)
{ {
cfg_if! { cfg_if! {
@ -84,7 +103,6 @@ pub async fn full(who: &IpAddr, state: State, body: impl Unpin + Stream<Item = R
if #[cfg(feature="split-newlines")] { if #[cfg(feature="split-newlines")] {
for buffer in buffer.split('\n').filter(|line| !line.trim().is_empty()) { for buffer in buffer.split('\n').filter(|line| !line.trim().is_empty()) {
feed!(&mut chain, buffer, FEED_BOUNDS); feed!(&mut chain, buffer, FEED_BOUNDS);
} }
} else { } else {
feed!(&mut chain, buffer, FEED_BOUNDS); feed!(&mut chain, buffer, FEED_BOUNDS);

Loading…
Cancel
Save