From 107b34bcbdb441fad743767a37a8f5cb55b46eb4 Mon Sep 17 00:00:00 2001 From: Avril Date: Mon, 12 Oct 2020 21:09:22 +0100 Subject: [PATCH] document feed() pipeline --- Cargo.lock | 2 +- src/feed.rs | 22 ++++++++++++++++++++-- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bcf467e..8fb3bc7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -616,7 +616,7 @@ dependencies = [ [[package]] name = "markov" -version = "0.7.3" +version = "0.7.4" dependencies = [ "async-compression", "bzip2-sys", diff --git a/src/feed.rs b/src/feed.rs index c998e1b..aaa2dec 100644 --- a/src/feed.rs +++ b/src/feed.rs @@ -5,7 +5,26 @@ use sanitise::Sentance; const FEED_BOUNDS: std::ops::RangeFrom = 2..; //TODO: Add to config somehow - +/// Feed `what` into `chain`, at least `bounds` tokens. +/// +/// # Tokenising +/// How the tokens are split within this function that operates on single buffers is determined largely by the features `split-sentance` and `feed-sentance`, which determine the use of the sentance API. +/// +/// ## Pipeline +/// Since this is called on single buffers, it happens after the `split-newlines` tokenising if it's enabled, and thus the sentance API is only able to operate on each separate line if that feature is enabled, regardless of `always-aggre`, or `feed-sentance` or `split-sentance`. +/// This is the pipeline for just within this function, after splitting through newlines if enabled. +/// +/// * `feed-sentance` +/// ** Feed the buffer through the sentance split tokeniser +/// ** Feed the sentances through the word split tokeniser +/// ** Feed each collection of words into the chain separately +/// * `split-sentance` +/// ** Feed the buffer through the sentance split tokeniser +/// ** Feed the sentances through the word split tokeniser +/// ** Feed the flattened collection into the chain once, concatenated. 
+/// * Neither +/// ** Feed the buffer through the word split tokeniser +/// ** Feed the collection into the chain pub fn feed(chain: &mut Chain, what: impl AsRef, bounds: impl std::ops::RangeBounds) { cfg_if! { @@ -84,7 +103,6 @@ pub async fn full(who: &IpAddr, state: State, body: impl Unpin + Stream