diff --git a/Cargo.lock b/Cargo.lock index ae411b7..819b64b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -639,7 +639,7 @@ dependencies = [ [[package]] name = "markov" -version = "0.5.1" +version = "0.5.2" dependencies = [ "async-compression", "cfg-if 1.0.0", diff --git a/Cargo.toml b/Cargo.toml index 1184a3b..0c04205 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "markov" -version = "0.5.2" +version = "0.5.3" description = "Generate string of text from Markov chain fed by stdin" authors = ["Avril "] edition = "2018" diff --git a/src/feed.rs b/src/feed.rs index 7b119e8..3db4c67 100644 --- a/src/feed.rs +++ b/src/feed.rs @@ -10,7 +10,7 @@ pub fn feed(chain: &mut Chain, what: impl AsRef, bounds: impl std:: cfg_if! { if #[cfg(feature="split-sentance")] { let map = Sentance::new_iter(&what) //get each sentance in string - .map(|what| what.split_whitespace() // .words() here will remove the punctuation. + .map(|what| what.words() .filter(|word| !word.is_empty()) .map(|s| s.to_owned()).collect::>()); debug_assert!(!bounds.contains(&0), "Cannot allow 0 size feeds"); @@ -24,7 +24,7 @@ pub fn feed(chain: &mut Chain, what: impl AsRef, bounds: impl std:: } } else { let map = Sentance::new_iter(&what) //get each sentance in string - .map(|what| what.split_whitespace() // .words() here will remove the punctuation. + .map(|what| what.words() .filter(|word| !word.is_empty())) .flatten() // add all into one buffer .map(|s| s.to_owned()).collect::>(); diff --git a/src/main.rs b/src/main.rs index 7d7200e..c516086 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,5 @@ +#![feature(split_inclusive)] + #![allow(dead_code)] #[macro_use] extern crate log; diff --git a/src/sanitise/sentance.rs b/src/sanitise/sentance.rs index 9144240..2734234 100644 --- a/src/sanitise/sentance.rs +++ b/src/sanitise/sentance.rs @@ -71,7 +71,7 @@ impl Sentance pub fn new_iter<'a>(from: &'a (impl AsRef +'a + ?Sized)) -> impl Iterator { let from = from.as_ref(); - from.split(is_sentance_boundary) + from.split_inclusive(is_sentance_boundary) .map(|x| new!(x.trim())) .filter(|x| !x.is_empty()) } diff --git a/src/sanitise/word.rs b/src/sanitise/word.rs index 365c11d..320fe88 100644 --- a/src/sanitise/word.rs +++ b/src/sanitise/word.rs @@ -71,7 +71,8 @@ impl Word pub fn new_iter<'a>(from: &'a (impl AsRef +?Sized+'a)) -> impl Iterator { let from = from.as_ref(); - from.split(is_word_boundary) + from.split_inclusive(is_word_boundary) + .map(|x| x.trim()) .filter(|x| !x.is_empty()) .map(|x| new!(x)) }