parent
51a5d0aeba
commit
ecf7ff6f07
@ -1,37 +1,73 @@
|
||||
//! Sanitisers
|
||||
use super::*;
|
||||
use std::{
|
||||
marker::Unpin,
|
||||
error,
|
||||
fmt,
|
||||
};
|
||||
use tokio::{
|
||||
prelude::*,
|
||||
io::{
|
||||
AsyncRead,
|
||||
AsyncBufRead
|
||||
},
|
||||
};
|
||||
|
||||
mod sentance;
|
||||
pub use sentance::*;
|
||||
mod word;
|
||||
pub use word::*;
|
||||
|
||||
/*
|
||||
pub fn take_sentance<T: AsyncBufRead+ ?Sized + Unpin, U: AsyncWrite + ?Sized + Unpin>(from: &mut T, to: &mut U) -> Result<usize, Error>
|
||||
{
|
||||
todo!()
|
||||
}
|
||||
}*/
|
||||
|
||||
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
|
||||
Word(WordError),
|
||||
Sentance(SentanceError),
|
||||
}
|
||||
|
||||
impl error::Error for Error{}
|
||||
|
||||
impl fmt::Display for Error
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
|
||||
{
|
||||
Ok(())
|
||||
match self {
|
||||
Self::Word(_) => write!(f, "couldn't extract word"),
|
||||
Self::Sentance(_) => write!(f, "couldn't extract sentance"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<WordError> for Error
|
||||
{
|
||||
#[inline] fn from(from: WordError) -> Self
|
||||
{
|
||||
Self::Word(from)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SentanceError> for Error
|
||||
{
|
||||
#[inline] fn from(from: SentanceError) -> Self
|
||||
{
|
||||
Self::Sentance(from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: splits a sample text into sentances and prints the words
    /// found in each one. It makes no assertions.
    #[test]
    fn sentance() {
        let text = r#"Hello world.
I am a string, that is a string. Strings, I love them!!!

Owo uwu"#;
        for sentance in Sentance::new_iter(text) {
            let words = Word::new(sentance);
            println!("Word in {:?} -> {:?}", sentance, words);
        }
    }
}
|
||||
|
@ -0,0 +1,146 @@
|
||||
//! Sentance splitting
|
||||
use super::*;
|
||||
use std::{
|
||||
borrow::{
|
||||
Borrow,
|
||||
ToOwned,
|
||||
},
|
||||
ops::{
|
||||
Deref,DerefMut,
|
||||
},
|
||||
};
|
||||
|
||||
/// Error returned when text cannot be treated as one single sentance.
#[derive(Debug)]
pub struct SentanceError;
|
||||
|
||||
/// A sentance: an unsized wrapper around `str`, always used behind a reference.
///
/// `#[repr(transparent)]` gives it the same layout as `str`, which the
/// reference conversions from `&str` in this module depend on.
#[derive(Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct Sentance(str);
|
||||
|
||||
|
||||
/// Wraps a `&str` expression as a `&Sentance` through
/// `Sentance::new_unchecked`, i.e. without scanning it for boundary characters.
macro_rules! new {
    ($text:expr) => {
        unsafe { Sentance::new_unchecked($text) }
    };
}
|
||||
|
||||
/// Characters that end a sentance.
const DEFAULT_BOUNDARIES: &[char] = &['\n', '.', ':', '!'];

/// Returns `true` when `chr` is one of the `DEFAULT_BOUNDARIES` characters.
///
/// The set is small and fixed, so the slice is scanned directly instead of
/// going through a lazily-initialised lookup map.
#[inline]
pub fn is_sentance_boundary(chr: char) -> bool {
    DEFAULT_BOUNDARIES.contains(&chr)
}
|
||||
|
||||
impl Sentance
|
||||
{
|
||||
/// Create a new word reference without checking for sentance boundaries
|
||||
pub unsafe fn new_unchecked<'a>(from: &'a str) -> &'a Self
|
||||
{
|
||||
std::mem::transmute(from)
|
||||
}
|
||||
|
||||
/// Create a single sentance
|
||||
pub fn single<'a>(from: &'a (impl AsRef<str> + 'a + ?Sized)) -> Result<&'a Self, SentanceError>
|
||||
{
|
||||
let from = from.as_ref();
|
||||
match from.find(is_sentance_boundary) {
|
||||
Some(_) => Err(SentanceError),
|
||||
_ => Ok(new!(from)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new section of sentances from this string
|
||||
#[inline] pub fn new<'a>(from: &'a (impl AsRef<str> + 'a + ?Sized)) -> Vec<&'a Self>
|
||||
{
|
||||
Self::new_iter(from)
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Create a new iterator over sentances from this string.
|
||||
pub fn new_iter<'a>(from: &'a (impl AsRef<str> +'a + ?Sized)) -> impl Iterator<Item = &'a Self>
|
||||
{
|
||||
let from = from.as_ref();
|
||||
from.split(is_sentance_boundary)
|
||||
.map(|x| new!(x.trim()))
|
||||
.filter(|x| !x.is_empty())
|
||||
}
|
||||
|
||||
/// Get the words in this sentance
|
||||
pub fn words(&self) -> impl Iterator<Item = &'_ Word>
|
||||
{
|
||||
Word::new_iter(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<&'a str> for &'a Sentance
|
||||
{
|
||||
fn from(from: &'a str) -> Self
|
||||
{
|
||||
new!(from)
|
||||
}
|
||||
}
|
||||
|
||||
impl AsRef<str> for Sentance
|
||||
{
|
||||
fn as_ref(&self) -> &str
|
||||
{
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl AsRef<Sentance> for str
|
||||
{
|
||||
fn as_ref(&self) -> &Sentance
|
||||
{
|
||||
new!(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl Borrow<Sentance> for String
|
||||
{
|
||||
fn borrow(&self) -> &Sentance {
|
||||
new!(&self[..])
|
||||
}
|
||||
}
|
||||
|
||||
impl ToOwned for Sentance
|
||||
{
|
||||
type Owned = String;
|
||||
fn to_owned(&self) -> Self::Owned {
|
||||
self.0.to_owned()
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for Sentance
|
||||
{
|
||||
type Target = str;
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl DerefMut for Sentance
|
||||
{
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl AsRef<Sentance> for Sentance
|
||||
{
|
||||
#[inline] fn as_ref(&self) -> &Sentance
|
||||
{
|
||||
self
|
||||
}
|
||||
}
|
Loading…
Reference in new issue