You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
genmarkov/src/sanitise/sentance.rs

147 lines
2.6 KiB

//! Sentence splitting: the `Sentance` type and helpers for breaking text at sentence boundaries.
use super::*;
use std::{
borrow::{
Borrow,
ToOwned,
},
ops::{
Deref,DerefMut,
},
};
/// Error returned by `Sentance::single` when the input contains a sentance boundary character.
#[derive(Debug)]
pub struct SentanceError;
/// A sentance: an unsized wrapper around `str`, only ever handled behind a reference.
///
/// The type is `#[repr(transparent)]` over `str`, which is what allows a `&str` to be
/// reinterpreted as a `&Sentance` (see `Sentance::new_unchecked`).
#[derive(Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct Sentance(str);
/// Wrap a `&str` expression as a `&Sentance` without checking it for sentance boundaries.
///
/// Expands to an `unsafe` block calling `Sentance::new_unchecked`, so every use of this
/// macro takes on that function's contract.
macro_rules! new {
($str:expr) => {
unsafe {Sentance::new_unchecked($str)}
};
}
/// Characters treated as sentance boundaries by default.
const DEFAULT_BOUNDARIES: &[char] = &['\n', '.', ':', '!', '?'];

lazy_static! {
    /// Lookup table of boundary characters, filled from `DEFAULT_BOUNDARIES` on first access.
    static ref BOUNDARIES: smallmap::Map<char, ()> = {
        let mut boundaries = smallmap::Map::new();
        // The map is used as a set: only the keys matter, the values are unit.
        DEFAULT_BOUNDARIES.iter().copied().for_each(|boundary| {
            boundaries.insert(boundary, ());
        });
        boundaries
    };
}
/// Returns `true` when `chr` is one of the sentance boundary characters held in `BOUNDARIES`.
#[inline] pub fn is_sentance_boundary(chr: char) -> bool
{
// `BOUNDARIES` is a key-only lookup table; membership is all that matters.
BOUNDARIES.contains_key(&chr)
}
impl Sentance
{
    /// Reinterpret a string slice as a sentance reference without checking for sentance boundaries.
    ///
    /// # Safety
    /// No boundary check is performed on `from`. The caller is responsible for whatever
    /// boundary guarantees it needs; `single` is the checked alternative.
    pub unsafe fn new_unchecked<'a>(from: &'a str) -> &'a Self
    {
        // `Sentance` is `#[repr(transparent)]` over `str`, so the two pointer types share
        // one layout and the cast only changes the static type.
        &*(from as *const str as *const Self)
    }

    /// Wrap `from` as exactly one sentance.
    ///
    /// # Errors
    /// Returns `SentanceError` if any character of the string satisfies `is_sentance_boundary`.
    pub fn single<'a>(from: &'a (impl AsRef<str> + 'a + ?Sized)) -> Result<&'a Self, SentanceError>
    {
        let text = from.as_ref();
        if text.contains(is_sentance_boundary) {
            return Err(SentanceError);
        }
        Ok(new!(text))
    }

    /// Split this string into sentances and collect them into a `Vec`.
    ///
    /// Equivalent to collecting the iterator returned by `new_iter`.
    #[inline] pub fn new<'a>(from: &'a (impl AsRef<str> + 'a + ?Sized)) -> Vec<&'a Self>
    {
        let sentances = Self::new_iter(from);
        sentances.collect()
    }

    /// Create an iterator over the sentances in this string.
    ///
    /// The string is split after each boundary character (the boundary stays attached to
    /// the piece before it), every piece is trimmed of surrounding whitespace, and pieces
    /// that are empty after trimming are skipped.
    pub fn new_iter<'a>(from: &'a (impl AsRef<str> +'a + ?Sized)) -> impl Iterator<Item = &'a Self> + Clone
    {
        from.as_ref()
            .split_inclusive(is_sentance_boundary)
            .map(str::trim)
            .filter(|piece| !piece.is_empty())
            .map(|piece| new!(piece))
    }

    /// Iterate over the words in this sentance, as produced by `Word::new_iter`.
    #[inline] pub fn words(&self) -> impl Iterator<Item = &'_ Word>
    {
        Word::new_iter(self)
    }
}
/// Reinterpret a `&str` as a `&Sentance`.
///
/// NOTE(review): this goes through `new!`, so the string is not checked for sentance
/// boundaries — confirm that an unchecked safe conversion is intended.
impl<'a> From<&'a str> for &'a Sentance
{
fn from(from: &'a str) -> Self
{
new!(from)
}
}
/// View a sentance as the plain string slice it wraps.
impl AsRef<str> for Sentance
{
fn as_ref(&self) -> &str
{
&self.0
}
}
/// View a string slice as a sentance.
///
/// Uses `new!`, so no sentance boundary check is performed on the string.
impl AsRef<Sentance> for str
{
fn as_ref(&self) -> &Sentance
{
new!(self)
}
}
/// Borrow an owned `String` as a sentance.
///
/// The whole string is wrapped through `new!`, so no boundary check is performed.
impl Borrow<Sentance> for String
{
    fn borrow(&self) -> &Sentance {
        new!(self.as_str())
    }
}
/// The owned form of a sentance is a plain `String` holding a copy of its text.
impl ToOwned for Sentance
{
    type Owned = String;

    fn to_owned(&self) -> Self::Owned {
        String::from(&self.0)
    }
}
/// Dereference a sentance to the `str` it wraps, making the `str` methods available on it.
impl Deref for Sentance
{
type Target = str;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// Mutable access to the wrapped `str`.
///
/// NOTE(review): changes made through the returned `&mut str` are not re-checked for
/// sentance boundaries here — confirm callers do not rely on such a check.
impl DerefMut for Sentance
{
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
/// Identity conversion, so a `&Sentance` can be passed where `AsRef<Sentance>` is expected.
impl AsRef<Sentance> for Sentance
{
#[inline] fn as_ref(&self) -> &Sentance
{
self
}
}