You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
genmarkov/src/sanitise/word.rs

142 lines
2.4 KiB

//! Word splitting
use super::*;
use std::{
borrow::{
Borrow,
ToOwned,
},
ops::{
Deref,DerefMut,
},
};
/// Error returned when a string cannot be treated as a single [`Word`].
///
/// `Word::single` produces this when its input contains whitespace or one of
/// the configured word-boundary characters.
#[derive(Debug)]
pub struct WordError;

impl std::fmt::Display for WordError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("string is not a single word: it contains whitespace or a word boundary")
    }
}

// Implementing `Error` lets callers propagate this with `?` into
// `Box<dyn Error>` and similar error containers.
impl std::error::Error for WordError {}
/// A word is a string containing no whitespace, representing part of a sentence.
///
/// `Word` is an unsized, `#[repr(transparent)]` wrapper around `str`, so it is
/// handled behind a pointer such as `&Word`.
#[derive(Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct Word(str);
/// Shorthand for an unchecked `&str` -> `&Word` conversion.
///
/// Expands to `unsafe { Word::new_unchecked(..) }`; the argument is not
/// inspected for whitespace or boundary characters.
macro_rules! new {
    ($text:expr) => {
        unsafe { Word::new_unchecked($text) }
    };
}
/// Punctuation characters treated as word boundaries (whitespace is handled
/// separately by `is_word_boundary`).
const DEFAULT_BOUNDARIES: &[char] = &['!', '.', ','];

lazy_static! {
    /// Lookup table built from `DEFAULT_BOUNDARIES`; only the keys carry meaning.
    static ref BOUNDARIES: smallmap::Map<char, ()> = {
        let mut boundaries = smallmap::Map::new();
        DEFAULT_BOUNDARIES.iter().copied().for_each(|chr| {
            boundaries.insert(chr, ());
        });
        boundaries
    };
}
/// Returns `true` when `chr` ends a word: either it is whitespace or it is one
/// of the characters registered in `BOUNDARIES`.
#[inline]
pub fn is_word_boundary(chr: char) -> bool {
    // Whitespace never needs the table lookup.
    if chr.is_whitespace() {
        return true;
    }
    BOUNDARIES.contains_key(&chr)
}
impl Word {
    /// Reinterpret a string slice as a `Word` without validating its contents.
    ///
    /// # Safety
    ///
    /// No whitespace or boundary check is performed. Callers are expected to
    /// pass a string that already satisfies whatever they require of a word.
    pub unsafe fn new_unchecked<'a>(from: &'a str) -> &'a Self {
        // `Word` is `#[repr(transparent)]` over `str`, so a pointer to one can
        // be viewed as a pointer to the other.
        &*(from as *const str as *const Self)
    }

    /// Treat the whole input as exactly one word.
    ///
    /// # Errors
    ///
    /// Returns `WordError` if any character of the input satisfies
    /// `is_word_boundary`.
    pub fn single<'a>(from: &'a (impl AsRef<Sentance> + ?Sized + 'a)) -> Result<&'a Self, WordError> {
        let sentance = from.as_ref();
        if sentance.find(is_word_boundary).is_some() {
            return Err(WordError);
        }
        Ok(new!(sentance))
    }

    /// Split the input into words and collect them into a `Vec`.
    ///
    /// This is `Word::new_iter` followed by `collect`.
    pub fn new<'a>(from: &'a (impl AsRef<Sentance> + ?Sized + 'a)) -> Vec<&'a Self> {
        let words = Self::new_iter(from);
        words.collect::<Vec<_>>()
    }

    /// Lazily split the input into words.
    ///
    /// The input is cut after every character matching `is_word_boundary`
    /// (inclusive split), each piece is trimmed, and pieces that end up empty
    /// are skipped.
    // NOTE(review): assuming `Sentance` dereferences to `str`, a punctuation
    // boundary stays attached to the end of the word before it — confirm that
    // this is intended.
    pub fn new_iter<'a>(from: &'a (impl AsRef<Sentance> + ?Sized + 'a)) -> impl Iterator<Item = &'a Self> {
        let sentance = from.as_ref();
        sentance
            .split_inclusive(is_word_boundary)
            .filter_map(|piece| {
                let piece = piece.trim();
                if piece.is_empty() {
                    None
                } else {
                    Some(new!(piece))
                }
            })
    }
}
/// Unchecked conversion: the string is wrapped as-is, with no whitespace or
/// boundary validation.
impl<'a> From<&'a str> for &'a Word {
    fn from(text: &'a str) -> Self {
        new!(text)
    }
}
/// Borrow the underlying string slice of a word.
impl AsRef<str> for Word {
    fn as_ref(&self) -> &str {
        let inner: &str = &self.0;
        inner
    }
}
/// View any string slice as a `Word`.
///
/// The conversion is unchecked: the slice is not scanned for whitespace or
/// boundary characters.
impl AsRef<Word> for str {
    fn as_ref(&self) -> &Word {
        let text: &str = self;
        new!(text)
    }
}
/// Lets an owned `String` be borrowed as a `Word` (the counterpart of
/// `ToOwned for Word`). The contents are not validated.
impl Borrow<Word> for String {
    fn borrow(&self) -> &Word {
        new!(self.as_str())
    }
}
/// The owned form of a `Word` is a plain `String` holding the same text.
impl ToOwned for Word {
    type Owned = String;

    fn to_owned(&self) -> Self::Owned {
        String::from(&self.0)
    }
}
/// Gives a `Word` read access to every `str` method.
impl Deref for Word {
    type Target = str;

    fn deref(&self) -> &Self::Target {
        let inner: &str = &self.0;
        inner
    }
}
/// Gives a `Word` mutable access to the wrapped `str`.
impl DerefMut for Word {
    fn deref_mut(&mut self) -> &mut Self::Target {
        let inner: &mut str = &mut self.0;
        inner
    }
}
/// Identity conversion, so APIs taking `impl AsRef<Word>` accept a `&Word`
/// directly.
impl AsRef<Word> for Word {
    #[inline]
    fn as_ref(&self) -> &Word {
        self
    }
}