You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
151 lines
2.6 KiB
151 lines
2.6 KiB
//! Word splitting
|
|
use super::*;
|
|
use std::{
|
|
borrow::{
|
|
Borrow,
|
|
ToOwned,
|
|
},
|
|
ops::{
|
|
Deref,DerefMut,
|
|
},
|
|
};
|
|
|
|
/// Error returned when text cannot be treated as a single [`Word`].
///
/// Produced by [`Word::single`] when the input contains a word boundary
/// (a character for which [`is_word_boundary`] returns `true`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct WordError;

impl std::fmt::Display for WordError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("input contains a word boundary and is not a single word")
    }
}

// Implementing `Error` lets callers propagate this with `?` into
// `Box<dyn Error>` and similar error containers.
impl std::error::Error for WordError {}
|
|
|
|
/// A borrowed, unsized string slice representing one word of a sentence.
///
/// `Word` is a `#[repr(transparent)]` wrapper around `str`, so it has the same
/// layout as `str` and is only ever handled behind a reference (`&Word`).
///
/// A word is intended to contain no word boundaries. [`Word::single`] checks
/// this; the unchecked constructor and the `From`/`AsRef`/`Borrow` conversions
/// in this module do not.
///
/// `Hash`, `PartialOrd` and `Ord` are derived from the inner `str`, so they
/// agree with those of `str`/`String`. That consistency is what makes the
/// `Borrow<Word> for String` impl usable for keyed map lookups.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(transparent)]
pub struct Word(str);
|
|
|
|
|
|
/// Wraps a string slice expression in a `&Word` without validating it.
///
/// Expands to an `unsafe` call to `Word::new_unchecked`, so no word-boundary
/// check is performed on the argument.
macro_rules! new {
    ($text:expr) => {
        unsafe { Word::new_unchecked($text) }
    };
}
|
|
|
|
/// Punctuation characters that count as word boundaries in addition to
/// whitespace. They are loaded into `BOUNDARIES`, which `is_word_boundary`
/// consults.
const DEFAULT_BOUNDARIES: &[char] = &['!', '.', ',', '*'];
|
|
|
|
lazy_static! {
    /// Lookup table holding every character of `DEFAULT_BOUNDARIES` as a key.
    ///
    /// Only key membership is used; each value is the unit `()`.
    static ref BOUNDARIES: smallmap::Map<char, ()> = {
        let mut boundaries = smallmap::Map::new();
        DEFAULT_BOUNDARIES.iter().for_each(|&punct| {
            boundaries.insert(punct, ());
        });
        boundaries
    };
}
|
|
|
|
#[inline] pub fn is_word_boundary(chr: char) -> bool
|
|
{
|
|
chr.is_whitespace() || BOUNDARIES.contains_key(&chr)
|
|
}
|
|
|
|
impl Word
|
|
{
|
|
/// Create a new word reference without checking for whitespace
|
|
pub unsafe fn new_unchecked<'a>(from: &'a str) -> &'a Self
|
|
{
|
|
std::mem::transmute(from)
|
|
}
|
|
|
|
/// Create a single word
|
|
pub fn single<'a>(from: &'a (impl AsRef<Sentance> +?Sized +'a)) -> Result<&'a Self, WordError>
|
|
{
|
|
let from = from.as_ref();
|
|
match from.find(is_word_boundary) {
|
|
Some(_) => Err(WordError),
|
|
_ => Ok(new!(from)),
|
|
}
|
|
}
|
|
|
|
/// Create a new section of words from this sentance
|
|
pub fn new<'a>(from: &'a (impl AsRef<Sentance> +?Sized+'a)) -> Vec<&'a Self>
|
|
{
|
|
Self::new_iter(from)
|
|
.collect()
|
|
}
|
|
|
|
/// Create a new iterator over words from this sentance.
|
|
pub fn new_iter<'a, 'b>(from: &'a (impl AsRef<Sentance> +?Sized+'b)) -> impl Iterator<Item = &'a Self>
|
|
where 'b: 'a
|
|
{
|
|
let from = from.as_ref();
|
|
from.split_inclusive(is_word_boundary)
|
|
.map(|x| x.trim())
|
|
.filter(|x| !x.is_empty())
|
|
.map(|x| new!(x))
|
|
}
|
|
}
|
|
|
|
impl<'a> From<&'a str> for &'a Word
|
|
{
|
|
fn from(from: &'a str) -> Self
|
|
{
|
|
new!(from)
|
|
}
|
|
}
|
|
|
|
impl AsRef<str> for Word
|
|
{
|
|
fn as_ref(&self) -> &str
|
|
{
|
|
&self.0
|
|
}
|
|
}
|
|
|
|
impl AsRef<Word> for str
|
|
{
|
|
fn as_ref(&self) -> &Word
|
|
{
|
|
new!(self)
|
|
}
|
|
}
|
|
|
|
impl Borrow<Word> for String
|
|
{
|
|
fn borrow(&self) -> &Word {
|
|
new!(&self[..])
|
|
}
|
|
}
|
|
|
|
impl ToOwned for Word
|
|
{
|
|
type Owned = String;
|
|
fn to_owned(&self) -> Self::Owned {
|
|
self.0.to_owned()
|
|
}
|
|
}
|
|
|
|
impl Deref for Word
|
|
{
|
|
type Target = str;
|
|
fn deref(&self) -> &Self::Target {
|
|
&self.0
|
|
}
|
|
}
|
|
|
|
impl DerefMut for Word
|
|
{
|
|
fn deref_mut(&mut self) -> &mut Self::Target {
|
|
&mut self.0
|
|
}
|
|
}
|
|
|
|
impl AsRef<Word> for Word
|
|
{
|
|
#[inline] fn as_ref(&self) -> &Word
|
|
{
|
|
self
|
|
}
|
|
}
|
|
|
|
pub fn words(input: &str) -> impl Iterator<Item=&'_ Word>
|
|
{
|
|
input.split_inclusive(is_word_boundary)
|
|
.map(|x| x.trim())
|
|
.filter(|x| !x.is_empty())
|
|
.map(|x| new!(x))
|
|
}
|