|
|
|
//! Sentence splitting
|
|
|
|
use super::*;
|
|
|
|
use std::{
|
|
|
|
borrow::{
|
|
|
|
Borrow,
|
|
|
|
ToOwned,
|
|
|
|
},
|
|
|
|
ops::{
|
|
|
|
Deref,DerefMut,
|
|
|
|
},
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Error produced when a string cannot be treated as a single sentence.
///
/// Returned by `Sentance::single` when its input contains at least one
/// sentence-boundary character.
#[derive(Debug)]
pub struct SentanceError;

impl std::fmt::Display for SentanceError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("string contains a sentence boundary")
    }
}

// Implementing `Error` lets callers box this type as `dyn Error` and
// propagate it with `?` alongside other error types.
impl std::error::Error for SentanceError {}
|
|
|
|
|
|
|
|
/// A borrowed sentence: an unsized newtype over `str`.
///
/// The type is `#[repr(transparent)]`, so a `&Sentance` has the same
/// representation as the `&str` it wraps. It is only ever handled behind a
/// reference.
#[repr(transparent)]
#[derive(Debug, PartialEq, Eq)]
pub struct Sentance(str);
|
|
|
|
|
|
|
|
|
|
|
|
/// Wrap a `&str` expression as a `&Sentance` without scanning it for sentence
/// boundaries. Internal shorthand for `Sentance::new_unchecked`.
macro_rules! new {
    ($text:expr) => {
        // `Sentance` is a `#[repr(transparent)]` wrapper over `str`, so the
        // reinterpretation done by `new_unchecked` is valid for any `&str`;
        // whether the text is boundary-free is left to the call site.
        unsafe { Sentance::new_unchecked($text) }
    };
}
|
|
|
|
|
|
|
|
/// Characters treated as sentence boundaries; used to populate the `BOUNDARIES` lookup.
const DEFAULT_BOUNDARIES: &[char] = &['\n', '.', ':', '!', '?'];
|
|
|
|
|
|
|
|
lazy_static! {
    /// Lookup table of sentence-boundary characters, filled from
    /// `DEFAULT_BOUNDARIES`. Only the keys matter; the values are unit.
    static ref BOUNDARIES: smallmap::Map<char, ()> = {
        let mut boundaries = smallmap::Map::new();
        for chr in DEFAULT_BOUNDARIES.iter().copied() {
            boundaries.insert(chr, ());
        }
        boundaries
    };
}
|
|
|
|
|
|
|
|
#[inline] pub fn is_sentance_boundary(chr: char) -> bool
|
|
|
|
{
|
|
|
|
BOUNDARIES.contains_key(&chr)
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Sentance
|
|
|
|
{
|
|
|
|
/// Create a new word reference without checking for sentance boundaries
|
|
|
|
pub unsafe fn new_unchecked<'a>(from: &'a str) -> &'a Self
|
|
|
|
{
|
|
|
|
std::mem::transmute(from)
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Create a single sentance
|
|
|
|
pub fn single<'a>(from: &'a (impl AsRef<str> + 'a + ?Sized)) -> Result<&'a Self, SentanceError>
|
|
|
|
{
|
|
|
|
let from = from.as_ref();
|
|
|
|
match from.find(is_sentance_boundary) {
|
|
|
|
Some(_) => Err(SentanceError),
|
|
|
|
_ => Ok(new!(from)),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Create a new section of sentances from this string
|
|
|
|
#[inline] pub fn new<'a>(from: &'a (impl AsRef<str> + 'a + ?Sized)) -> Vec<&'a Self>
|
|
|
|
{
|
|
|
|
Self::new_iter(from)
|
|
|
|
.collect()
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Create a new iterator over sentances from this string.
|
|
|
|
pub fn new_iter<'a>(from: &'a (impl AsRef<str> +'a + ?Sized)) -> impl Iterator<Item = &'a Self> + Clone
|
|
|
|
{
|
|
|
|
let from = from.as_ref();
|
|
|
|
from.split_inclusive(is_sentance_boundary)
|
|
|
|
.map(|x| new!(x.trim()))
|
|
|
|
.filter(|x| !x.is_empty())
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Get the words in this sentance
|
|
|
|
#[inline] pub fn words(&self) -> impl Iterator<Item = &'_ Word>
|
|
|
|
{
|
|
|
|
Word::new_iter(self)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> From<&'a str> for &'a Sentance
|
|
|
|
{
|
|
|
|
fn from(from: &'a str) -> Self
|
|
|
|
{
|
|
|
|
new!(from)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl AsRef<str> for Sentance
|
|
|
|
{
|
|
|
|
fn as_ref(&self) -> &str
|
|
|
|
{
|
|
|
|
&self.0
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl AsRef<Sentance> for str
|
|
|
|
{
|
|
|
|
fn as_ref(&self) -> &Sentance
|
|
|
|
{
|
|
|
|
new!(self)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Borrow<Sentance> for String
|
|
|
|
{
|
|
|
|
fn borrow(&self) -> &Sentance {
|
|
|
|
new!(&self[..])
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl ToOwned for Sentance
|
|
|
|
{
|
|
|
|
type Owned = String;
|
|
|
|
fn to_owned(&self) -> Self::Owned {
|
|
|
|
self.0.to_owned()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Deref for Sentance
|
|
|
|
{
|
|
|
|
type Target = str;
|
|
|
|
fn deref(&self) -> &Self::Target {
|
|
|
|
&self.0
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl DerefMut for Sentance
|
|
|
|
{
|
|
|
|
fn deref_mut(&mut self) -> &mut Self::Target {
|
|
|
|
&mut self.0
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl AsRef<Sentance> for Sentance
|
|
|
|
{
|
|
|
|
#[inline] fn as_ref(&self) -> &Sentance
|
|
|
|
{
|
|
|
|
self
|
|
|
|
}
|
|
|
|
}
|