You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
genmarkov/src/sanitise/filter.rs

278 lines
5.2 KiB

//! Character filters: sets of characters that are stripped from strings and character iterators.
use smallmap::Map as SmallMap;
use std::{
borrow::Cow,
fmt,
iter::{
self,
FromIterator,
},
str,
};
use once_cell::sync::OnceCell;
/// A set of characters to be filtered out of text.
///
/// Stored as a `SmallMap` keyed by `char` with unit values, i.e. the map is used purely as a set.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Filter(SmallMap<char, ()>);
/*
impl<const N: usize> From<[char; N]> for Filter
{
fn from(from: [char; N]) -> Self
{
let mut map = SmallMap::with_capacity(1 + (N / 256));
for &chr in from.iter()
{
map.insert(chr, ());
}
Self(map)
}
}*/
impl<'a> From<&'a [char]> for Filter {
    /// Build a filter containing every character in the slice.
    ///
    /// Duplicate characters in `from` collapse into a single entry.
    fn from(from: &'a [char]) -> Self {
        let mut set = SmallMap::new();
        from.iter().copied().for_each(|chr| {
            set.insert(chr, ());
        });
        Self(set)
    }
}
impl<'a> From<&'a str> for Filter {
    /// Build a filter containing every character that occurs in `from`.
    fn from(from: &'a str) -> Self {
        // `FromIterator<char>` starts from an empty filter and inserts each character.
        from.chars().collect()
    }
}
impl str::FromStr for Filter {
    /// Parsing cannot fail: any string is a valid set of characters.
    type Err = std::convert::Infallible;

    /// Parse a filter from `s`; equivalent to `Filter::from(s)`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Ok(s.into())
    }
}
impl fmt::Display for Filter {
    /// Write every character in the filter, in the order `Filter::iter` yields them,
    /// stopping at the first write error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        use std::fmt::Write;

        self.iter().try_for_each(|chr| f.write_char(chr))
    }
}
/// Iterator over the characters stored in a [`Filter`], as returned by `Filter::iter`.
///
/// Field 0 is the underlying map iterator; field 1 is an element count, set to the map's
/// length when the iterator is created, that backs `size_hint`.
pub struct FilterKeyIter<'a>(smallmap::iter::Iter<'a, char, ()>, usize);
impl<'a> Iterator for FilterKeyIter<'a>
{
    type Item = char;

    /// Yield the next character key from the underlying map iterator.
    fn next(&mut self) -> Option<Self::Item>
    {
        let &(chr, _) = self.0.next()?;
        // Keep the remaining-element count in step with what has been consumed, otherwise
        // `size_hint` (and therefore `ExactSizeIterator::len`) would keep reporting the
        // original length.
        self.1 = self.1.saturating_sub(1);
        Some(chr)
    }

    /// Exact number of characters still to be yielded.
    fn size_hint(&self) -> (usize, Option<usize>) {
        (self.1, Some(self.1))
    }
}
impl<'a> iter::FusedIterator for FilterKeyIter<'a>{}
// Sound only because `next` maintains the count that `size_hint` reports.
impl<'a> iter::ExactSizeIterator for FilterKeyIter<'a>{}
impl Filter
{
    /// Create an empty filter. An empty filter removes nothing.
    pub fn new() -> Self
    {
        Self(SmallMap::new())
    }

    /// Add every character yielded by `from` to the filter.
    pub fn insert<I: IntoIterator<Item=char>>(&mut self, from: I)
    {
        for chr in from {
            self.0.insert(chr, ());
        }
    }

    /// Remove every character yielded by `from` from the filter.
    pub fn remove<I: IntoIterator<Item=char>>(&mut self, from: I)
    {
        for chr in from {
            self.0.remove(&chr);
        }
    }

    /// Number of distinct characters in the filter.
    pub fn len(&self) -> usize
    {
        self.0.len()
    }

    /// Does the filter contain no characters at all?
    pub fn is_empty(&self) -> bool
    {
        //TODO: impl this in smallmap itself
        self.len() == 0
    }

    /// Iterate over the characters in the filter.
    pub fn iter(&self) -> FilterKeyIter<'_>
    {
        // A named iterator type is returned (rather than `impl Iterator`) and is seeded
        // with the current length for its `size_hint`.
        FilterKeyIter(self.0.iter(), self.0.len())
    }

    /// Should this character be filtered?
    #[inline] pub fn check(&self, chr: char) -> bool
    {
        self.0.get(&chr).is_some()
    }

    /// Convert `input` into a `String` and return it with all filtered characters removed.
    pub fn filter_owned(&self, input: impl Into<String>) -> String
    {
        let mut input = input.into();
        self.filter(&mut input);
        input
    }

    /// Remove all filtered characters from `output` in place, returning the same reference
    /// so that calls can be chained.
    pub fn filter<'a>(&self, output: &'a mut String) -> &'a mut String
    {
        if self.is_empty() {
            return output;
        }
        output.retain(|chr| !self.check(chr));
        output
    }

    /// Wrap `from_iter` in an iterator that skips every character in this filter.
    ///
    /// The source iterator is fused before being wrapped.
    pub fn filter_iter<'a, I: IntoIterator<Item=char>>(&'a self, from_iter: I) -> FilterIter<'a, I::IntoIter>
    where I::IntoIter: 'a
    {
        FilterIter(self, from_iter.into_iter().fuse())
    }

    /// Return `string` with all filtered characters removed.
    ///
    /// The input is borrowed unchanged when nothing needs removing (the filter is empty or
    /// no character of `string` is in it); otherwise a new `String` is allocated.
    pub fn filter_cow<'a>(&self, string: &'a (impl AsRef<str> + 'a + ?Sized)) -> Cow<'a, str>
    {
        let string = string.as_ref();
        if self.is_empty() {
            return Cow::Borrowed(string);
        }
        // Locate the first filtered character by *byte* offset. Everything before it can be
        // copied verbatim, and if there is none we never allocate.
        let first = match string.char_indices().find(|&(_, chr)| self.check(chr)) {
            Some((idx, _)) => idx,
            None => return Cow::Borrowed(string),
        };
        // Build the result in a single pass. Removing characters one at a time from a copy
        // would need byte (not char) indices to be correct for multi-byte text, and would
        // be quadratic.
        let mut output = String::with_capacity(string.len());
        output.push_str(&string[..first]);
        output.extend(string[first..].chars().filter(|&chr| !self.check(chr)));
        Cow::Owned(output)
    }

    /// Create a view of `string` whose filtered text is produced lazily.
    ///
    /// The returned `FilterStr` starts with an empty cache; nothing is filtered here.
    pub fn filter_str<'a, T: AsRef<str>+'a +?Sized>(&'a self, string: &'a T) -> FilterStr<'a>
    {
        FilterStr(string.as_ref(), self, OnceCell::new())
    }
}
impl FromIterator<char> for Filter
{
fn from_iter<I: IntoIterator<Item=char>>(iter: I) -> Self
{
let mut output= Self::new();
output.insert(iter);
output
}
}
impl<'a> FilterStr<'a> {
    /// The filtered text.
    ///
    /// The first call builds the filtered string and stores it in the cell; later calls
    /// return the stored string.
    pub fn as_str(&self) -> &str {
        self.2
            .get_or_init(|| {
                let source = self.0.chars();
                // Buffer is created by the crate's `hint_cap` helper from the char iterator.
                let mut kept: String = crate::util::hint_cap(&source);
                kept.extend(source.filter(|&chr| !self.1.check(chr)));
                kept
            })
            .as_str()
    }
}
/// A string slice paired with the [`Filter`] to apply to it.
///
/// Fields: the source string, the filter, and a cell holding the filtered text once
/// `as_str` has computed it.
pub struct FilterStr<'a>(&'a str, &'a Filter, OnceCell<String>);
impl<'a> fmt::Display for FilterStr<'a> {
    /// Write the filtered text verbatim; width/precision flags on the formatter are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(self.as_str())
    }
}
impl<'a> FilterStr<'a>
{
pub fn filter(&self) -> &Filter
{
&self.1
}
}
/// Iterator adaptor that yields only the characters a [`Filter`] does not match.
///
/// Fields: the filter to consult, and the fused source iterator.
pub struct FilterIter<'a, I>(&'a Filter, iter::Fuse<I>);
impl<'a, I: Iterator<Item = char>> Iterator for FilterIter<'a, I> {
    type Item = char;

    /// Pull characters from the source until one is not in the filter, and yield it.
    /// Returns `None` once the source is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        for chr in self.1.by_ref() {
            if !self.0.check(chr) {
                return Some(chr);
            }
        }
        None
    }

    /// Any number of source items may be dropped, so the lower bound is 0; the upper
    /// bound is the source's upper bound.
    fn size_hint(&self) -> (usize, Option<usize>) {
        (0, self.1.size_hint().1)
    }
}
impl<'a, I> FilterIter<'a, I>
{
pub fn filter(&self) -> &Filter
{
self.0
}
}
// `FilterIter` stores its source wrapped in `iter::Fuse`, and `next` only returns `None`
// when that fused source does, so it keeps returning `None` afterwards.
impl<'a, I: Iterator<Item=char>> iter::FusedIterator for FilterIter<'a, I>{}
#[cfg(test)]
mod tests
{
    use super::*;

    /// The lazy (`filter_str`), copy-on-write (`filter_cow`) and iterator (`filter_iter`)
    /// entry points must all produce the same text for the same input.
    #[test]
    fn filter_cow()
    {
        let filter: Filter = " hi".chars().collect();
        let string = "abcdef ghi jk1\nhian";
        assert_eq!(filter.filter_str(&string).to_string(), filter.filter_cow(&string).to_string());
        // `Filter::filter` edits a `&mut String` in place and cannot take an iterator;
        // the iterator adaptor is `filter_iter`.
        assert_eq!(filter.filter_cow(&string).to_string(), filter.filter_iter(string.chars()).collect::<String>());
    }
}