You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
lolistealer/src/tags.rs

329 lines
6.8 KiB

use std::{
cmp::PartialEq,
fmt,
mem,
collections::HashSet,
str,
};
/// The kind of constraint a tag places on a search.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Rule
{
    /// The tag has to be found, otherwise the search fails.
    Required,
    /// When any tags carry this rule, at least one of them has to be found.
    Optional,
    /// The tag may not be found; finding it fails the search.
    Rejected,
}
/// One tag rule together with the pre-computed strings it is compared on.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Tag
{
    /// Whether the tag is required, optional or rejected.
    rule: Rule,
    /// Representations of the tag text, as filled in by `fuzz`.
    strings: Vec<String>,
    /// Index into `strings` returned by `fuzz`; `Display` and `String::from` read this entry.
    normal_idx: usize,
    /// Mode the representations were generated with; reused when comparing against other strings.
    repr: RepresentationMode,
}
/// Controls how forgiving tag comparison is.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum RepresentationMode
{
    /// Most forgiving mode: meant to ignore punctuation in addition to what `Normal` ignores.
    ///
    /// NOTE(review): the previous comment also promised ignoring text in parentheses;
    /// the representation code visible in this file only strips `:`, `.`, `,` and spaces.
    Lenient,
    /// Disregards differences in spaces, case and underscores.
    Normal,
    /// Compares the text exactly as written.
    Strict,
}

impl Default for RepresentationMode
{
    /// The default mode is [`RepresentationMode::Strict`].
    fn default() -> Self
    {
        RepresentationMode::Strict
    }
}
/// Collapses every run of whitespace characters into a single `' '`.
///
/// Any whitespace character (tab, newline, ...) starting a run is emitted as a
/// plain space; the remaining characters of that run are dropped. All other
/// characters are copied through unchanged.
fn remove_whitespace<T: IntoIterator<Item=char>>(input: T) -> String
{
    let mut collapsed = String::new();
    // True while the previously seen character was whitespace.
    let mut in_run = false;
    for ch in input {
        let blank = ch.is_whitespace();
        if !blank {
            collapsed.push(ch);
        } else if !in_run {
            collapsed.push(' ');
        }
        in_run = blank;
    }
    collapsed
}
/// Capacity hint for the vectors handed to `fuzz`: roughly the largest number
/// of representations a single call is expected to produce.
const FUZZ_SIZE_HINT: usize = 7;
/// Expands `input` into the string representations that `mode` compares on,
/// appending them to `output`.
///
/// * `Strict` appends only the input itself.
/// * `Normal` appends, in order: the lowercased input, the whitespace-collapsed
///   input with spaces replaced by `_`, the input with all whitespace removed,
///   the whitespace-collapsed input, and finally the untouched input.
/// * `Lenient` appends the input stripped of `:`, `.`, `,` and spaces, followed
///   by everything `Normal` appends.
///
/// Returns the index in `output` of the untouched input.
fn fuzz<S: Into<String>>(input: S, output: &mut Vec<String>, mode: &RepresentationMode) -> usize
{
    let input = input.into();
    match mode {
        RepresentationMode::Strict => {
            output.push(input);
            // Index of the entry just pushed; stays correct even if `output`
            // already held entries (it is 0 for an empty vector, as before).
            output.len()-1
        },
        RepresentationMode::Normal => {
            output.push(input.to_lowercase());
            {
                let normal = remove_whitespace(input.chars());
                output.push(normal.replace(" ", "_"));
                output.push(normal.chars().filter(|ch| !ch.is_whitespace()).collect());
                output.push(normal);
            }
            // The untouched input goes last; its index is what callers display.
            output.push(input);
            output.len()-1
        },
        RepresentationMode::Lenient => {
            const REMOVE: &[char] = &[
                ':',
                '.',
                ',',
                ' ',
            ];
            output.push(input.chars().filter(|ch| !REMOVE.contains(ch)).collect());
            // Delegate the remaining representations to `Normal`. Recursing with
            // `mode` itself (still `Lenient`) would re-enter this branch forever.
            fuzz(input, output, &RepresentationMode::Normal)
        },
    }
}
impl Tag
{
    /// Builds a tag from its text, the rule it carries and the comparison mode.
    ///
    /// The representations of `tag` for `mode` are generated once here via
    /// `fuzz` and stored alongside the index `fuzz` reports.
    pub fn new<S: Into<String>>(tag: S, rule: Rule, mode: RepresentationMode) -> Self
    {
        let mut strings = Vec::with_capacity(FUZZ_SIZE_HINT);
        let normal_idx = fuzz(tag, &mut strings, &mode);
        Self { rule, strings, normal_idx, repr: mode }
    }
}
/// Error produced when a tag rule with no tag text is parsed.
#[derive(Debug)]
pub struct ParseError;

impl std::error::Error for ParseError {}

impl fmt::Display for ParseError
{
    /// Writes the fixed, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
    {
        f.write_str("cannot have empty tag rule")
    }
}
impl str::FromStr for Tag
{
    type Err = ParseError;

    /// Parses a single tag rule.
    ///
    /// A leading `+` marks the tag as optional and a leading `-` as rejected;
    /// anything else is a required tag whose name is the whole string. The tag
    /// is created with the default `RepresentationMode`.
    ///
    /// # Errors
    ///
    /// Returns `ParseError` when the tag name is empty, i.e. for `""`, `"+"`
    /// and `"-"`.
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err>
    {
        let mut chars = s.chars();
        let (rule, name) = match chars.next() {
            // The prefix character is not part of the name.
            Some('+') => (Rule::Optional, chars.as_str()),
            Some('-') => (Rule::Rejected, chars.as_str()),
            // No prefix: the first character belongs to the name, so the name
            // is the entire input. Checking only the remainder here would
            // wrongly reject one-character tags such as "a".
            Some(_) => (Rule::Required, s),
            None => return Err(ParseError),
        };
        if name.is_empty() {
            return Err(ParseError);
        }
        Ok(Self::new(name, rule, RepresentationMode::default()))
    }
}
impl<T> PartialEq<T> for Tag
where T: AsRef<str>
{
    /// Compares this tag against a plain string.
    ///
    /// `other` is expanded with `fuzz` under this tag's own mode, and the two
    /// lists of representations are compared position by position; the values
    /// are equal as soon as one pair matches.
    fn eq(&self, other: &T) -> bool
    {
        let mut candidates = Vec::with_capacity(FUZZ_SIZE_HINT);
        fuzz(other.as_ref(), &mut candidates, &self.repr);
        self.strings
            .iter()
            .zip(candidates.iter())
            .any(|(ours, theirs)| ours == theirs)
    }
}
impl From<Tag> for String
{
fn from(mut tag: Tag) -> Self
{
mem::replace(&mut tag.strings[tag.normal_idx], Default::default())
}
}
impl fmt::Display for Tag
{
    /// Writes the representation stored at `normal_idx`; the rule is not printed.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
    {
        f.write_str(&self.strings[self.normal_idx])
    }
}
/// Reason a `search` failed, borrowing the tag that caused the failure.
#[derive(Debug)]
pub enum Error<'a>
{
/// A tag with the `Required` rule was not matched.
Required(&'a Tag),
/// Tags with the `Optional` rule were given, but none of them was matched.
NoOptional,
/// A tag with the `Rejected` rule was matched.
Rejected(&'a Tag),
}
/// Same failure reasons as `Error`, but owning the offending tag instead of borrowing it.
#[derive(Debug)]
pub enum OwnedError
{
/// A tag with the `Required` rule was not matched.
Required(Tag),
/// Tags with the `Optional` rule were given, but none of them was matched.
NoOptional,
/// A tag with the `Rejected` rule was matched.
Rejected(Tag),
}
/// Outcome of `search`: the matched tags on success, or an `Error` describing the rule that failed.
pub type Result<'a> = std::result::Result<Vec<&'a Tag>, Error<'a>>;
/// Search a set of strings for a matching ruleset for `tags`. Returns the matched tags on success
pub fn search<'a, T, U, V>(matches: T, tags: U) -> Result<'a>
where T: IntoIterator<Item=V>,
U: IntoIterator<Item=&'a Tag>,
V: AsRef<str>,
{
let tags: Vec<&'a Tag> = tags.into_iter().collect();
let mut output = Vec::with_capacity(tags.len());
let mut matched = HashSet::new();
for matches in matches.into_iter()
{
for tag in tags.iter() {
if *tag == &matches {
matched.insert(tag);
break;
}
}
}
let mut needs_opt = false;
let mut has_opt = false;
for tag in tags.iter()
{
if matched.contains(tag) {
match tag.rule {
Rule::Optional => {has_opt = true; needs_opt = true;},
Rule::Rejected => return Err(Error::Rejected(tag)),
_ => (),
}
output.push(tag.clone());
} else if tag.rule == Rule::Optional {
needs_opt = true;
} else if tag.rule == Rule::Required {
return Err(Error::Required(tag));
}
}
if needs_opt && !has_opt {
return Err(Error::NoOptional);
}
Ok(output)
}
/// Parses a string of tag rules separated by single spaces.
///
/// Pieces that fail to parse (including the empty pieces produced by
/// consecutive spaces) are skipped.
#[inline]
pub fn parse<T>(string: T) -> Vec<Tag>
where T: AsRef<str>
{
    let pieces = string.as_ref().split(" ");
    parse_iter(pieces)
}
/// Parse a collection of tag rules, ignoring empty or invalid entries
pub fn parse_iter<T,I>(from: I) -> Vec<Tag>
where I: IntoIterator<Item=T>,
T: AsRef<str>
{
from.into_iter()
.filter_map(|s| {
if let Ok(tag) = s.as_ref().parse() {
Some(tag)
} else {
None
}
}).collect()
}
impl fmt::Display for Error<'_>
{
    /// Writes a one-line description of the failure, naming the offending tag where there is one.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
    {
        match self {
            Self::Required(tag) => write!(f, "a required tag was not present: {}", tag),
            Self::Rejected(tag) => write!(f, "a rejected tag was present: {}", tag),
            Self::NoOptional => f.write_str("there were no optional tags, at least one is needed"),
        }
    }
}
impl fmt::Display for OwnedError
{
    /// Writes a one-line description of the failure, naming the offending tag where there is one.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
    {
        match self {
            Self::Required(tag) => write!(f, "a required tag was not present: {}", tag),
            Self::Rejected(tag) => write!(f, "a rejected tag was present: {}", tag),
            Self::NoOptional => f.write_str("there were no optional tags, at least one is needed"),
        }
    }
}
impl std::error::Error for Error<'_>{}
impl std::error::Error for OwnedError{}
impl From<Error<'_>> for OwnedError
{
fn from(er: Error<'_>) -> Self
{
match er {
Error::NoOptional => Self::NoOptional,
Error::Rejected(t) => Self::Rejected(t.to_owned()),
Error::Required(t) => Self::Required(t.to_owned()),
}
}
}