use std::{
    cmp::PartialEq,
    fmt,
    mem,
    collections::HashSet,
    str,
};

/// Tag matching rules
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum Rule {
    /// This tag /must/ be present.
    Required,
    /// At least one tag marked `optional` must be present.
    Optional,
    /// This tag /must not/ be present.
    Rejected,
}

/// Represents a tag
///
/// A tag stores every string representation produced for it by its
/// [`RepresentationMode`]; `normal_idx` is the index of the unmodified
/// input inside `strings`.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Tag {
    rule: Rule,
    strings: Vec<String>,
    normal_idx: usize,
    repr: RepresentationMode,
}

/// How should tags be interpreted?
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum RepresentationMode {
    /// Everything `Normal` tries, plus a form with `:`, `.`, `,` and spaces removed.
    Lenient,
    /// Tries, independently of each other: the lowercased form, and the forms with
    /// runs of whitespace collapsed to one space, replaced by `_`, or removed.
    Normal,
    /// Ignores nothing
    Strict,
}

impl Default for RepresentationMode {
    fn default() -> Self {
        Self::Strict
    }
}

/// Collapse every run of consecutive whitespace into a single `' '`.
fn remove_whitespace<T: IntoIterator<Item = char>>(input: T) -> String {
    // Tracks whether the previous character was whitespace, so only the
    // first character of a run emits a space.
    let mut in_run = false;
    input
        .into_iter()
        .filter_map(|ch| {
            if !ch.is_whitespace() {
                in_run = false;
                Some(ch)
            } else if in_run {
                None
            } else {
                in_run = true;
                Some(' ')
            }
        })
        .collect()
}

/// Capacity hint for the representation list filled by `fuzz`.
const FUZZ_SIZE_HINT: usize = 7;

/// Creates representations
///
/// Pushes every representation of `input` for `mode` onto `output` and
/// returns the index in `output` of the unmodified input, which is always
/// pushed last. Two values are compared by pairing up the lists produced
/// with the same mode, so the push order is part of the contract.
fn fuzz<S: Into<String>>(input: S, output: &mut Vec<String>, mode: &RepresentationMode) -> usize {
    let input = input.into();

    if let RepresentationMode::Lenient = mode {
        const REMOVE: &[char] = &[':', '.', ',', ' '];
        output.push(input.chars().filter(|ch| !REMOVE.contains(ch)).collect());
    }

    match mode {
        RepresentationMode::Strict => {}
        // Lenient shares the Normal representations. This is done by falling
        // through here rather than by recursing: recursing with the same mode
        // would never terminate.
        RepresentationMode::Normal | RepresentationMode::Lenient => {
            output.push(input.to_lowercase());
            let collapsed = remove_whitespace(input.chars());
            output.push(collapsed.replace(' ', "_"));
            output.push(collapsed.chars().filter(|ch| !ch.is_whitespace()).collect());
            output.push(collapsed);
        }
    }

    output.push(input);
    output.len() - 1
}

impl Tag {
    /// Create a new tag representation
    ///
    /// `tag` is the tag text, `rule` how it takes part in matching, and
    /// `mode` which representations are generated for comparison.
    pub fn new<S: Into<String>>(tag: S, rule: Rule, mode: RepresentationMode) -> Self {
        let mut strings = Vec::with_capacity(FUZZ_SIZE_HINT);
        let normal_idx = fuzz(tag, &mut strings, &mode);
        Self {
            rule,
            strings,
            normal_idx,
            repr: mode,
        }
    }
}

/// Returned when an empty tag is tried to be parsed
#[derive(Debug)]
pub struct ParseError;

impl std::error::Error for ParseError {}

impl fmt::Display for ParseError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "cannot have empty tag rule")
    }
}

impl str::FromStr for Tag {
    type Err = ParseError;

    /// Parse a single tag rule.
    ///
    /// A leading `+` marks the tag as [`Rule::Optional`], a leading `-` as
    /// [`Rule::Rejected`]; anything else is a [`Rule::Required`] tag. The
    /// tag is created with the default [`RepresentationMode`].
    ///
    /// # Errors
    ///
    /// Returns [`ParseError`] when no tag name is left after the optional
    /// prefix (`""`, `"+"`, `"-"`).
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        let mut chars = s.chars();
        let (rule, name) = match chars.next() {
            Some('+') => (Rule::Optional, chars.as_str()),
            Some('-') => (Rule::Rejected, chars.as_str()),
            // No prefix: the first character belongs to the name itself.
            Some(_) => (Rule::Required, s),
            None => return Err(ParseError),
        };
        if name.is_empty() {
            return Err(ParseError);
        }
        Ok(Self::new(name, rule, RepresentationMode::default()))
    }
}

impl<T> PartialEq<T> for Tag
where
    T: AsRef<str>,
{
    /// A tag equals a string when any of the tag's representations equals the
    /// string's representation at the same position, generated with the tag's mode.
    fn eq(&self, other: &T) -> bool {
        let mut theirs = Vec::with_capacity(FUZZ_SIZE_HINT);
        fuzz(other.as_ref(), &mut theirs, &self.repr);
        self.strings.iter().zip(&theirs).any(|(mine, other)| mine == other)
    }
}

impl From<Tag> for String {
    /// Extract the unmodified tag text.
    fn from(mut tag: Tag) -> Self {
        mem::take(&mut tag.strings[tag.normal_idx])
    }
}

impl fmt::Display for Tag {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", &self.strings[self.normal_idx])
    }
}

/// Why a ruleset failed to match, borrowing the offending tag.
#[derive(Debug)]
pub enum Error<'a> {
    /// A required tag was not present.
    Required(&'a Tag),
    /// The ruleset has optional tags, but none of them was present.
    NoOptional,
    /// A rejected tag was present.
    Rejected(&'a Tag),
}

/// Same as [`Error`], but owning the offending tag.
#[derive(Debug)]
pub enum OwnedError {
    /// A required tag was not present.
    Required(Tag),
    /// The ruleset has optional tags, but none of them was present.
    NoOptional,
    /// A rejected tag was present.
    Rejected(Tag),
}

/// Outcome of [`search`]: the matched tags, or the first rule violation.
pub type Result<'a> = std::result::Result<Vec<&'a Tag>, Error<'a>>;

/// Search a set of strings for a matching ruleset for `tags`. Returns the matched tags on success
///
/// Each string in `matches` marks at most one tag as present: the first tag
/// in `tags` that compares equal to it.
///
/// # Errors
///
/// Tags are checked in order. The first present [`Rule::Rejected`] tag or
/// absent [`Rule::Required`] tag is returned as an error. If neither occurs
/// and the ruleset contains optional tags of which none is present,
/// [`Error::NoOptional`] is returned.
pub fn search<'a, T, U, V>(matches: T, tags: U) -> Result<'a>
where
    T: IntoIterator<Item = V>,
    U: IntoIterator<Item = &'a Tag>,
    V: AsRef<str>,
{
    let tags: Vec<&'a Tag> = tags.into_iter().collect();
    let mut output = Vec::with_capacity(tags.len());

    let mut matched: HashSet<&'a Tag> = HashSet::new();
    for candidate in matches {
        let candidate = candidate.as_ref();
        for &tag in &tags {
            if *tag == candidate {
                matched.insert(tag);
                break;
            }
        }
    }

    let mut needs_opt = false;
    let mut has_opt = false;
    for &tag in &tags {
        let present = matched.contains(tag);
        match (&tag.rule, present) {
            (Rule::Rejected, true) => return Err(Error::Rejected(tag)),
            (Rule::Required, false) => return Err(Error::Required(tag)),
            (Rule::Optional, true) => {
                needs_opt = true;
                has_opt = true;
                output.push(tag);
            }
            (Rule::Optional, false) => needs_opt = true,
            (Rule::Required, true) => output.push(tag),
            (Rule::Rejected, false) => {}
        }
    }

    if needs_opt && !has_opt {
        return Err(Error::NoOptional);
    }
    Ok(output)
}

/// Parse a string of many tag rules, ignoring empty or invalid entries
///
/// Rules are separated by single space characters.
#[inline]
pub fn parse<T>(string: T) -> Vec<Tag>
where
    T: AsRef<str>,
{
    parse_iter(string.as_ref().split(" "))
}

/// Parse a collection of tag rules, ignoring empty or invalid entries
pub fn parse_iter<I, T>(from: I) -> Vec<Tag>
where
    I: IntoIterator<Item = T>,
    T: AsRef<str>,
{
    from.into_iter()
        .filter_map(|s| s.as_ref().parse::<Tag>().ok())
        .collect()
}

impl fmt::Display for Error<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::NoOptional => write!(f, "there were no optional tags, at least one is needed"),
            Self::Rejected(tag) => write!(f, "a rejected tag was present: {}", tag),
            Self::Required(tag) => write!(f, "a required tag was not present: {}", tag),
        }
    }
}

impl fmt::Display for OwnedError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::NoOptional => write!(f, "there were no optional tags, at least one is needed"),
            Self::Rejected(tag) => write!(f, "a rejected tag was present: {}", tag),
            Self::Required(tag) => write!(f, "a required tag was not present: {}", tag),
        }
    }
}

impl std::error::Error for Error<'_> {}
impl std::error::Error for OwnedError {}

impl From<Error<'_>> for OwnedError {
    /// Detach the error from the borrowed ruleset by cloning the offending tag.
    fn from(er: Error<'_>) -> Self {
        match er {
            Error::NoOptional => Self::NoOptional,
            Error::Rejected(t) => Self::Rejected(t.clone()),
            Error::Required(t) => Self::Required(t.clone()),
        }
    }
}