From 860b4275847a959f70654a1c8893b9fc93baaacc Mon Sep 17 00:00:00 2001 From: Avril Date: Sun, 26 Jul 2020 06:18:57 +0100 Subject: [PATCH] tag filter --- Cargo.toml | 2 +- src/args.rs | 12 +- src/config.rs | 4 +- src/error.rs | 21 +++ src/main.rs | 2 + src/tags.rs | 328 ++++++++++++++++++++++++++++++++++++++++++++++ src/work_async.rs | 34 +++-- 7 files changed, 390 insertions(+), 13 deletions(-) create mode 100644 src/tags.rs diff --git a/Cargo.toml b/Cargo.toml index 964c4ab..a837211 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lolistealer" -version = "0.1.1" +version = "1.0.0" authors = ["Avril "] edition = "2018" diff --git a/src/args.rs b/src/args.rs index 6ee8015..9bd5dee 100644 --- a/src/args.rs +++ b/src/args.rs @@ -17,8 +17,14 @@ lazy_static! { /// Print usage then exit with code `1` pub fn usage() -> ! { - println!("Usage: {} [--rating ] []", &PROGRAM_NAME[..]); + println!("lolistealer version {}\n", env!("CARGO_PKG_VERSION")); + println!("Usage: {} [--rating ] [--tags ][]", &PROGRAM_NAME[..]); println!("Usage: {} --help", &PROGRAM_NAME[..]); + + println!("Filter expression:"); + println!(" tag_name\tMust contain this tag"); + println!(" -tag_name\tMust not contains this tag"); + println!(" +tag_name\tMust contains at least one tag prepended with `+`"); std::process::exit(1) } @@ -57,6 +63,7 @@ where I: IntoIterator let mut paths = Vec::new(); let mut one = String::default(); let mut reading = true; + let mut tags = Vec::new(); macro_rules! 
take_one { () => { @@ -78,6 +85,7 @@ where I: IntoIterator "-" => reading = false, "--help" => return Ok(Mode::Help), "--rating" if take_one!() => rating = one.parse::()?, + "--tags" if take_one!() => tags = tags::parse(&one), _ => paths.push(try_dir(arg)?), } } else { @@ -89,7 +97,7 @@ where I: IntoIterator return Err(Error::NoOutput); } - Ok(Mode::Normal(config::Config{rating, output: paths})) + Ok(Mode::Normal(config::Config{rating, output: paths, tags})) } #[derive(Debug)] diff --git a/src/config.rs b/src/config.rs index 9924c35..3e4fbb9 100644 --- a/src/config.rs +++ b/src/config.rs @@ -35,7 +35,8 @@ pub enum OutputType pub struct Config { pub rating: Rating, - pub output: Vec + pub output: Vec, + pub tags: Vec, } impl Default for Rating @@ -52,6 +53,7 @@ impl Default for Config Self { rating: Rating::default(), output: Vec::new(), + tags: Vec::new(), } } } diff --git a/src/error.rs b/src/error.rs index 891ebd9..25efda4 100644 --- a/src/error.rs +++ b/src/error.rs @@ -6,6 +6,7 @@ use std::{ use super::{ tempfile, loli, + tags, }; #[derive(Debug)] @@ -16,7 +17,9 @@ pub enum Error HTTP(reqwest::Error), HTTPStatus(reqwest::StatusCode), TempFile(tempfile::error::Error), + Tags(tags::OwnedError), Loli(loli::error::Error), + ChildPanic } impl error::Error for Error @@ -41,6 +44,8 @@ impl fmt::Display for Error Error::HTTP(http) => write!(f, "http internal error: {}", http), Error::HTTPStatus(status) => write!(f, "response returned status code {}", status), Error::Loli(loli) => write!(f, "loli interpretation: {}", loli), + Error::Tags(tags) => write!(f, "no match for tags: {}", tags), + Error::ChildPanic => write!(f, "child panic"), _ => write!(f, "unknown error"), } } @@ -88,3 +93,19 @@ impl From for Error Self::Loli(er.into()) } } + +impl From> for Error +{ + fn from(er: tags::Error<'_>) -> Self + { + Self::Tags(er.into()) + } +} + +impl From for Error +{ + fn from(er: tags::OwnedError) -> Self + { + Self::Tags(er) + } +} diff --git a/src/main.rs b/src/main.rs 
// Tag filter expressions.
//
// A filter is a space separated list of rules: `tag` (must be present),
// `+tag` (at least one `+` tag must be present) and `-tag` (must not be
// present). `parse` turns such a string into `Tag`s and `search` checks a
// set of candidate strings against them.

use std::{
    cmp::PartialEq,
    collections::HashSet,
    fmt,
    mem,
    str,
};

/// Tag matching rules
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum Rule {
    /// This tag /must/ be present.
    Required,
    /// At least one tag marked `optional` must be present.
    Optional,
    /// This tag /must not/ be present.
    Rejected,
}

/// Represents a single tag rule together with the pre-computed string
/// representations it is allowed to match.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Tag {
    /// How the presence/absence of this tag is judged by `search`.
    rule: Rule,
    /// Every representation produced by `fuzz` for this tag.
    strings: Vec<String>,
    /// Index into `strings` of the tag exactly as the user wrote it.
    normal_idx: usize,
    /// Mode used to build `strings`; candidates are expanded with the same mode.
    repr: RepresentationMode,
}

/// How should tags be interpreted?
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum RepresentationMode {
    /// Everything `Normal` does, plus a representation with `:`, `.`, `,`
    /// and spaces removed.
    Lenient,
    /// Adds lower-cased, whitespace-collapsed, underscore-joined and
    /// space-free representations next to the verbatim one.
    Normal,
    /// Ignores nothing
    Strict,
}

impl Default for RepresentationMode {
    fn default() -> Self {
        Self::Strict
    }
}

/// Collapse every run of whitespace into a single `' '`.
fn remove_whitespace<T: IntoIterator<Item = char>>(input: T) -> String {
    let mut previous_was_space = false;
    input
        .into_iter()
        .filter_map(|ch| {
            let is_space = ch.is_whitespace();
            // Only the first whitespace character of a run survives.
            let keep = !(is_space && previous_was_space);
            previous_was_space = is_space;
            if !keep {
                None
            } else if is_space {
                Some(' ')
            } else {
                Some(ch)
            }
        })
        .collect()
}

/// How big are the max output of `fuzz` likely to be?
const FUZZ_SIZE_HINT: usize = 7;

/// Creates the representations of `input` for `mode`, appending them to
/// `output`.
///
/// Returns the index in `output` of the verbatim (un-fuzzed) input, which is
/// always the last string pushed.
fn fuzz<S: Into<String>>(input: S, output: &mut Vec<String>, mode: &RepresentationMode) -> usize {
    let input = input.into();

    match mode {
        RepresentationMode::Strict => {
            output.push(input);
            output.len() - 1
        }
        RepresentationMode::Normal => {
            output.push(input.to_lowercase());
            {
                let normal = remove_whitespace(input.chars());

                output.push(normal.replace(" ", "_"));
                output.push(normal.chars().filter(|ch| !ch.is_whitespace()).collect());
                output.push(normal);
            }
            output.push(input);
            output.len() - 1
        }
        RepresentationMode::Lenient => {
            const REMOVE: &[char] = &[':', '.', ',', ' '];
            output.push(input.chars().filter(|ch| !REMOVE.contains(ch)).collect());
            // Lenient is "Normal plus the stripped form". Recursing with the
            // same `mode` (as this used to) never terminates.
            fuzz(input, output, &RepresentationMode::Normal)
        }
    }
}

impl Tag {
    /// Create a new tag representation
    ///
    /// `tag` is the name as written by the user (without any `+`/`-`
    /// prefix), `rule` decides how `search` treats it and `mode` selects
    /// which representations are generated for matching.
    pub fn new<S: Into<String>>(tag: S, rule: Rule, mode: RepresentationMode) -> Self {
        let mut strings = Vec::with_capacity(FUZZ_SIZE_HINT);
        let normal_idx = fuzz(tag, &mut strings, &mode);
        Self {
            rule,
            strings,
            normal_idx,
            repr: mode,
        }
    }
}

/// Returned when an empty tag is tried to be parsed
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ParseError;

impl std::error::Error for ParseError {}

impl fmt::Display for ParseError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "cannot have empty tag rule")
    }
}

impl str::FromStr for Tag {
    type Err = ParseError;

    /// Parses `name`, `+name` or `-name` using the default representation
    /// mode. Fails when there is no name left after the optional prefix.
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        let mut chars = s.chars();
        let (rule, name) = match chars.next() {
            Some('+') => (Rule::Optional, chars.as_str()),
            Some('-') => (Rule::Rejected, chars.as_str()),
            // No prefix: the whole input, including this first character,
            // is the name. Checking only the remainder for emptiness made
            // single-character tags unparsable.
            Some(_) => (Rule::Required, s),
            None => return Err(ParseError),
        };
        if name.is_empty() {
            return Err(ParseError);
        }

        Ok(Self::new(name, rule, RepresentationMode::default()))
    }
}

impl<T> PartialEq<T> for Tag
where
    T: AsRef<str>,
{
    /// A tag equals a string when any of the tag's representations equals
    /// the representation of the same kind built from `other`.
    fn eq(&self, other: &T) -> bool {
        let mut candidate = Vec::with_capacity(FUZZ_SIZE_HINT);
        fuzz(other.as_ref(), &mut candidate, &self.repr);
        self.strings
            .iter()
            .zip(candidate.iter())
            .any(|(mine, theirs)| mine == theirs)
    }
}

impl From<Tag> for String {
    /// Extracts the tag name exactly as it was given to `Tag::new`.
    fn from(mut tag: Tag) -> Self {
        mem::take(&mut tag.strings[tag.normal_idx])
    }
}

impl fmt::Display for Tag {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", &self.strings[self.normal_idx])
    }
}

/// Reason a set of strings did not satisfy a rule set, borrowing the
/// offending tag.
#[derive(Debug)]
pub enum Error<'a> {
    /// A `Required` tag was not found.
    Required(&'a Tag),
    /// `Optional` tags exist but none of them was found.
    NoOptional,
    /// A `Rejected` tag was found.
    Rejected(&'a Tag),
}

/// Same as `Error`, but owning the offending tag.
#[derive(Debug)]
pub enum OwnedError {
    /// A `Required` tag was not found.
    Required(Tag),
    /// `Optional` tags exist but none of them was found.
    NoOptional,
    /// A `Rejected` tag was found.
    Rejected(Tag),
}

pub type Result<'a> = std::result::Result<Vec<&'a Tag>, Error<'a>>;

/// Search a set of strings for a matching ruleset for `tags`. Returns the matched tags on success
///
/// # Errors
///
/// Rules are checked in the order given by `tags`; the first violated rule
/// is reported as `Error::Required` or `Error::Rejected`. If every such rule
/// passes but optional rules exist and none matched, `Error::NoOptional` is
/// returned.
pub fn search<'a, T, U, V>(matches: T, tags: U) -> Result<'a>
where
    T: IntoIterator<Item = V>,
    U: IntoIterator<Item = &'a Tag>,
    V: AsRef<str>,
{
    let tags: Vec<&'a Tag> = tags.into_iter().collect();

    // Indices (into `tags`) of every rule that some candidate string matched.
    // Each rule is tested independently: stopping at the first rule that
    // matches a string would hide the match from later rules with the same
    // name but a different `Rule`.
    let mut present: HashSet<usize> = HashSet::with_capacity(tags.len());
    for candidate in matches {
        for (idx, tag) in tags.iter().enumerate() {
            if **tag == candidate {
                present.insert(idx);
            }
        }
    }

    let mut output = Vec::with_capacity(tags.len());
    let mut needs_opt = false;
    let mut has_opt = false;
    for (idx, tag) in tags.iter().copied().enumerate() {
        let found = present.contains(&idx);
        match (&tag.rule, found) {
            (Rule::Rejected, true) => return Err(Error::Rejected(tag)),
            (Rule::Required, false) => return Err(Error::Required(tag)),
            (Rule::Optional, true) => {
                needs_opt = true;
                has_opt = true;
            }
            (Rule::Optional, false) => needs_opt = true,
            (Rule::Required, true) | (Rule::Rejected, false) => {}
        }
        if found {
            output.push(tag);
        }
    }

    if needs_opt && !has_opt {
        return Err(Error::NoOptional);
    }

    Ok(output)
}

/// Parse a string of many tag rules, ignoring empty or invalid entries
#[inline]
pub fn parse<T>(string: T) -> Vec<Tag>
where
    T: AsRef<str>,
{
    parse_iter(string.as_ref().split(' '))
}

/// Parse a collection of tag rules, ignoring empty or invalid entries
pub fn parse_iter<I, T>(from: I) -> Vec<Tag>
where
    I: IntoIterator<Item = T>,
    T: AsRef<str>,
{
    from.into_iter()
        .filter_map(|rule| rule.as_ref().parse().ok())
        .collect()
}

impl fmt::Display for Error<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::NoOptional => write!(f, "there were no optional tags, at least one is needed"),
            Self::Rejected(tag) => write!(f, "a rejected tag was present: {}", tag),
            Self::Required(tag) => write!(f, "a required tag was not present: {}", tag),
        }
    }
}

impl OwnedError {
    /// Borrowed view of this error, so both error types share one message table.
    fn as_borrowed(&self) -> Error<'_> {
        match self {
            Self::NoOptional => Error::NoOptional,
            Self::Rejected(tag) => Error::Rejected(tag),
            Self::Required(tag) => Error::Required(tag),
        }
    }
}

impl fmt::Display for OwnedError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(&self.as_borrowed(), f)
    }
}

impl std::error::Error for Error<'_> {}
impl std::error::Error for OwnedError {}

impl From<Error<'_>> for OwnedError {
    fn from(er: Error<'_>) -> Self {
        match er {
            Error::NoOptional => Self::NoOptional,
            Error::Rejected(t) => Self::Rejected(t.clone()),
            Error::Required(t) => Self::Required(t.clone()),
        }
    }
}
bounds.create_child(to.as_ref().with_extension(bounds.image().ext()))?; - prog_send!(progress.println("Decoding...")); + + let tags=tags.as_ref(); + if tags.len() > 0 { + let res = tags::search(decoded.tags().iter(), tags)?; + if res.len() > 0 { + prog_send!(progress.println(format!("Matched tags {}", res.into_iter().map(|x| format!("{}", x)).join(", ")))); + } + } + prog_send!(progress.println("Decoding...")); let sz = bounds.decode(&mut decoded)?; prog_send!(link progress.println(format!("Decode complete ({} bytes)", sz))); @@ -92,6 +100,7 @@ pub async fn work(conf: config::Config) -> Result<(), Box { let url = url::parse(&rating); let mut prog = prog_writer.clone_with(format!("-> {:?}", path)); + let tags = conf.tags.clone(); children.push(tokio::task::spawn(async move { prog.println(format!("Starting download ({})...", url)).await.expect("fatal"); @@ -107,19 +116,19 @@ pub async fn work(conf: config::Config) -> Result<(), Box config::OutputType::File(file) => file, config::OutputType::Directory(dir) => unimplemented!(), //TODO: implement get hash to file }; - let loli = match decode(&temp, &path, &mut prog).await { + let loli = match decode(&temp, &path, &tags, &mut prog).await { Ok(v) => v, Err(e) => { prog_send!(link prog.println(format!("Failed decoding: {}", e))); - break 'clean false; + break 'clean Some(e); }, }; prog_send!(link prog.println(format!("{:?} Complete", loli))); - break 'clean true; + break 'clean None; }, }; - false + Some(error::Error::Unknown) }; prog_send!(link prog.pop_task(task)); @@ -130,21 +139,28 @@ pub async fn work(conf: config::Config) -> Result<(), Box prog_send!(link prog_writer.println("Children working...")); let mut done =0; let total = children.len(); + let mut failures = Vec::with_capacity(children.len()); for child in children.into_iter() { match child.await { - Ok(true) => done+=1, + Ok(None) => done+=1, + Ok(Some(err)) => failures.push(err), Err(err) => { prog_send!(try link unwind prog_writer.println(format!("Child 
panic: {}", err))); + failures.push(error::Error::ChildPanic); }, - _ => (), } } prog_send!(link prog_writer.set_title("")); prog_send!(try link prog_writer.kill()); prog.await.expect("mpsc fatal"); println!("Completed {} / {} lolis ({} failed).", done, total, total-done); - + if failures.len() > 0 { + println!("Reasons for failure(s):"); + for failure in failures.into_iter() { + println!("\t{}", failure); + } + } Ok(()) }