From 77809f6fafbb6343041ccbabaf568bfe9524b4c0 Mon Sep 17 00:00:00 2001 From: Avril Date: Sat, 25 Jul 2020 22:55:03 +0100 Subject: [PATCH] now gets tags from page --- Cargo.toml | 4 +- TODO | 5 +- src/list.rs | 116 +++++++++++++++++++++++++++++++++++++++++++ src/loli/encoding.rs | 26 +++++++++- src/loli/mod.rs | 26 ++++++++-- src/main.rs | 3 ++ 6 files changed, 171 insertions(+), 9 deletions(-) create mode 100644 src/list.rs diff --git a/Cargo.toml b/Cargo.toml index 62aafbf..964c4ab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lolistealer" -version = "0.1.0" +version = "0.1.1" authors = ["Avril "] edition = "2018" @@ -17,7 +17,7 @@ codegen-units = 1 [dependencies] termprogress = "0.1.0" lazy_static = "1.4" -tokio = {version = "0.2", features= ["full"], optional=true} +tokio = {version = "0.2", features= ["rt-threaded", "io-driver", "io-util", "macros", "fs"], optional=true} reqwest = {version = "0.10", features= ["stream"]} memmap = "0.7" getrandom = "0.1" diff --git a/TODO b/TODO index 3c90a32..b2b00eb 100644 --- a/TODO +++ b/TODO @@ -1,9 +1,8 @@ +Tags filtering before decode + implement output to dir Remove Cargo.lock when termprogress is uploaded and Cargo.toml is properly formatted New UI: lolistealer --number 5 output-dir/ - - -!!! memmap slice size is incorrect on release build diff --git a/src/list.rs b/src/list.rs new file mode 100644 index 0000000..fee4c3d --- /dev/null +++ b/src/list.rs @@ -0,0 +1,116 @@ +use std::{ + mem, +}; +struct Cons +{ + item: T, + next: Option>>, +} + +/// A safe linked list, with remove() for non-nightly build. +pub struct LinkedList +{ + head: Option>, + sz: usize, +} + +pub struct Iter<'a, T> +{ + current: Option<&'a Cons>, +} + +pub struct IterMut<'a, T> +{ + current: Option<&'a mut Cons>, +} + +pub struct IntoIter +{ + current: Option>>, +} + +impl Default for LinkedList +{ + #[inline] + fn default() -> Self + { + Self::new() + } +} + +impl LinkedList +{ + pub const fn new() -> Self + { + Self { head: None, sz: 0 } + } + + pub fn push(&mut self, value: T) + { + if let Some(head) = &mut self.head + { + head.next = Some(Box::new(mem::replace(head, Cons{ item: value, next: None }))); + self.sz+=1; + } else { + self.head = Some(Cons{item: value, next: None}); + self.sz=1; + } + } + + #[inline] + pub fn len(&self) -> usize + { + self.sz + } + + pub fn pop(&mut self) -> Option + { + if let Some(_) = self.head + { + let old = mem::replace(&mut self.head, None).unwrap(); + self.head = old.next.map(|x| *x); + self.sz-=1; + Some(old.item) + } else { + None + } + } + + pub fn clear(&mut self) + { + self.head = None; + self.sz=0; + } + + pub fn iter<'a>(&'a self) -> Iter<'a, T> + { + Iter{ current: self.head.as_ref() } + } + + pub fn iter_mut<'a>(&'a mut self) -> IterMut<'a, T> + { + IterMut{ current: self.head.as_mut() } + } + + pub fn remove(&mut self, at: usize) -> T + { + assert!(at < self.sz, "Cannot remove at element > than size"); + + todo!() + //TODO: Implement through iter_mut() + } +} + +impl<'a, T> Iterator for Iter<'a, T> +{ + type Item = &'a T; + fn next(&mut self) -> Option + { + if self.current.is_some() { + let mut current = mem::replace(&mut self.current, None); + let mut nref = current.unwrap().next.as_ref().map(|x| x.as_ref()) + } else { + None + } + } +} diff --git a/src/loli/encoding.rs b/src/loli/encoding.rs index 65a82df..7a2320c 100644 --- a/src/loli/encoding.rs +++ b/src/loli/encoding.rs @@ -93,6 +93,9 @@ fn find_back(haystack: &[u8], needle: &[u8]) -> Option { const MARKER_BASE64_BEGIN: &[u8] = b"base64,"; const MARKER_BASE64_END: &[u8] = b"' />"; +const MARKER_TAGS_END: &[u8] = br"

) -> Result, error::DecodeError> { @@ -107,7 +110,28 @@ pub(super) fn find_bounds(from: impl AsRef<[u8]>) -> Result, error: end: end + start, }) } else { - //println!("WAAAAAAH: {:?}", std::str::from_utf8(&from[start..])); + Err(error::DecodeError::Bounds(error::Bound::End)) + } + } else { + Err(error::DecodeError::Bounds(error::Bound::Start)) + } +} + +/// Find the tag bounds in this array +// We should pass this a slice from `base64bounds.end..` +pub(super) fn find_tag_bounds(from: impl AsRef<[u8]>) -> Result, error::DecodeError> +{ + let from = from.as_ref(); + + if let Some(start) = find(from, MARKER_TAGS_BEGIN) { + let start = start + MARKER_TAGS_BEGIN.len(); + if let Some(end) = find_back(&from[start..], MARKER_TAGS_END) {//find_back(from, MARKER_TAGS_END) { + let end = end - MARKER_TAGS_END.len(); + Ok(Range { + start, + end: end + start, + }) + } else { Err(error::DecodeError::Bounds(error::Bound::End)) } } else { diff --git a/src/loli/mod.rs b/src/loli/mod.rs index 3c67050..1e38f9b 100644 --- a/src/loli/mod.rs +++ b/src/loli/mod.rs @@ -77,10 +77,12 @@ impl BasedLoli { let bound = encoding::find_bounds(self.as_ref())?; let image_type = self.try_get_type(bound.start)?; + let tags_range = encoding::find_tag_bounds(&self.map[bound.end..]).ok(); Ok(LoliBounds { loli: self, image_type, range:bound, + tags_range, }) } @@ -143,15 +145,23 @@ impl<'a> LoliBounds<'a> image_type, map: unsafe { MmapOptions::new().map_mut(&file).map_err(|e| error::DecodeError::Map(e, path.into()))? }, file: file, + tags: if let Some(range) = &self.tags_range { + let bytes = self.loli.bytes(); + let range = &bytes[(self.range.end+range.start)..(self.range.end+range.end)]; + if let Ok(string) = std::str::from_utf8(range) { + string.split(' ').map(|x| x.to_owned()).collect() + } else { + Vec::default() + } + } else { Vec::default() }, }) - } - + } /// Attempt to decode to a child container of our owner #[inline] pub fn decode(&self, loli: &mut Loli) -> Result { let bytes = self.loli.bytes(); - + decode(&bytes[self.range.clone()], loli) } } @@ -171,6 +181,7 @@ pub struct Loli image_type: encoding::ImageType, map: MmapMut, file: File, + tags: Vec, } impl AsMut<[u8]> for Loli @@ -186,5 +197,14 @@ pub struct LoliBounds<'a> { loli: &'a BasedLoli, range: Range, + tags_range: Option>, image_type: encoding::ImageType, } + +impl Loli { + /// Get the tags for this loli + pub fn tags(&self) -> &[String] //TODO: Tags filter + { + &self.tags[..] + } +} diff --git a/src/main.rs b/src/main.rs index 5cbcd0d..2e86682 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,6 @@ #![feature(linked_list_remove)] #![feature(label_break_value)] + #![allow(dead_code)] use termprogress::{ @@ -22,6 +23,8 @@ mod work_async; #[cfg(not(feature="async"))] mod work; +//mod list; + pub fn parse_args() -> Result { match args::parse_args()? {