now gets tags from page

master
Avril 4 years ago
parent ac5f323e2d
commit 77809f6faf
Signed by: flanchan
GPG Key ID: 284488987C31F630

@ -1,6 +1,6 @@
[package]
name = "lolistealer"
version = "0.1.0"
version = "0.1.1"
authors = ["Avril <flanchan@cumallover.me>"]
edition = "2018"
@ -17,7 +17,7 @@ codegen-units = 1
[dependencies]
termprogress = "0.1.0"
lazy_static = "1.4"
tokio = {version = "0.2", features= ["full"], optional=true}
tokio = {version = "0.2", features= ["rt-threaded", "io-driver", "io-util", "macros", "fs"], optional=true}
reqwest = {version = "0.10", features= ["stream"]}
memmap = "0.7"
getrandom = "0.1"

@ -1,9 +1,8 @@
Tags filtering before decode
implement output to dir
Remove Cargo.lock when termprogress is uploaded and Cargo.toml is properly formatted
New UI:
lolistealer --number 5 output-dir/
!!! memmap slice size is incorrect on release build

@ -0,0 +1,116 @@
use std::{
mem,
};
/// A single node of the list: one stored value plus an owning link to the
/// node that was pushed before it (if any).
struct Cons<T> {
    item: T,
    next: Option<Box<Cons<T>>>,
}

/// A safe linked list, with remove() for non-nightly build.
///
/// The list behaves like a stack: [`LinkedList::push`] inserts at the front
/// and [`LinkedList::pop`] removes from the front. Index `0` therefore always
/// refers to the most recently pushed element, and iteration runs from the
/// newest element to the oldest.
pub struct LinkedList<T> {
    /// First node, stored inline; every following node is boxed.
    head: Option<Cons<T>>,
    /// Number of elements currently stored.
    sz: usize,
}

/// Borrowing iterator over a [`LinkedList`], yielding `&T` front to back.
pub struct Iter<'a, T> {
    current: Option<&'a Cons<T>>,
}

/// Mutably borrowing iterator over a [`LinkedList`], yielding `&mut T` front to back.
pub struct IterMut<'a, T> {
    current: Option<&'a mut Cons<T>>,
}

/// Owning iterator over a [`LinkedList`], yielding `T` front to back.
pub struct IntoIter<T> {
    current: Option<Box<Cons<T>>>,
}

impl<T> Default for LinkedList<T> {
    /// Equivalent to [`LinkedList::new`].
    #[inline]
    fn default() -> Self {
        Self::new()
    }
}

impl<T> LinkedList<T> {
    /// Create an empty list.
    pub const fn new() -> Self {
        Self { head: None, sz: 0 }
    }

    /// Insert `value` at the front of the list.
    pub fn push(&mut self, value: T) {
        // The old head (if any) moves to the heap and becomes the tail of the new head.
        let previous = mem::take(&mut self.head).map(Box::new);
        self.head = Some(Cons {
            item: value,
            next: previous,
        });
        self.sz += 1;
    }

    /// Number of elements in the list.
    #[inline]
    pub fn len(&self) -> usize {
        self.sz
    }

    /// `true` when the list holds no elements.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.sz == 0
    }

    /// Remove and return the front element, or `None` if the list is empty.
    pub fn pop(&mut self) -> Option<T> {
        let Cons { item, next } = self.head.take()?;
        // Promote the second node (boxed) into the inline head slot.
        self.head = next.map(|boxed| *boxed);
        self.sz -= 1;
        Some(item)
    }

    /// Drop every element, leaving the list empty.
    pub fn clear(&mut self) {
        self.head = None;
        self.sz = 0;
    }

    /// Iterate over shared references to the elements, front to back.
    pub fn iter<'a>(&'a self) -> Iter<'a, T> {
        Iter {
            current: self.head.as_ref(),
        }
    }

    /// Iterate over mutable references to the elements, front to back.
    pub fn iter_mut<'a>(&'a mut self) -> IterMut<'a, T> {
        IterMut {
            current: self.head.as_mut(),
        }
    }

    /// Remove and return the element at index `at` (counted from the front,
    /// so `0` is the most recently pushed element).
    ///
    /// # Panics
    ///
    /// Panics if `at >= self.len()`.
    pub fn remove(&mut self, at: usize) -> T {
        assert!(
            at < self.sz,
            "Cannot remove element at index {} from a list of length {}",
            at,
            self.sz
        );

        // The head is stored inline rather than boxed, so removing it is exactly `pop`.
        if at == 0 {
            return self
                .pop()
                .expect("list length was checked to be non-zero");
        }

        // Walk to the node just before the one being removed.
        let mut prev = self
            .head
            .as_mut()
            .expect("list length was checked to be non-zero");
        for _ in 1..at {
            prev = prev
                .next
                .as_deref_mut()
                .expect("`sz` is out of sync with the node chain");
        }

        // Unlink the target and splice its successor into its place.
        let target = prev
            .next
            .take()
            .expect("`sz` is out of sync with the node chain");
        let Cons { item, next } = *target;
        prev.next = next;
        self.sz -= 1;
        item
    }
}

impl<T> IntoIterator for LinkedList<T> {
    type Item = T;
    type IntoIter = IntoIter<T>;

    /// Consume the list, yielding its elements front to back.
    fn into_iter(self) -> IntoIter<T> {
        IntoIter {
            current: self.head.map(Box::new),
        }
    }
}

impl<'a, T> Iterator for Iter<'a, T> {
    type Item = &'a T;

    fn next(&mut self) -> Option<Self::Item> {
        let node = self.current.take()?;
        // Advance before handing out the item so the next call sees the successor.
        self.current = node.next.as_deref();
        Some(&node.item)
    }
}

impl<'a, T> Iterator for IterMut<'a, T> {
    type Item = &'a mut T;

    fn next(&mut self) -> Option<Self::Item> {
        // Taking the node out of `self` lets its two fields be borrowed
        // disjointly for the full `'a` lifetime.
        let node = self.current.take()?;
        self.current = node.next.as_deref_mut();
        Some(&mut node.item)
    }
}

impl<T> Iterator for IntoIter<T> {
    type Item = T;

    fn next(&mut self) -> Option<Self::Item> {
        let node = self.current.take()?;
        let Cons { item, next } = *node;
        self.current = next;
        Some(item)
    }
}

@ -93,6 +93,9 @@ fn find_back(haystack: &[u8], needle: &[u8]) -> Option<usize> {
// Byte markers searched for in the raw page data.
// NOTE(review): the base64 pair is presumably consumed by `find_bounds`; its body is not fully visible here — confirm.
const MARKER_BASE64_BEGIN: &[u8] = b"base64,";
const MARKER_BASE64_END: &[u8] = b"' /><p"; //Search from end here with .rev()
// Opening marker of the tags paragraph; `find_tag_bounds` starts the tag range just after it.
const MARKER_TAGS_BEGIN: &[u8] = br"<p id='tags'>";
// Closing marker of the tags paragraph; `find_tag_bounds` locates it with `find_back`.
const MARKER_TAGS_END: &[u8] = br"</p><a id='about'";
/// Find the base64 page bounds in this array
pub(super) fn find_bounds(from: impl AsRef<[u8]>) -> Result<Range<usize>, error::DecodeError>
{
@ -107,7 +110,28 @@ pub(super) fn find_bounds(from: impl AsRef<[u8]>) -> Result<Range<usize>, error:
end: end + start,
})
} else {
//println!("WAAAAAAH: {:?}", std::str::from_utf8(&from[start..]));
Err(error::DecodeError::Bounds(error::Bound::End))
}
} else {
Err(error::DecodeError::Bounds(error::Bound::Start))
}
}
/// Find the tag bounds in this array
// We should pass this a slice from `base64bounds.end..`
pub(super) fn find_tag_bounds(from: impl AsRef<[u8]>) -> Result<Range<usize>, error::DecodeError>
{
let from = from.as_ref();
if let Some(start) = find(from, MARKER_TAGS_BEGIN) {
let start = start + MARKER_TAGS_BEGIN.len();
if let Some(end) = find_back(&from[start..], MARKER_TAGS_END) {//find_back(from, MARKER_TAGS_END) {
let end = end - MARKER_TAGS_END.len();
Ok(Range {
start,
end: end + start,
})
} else {
Err(error::DecodeError::Bounds(error::Bound::End))
}
} else {

@ -77,10 +77,12 @@ impl BasedLoli
{
let bound = encoding::find_bounds(self.as_ref())?;
let image_type = self.try_get_type(bound.start)?;
let tags_range = encoding::find_tag_bounds(&self.map[bound.end..]).ok();
Ok(LoliBounds {
loli: self,
image_type,
range:bound,
tags_range,
})
}
@ -143,15 +145,23 @@ impl<'a> LoliBounds<'a>
image_type,
map: unsafe { MmapOptions::new().map_mut(&file).map_err(|e| error::DecodeError::Map(e, path.into()))? },
file: file,
tags: if let Some(range) = &self.tags_range {
let bytes = self.loli.bytes();
let range = &bytes[(self.range.end+range.start)..(self.range.end+range.end)];
if let Ok(string) = std::str::from_utf8(range) {
string.split(' ').map(|x| x.to_owned()).collect()
} else {
Vec::default()
}
} else { Vec::default() },
})
}
}
/// Attempt to decode to a child container of our owner.
///
/// Takes the bytes of the owning `BasedLoli`, narrows them to this bound's
/// `range`, and forwards that slice together with `loli` to the free
/// `decode` function, returning its result unchanged.
#[inline]
pub fn decode(&self, loli: &mut Loli) -> Result<usize, error::Error>
{
    let window = self.range.clone();
    let source = self.loli.bytes();
    decode(&source[window], loli)
}
}
@ -171,6 +181,7 @@ pub struct Loli
image_type: encoding::ImageType,
map: MmapMut,
file: File,
tags: Vec<String>,
}
impl AsMut<[u8]> for Loli
@ -186,5 +197,14 @@ pub struct LoliBounds<'a>
{
loli: &'a BasedLoli,
range: Range<usize>,
tags_range: Option<Range<usize>>,
image_type: encoding::ImageType,
}
impl Loli {
    /// Borrow the tags stored on this loli as a slice.
    pub fn tags(&self) -> &[String] //TODO: Tags filter
    {
        self.tags.as_slice()
    }
}

@ -1,5 +1,6 @@
#![feature(linked_list_remove)]
#![feature(label_break_value)]
#![allow(dead_code)]
use termprogress::{
@ -22,6 +23,8 @@ mod work_async;
#[cfg(not(feature="async"))]
mod work;
//mod list;
pub fn parse_args() -> Result<config::Config, args::Error>
{
match args::parse_args()? {

Loading…
Cancel
Save