now gets tags from page

master
Avril 4 years ago
parent ac5f323e2d
commit 77809f6faf
Signed by: flanchan
GPG Key ID: 284488987C31F630

@ -1,6 +1,6 @@
[package] [package]
name = "lolistealer" name = "lolistealer"
version = "0.1.0" version = "0.1.1"
authors = ["Avril <flanchan@cumallover.me>"] authors = ["Avril <flanchan@cumallover.me>"]
edition = "2018" edition = "2018"
@ -17,7 +17,7 @@ codegen-units = 1
[dependencies] [dependencies]
termprogress = "0.1.0" termprogress = "0.1.0"
lazy_static = "1.4" lazy_static = "1.4"
tokio = {version = "0.2", features= ["full"], optional=true} tokio = {version = "0.2", features= ["rt-threaded", "io-driver", "io-util", "macros", "fs"], optional=true}
reqwest = {version = "0.10", features= ["stream"]} reqwest = {version = "0.10", features= ["stream"]}
memmap = "0.7" memmap = "0.7"
getrandom = "0.1" getrandom = "0.1"

@ -1,9 +1,8 @@
Tags filtering before decode
implement output to dir implement output to dir
Remove Cargo.lock when termprogress is uploaded and Cargo.toml is properly formatted Remove Cargo.lock when termprogress is uploaded and Cargo.toml is properly formatted
New UI: New UI:
lolistealer --number 5 output-dir/ lolistealer --number 5 output-dir/
!!! memmap slice size is incorrect on release build

@ -0,0 +1,116 @@
use std::{
mem,
};
/// One node of the list: an element together with the owned remainder of the chain.
struct Cons<T>
{
    item: T,
    next: Option<Box<Cons<T>>>,
}

/// A safe linked list, with remove() for non-nightly build.
///
/// The list behaves like a stack: [`push`](LinkedList::push) inserts at the head and
/// [`pop`](LinkedList::pop) removes from the head, so iteration yields the most recently
/// pushed element first. Indices accepted by [`remove`](LinkedList::remove) follow that
/// same iteration order (index `0` is the head).
pub struct LinkedList<T>
{
    /// First node, stored inline; `None` exactly when the list is empty.
    head: Option<Cons<T>>,
    /// Number of elements currently reachable from `head`.
    sz: usize,
}

/// Borrowing iterator over a [`LinkedList`], created by [`LinkedList::iter`].
pub struct Iter<'a, T>
{
    /// Node that the next call to `next()` will yield, if any.
    current: Option<&'a Cons<T>>,
}

/// Mutably borrowing iterator over a [`LinkedList`], created by [`LinkedList::iter_mut`].
pub struct IterMut<'a, T>
{
    /// Node that the next call to `next()` will yield, if any.
    current: Option<&'a mut Cons<T>>,
}

/// Owning iterator over a [`LinkedList`], created by its `IntoIterator` impl.
pub struct IntoIter<T>
{
    /// Node that the next call to `next()` will yield, if any.
    current: Option<Box<Cons<T>>>,
}

impl<T> Default for LinkedList<T>
{
    /// Same as [`LinkedList::new`]: an empty list.
    #[inline]
    fn default() -> Self
    {
        Self::new()
    }
}

impl<T> LinkedList<T>
{
    /// Create an empty list.
    pub const fn new() -> Self
    {
        Self { head: None, sz: 0 }
    }

    /// Insert `value` at the head of the list.
    pub fn push(&mut self, value: T)
    {
        let node = Cons { item: value, next: None };
        match &mut self.head {
            Some(head) => {
                // Swap the new node into the inline head slot, then hang the previous
                // head (and everything behind it) off the new node.
                let previous = mem::replace(head, node);
                head.next = Some(Box::new(previous));
            }
            None => self.head = Some(node),
        }
        self.sz += 1;
    }

    /// Number of elements in the list.
    #[inline]
    pub fn len(&self) -> usize
    {
        self.sz
    }

    /// `true` when the list holds no elements.
    #[inline]
    pub fn is_empty(&self) -> bool
    {
        self.sz == 0
    }

    /// Remove and return the head element (the most recently pushed one), or `None` when
    /// the list is empty.
    pub fn pop(&mut self) -> Option<T>
    {
        let Cons { item, next } = self.head.take()?;
        // Promote the second node (if any) out of its box into the inline head slot.
        self.head = next.map(|node| *node);
        self.sz -= 1;
        Some(item)
    }

    /// Remove every element from the list.
    pub fn clear(&mut self)
    {
        // Detach nodes one at a time. Letting the chain drop on its own would recurse once
        // per node through the nested boxes and can exhaust the stack on a long list.
        let mut rest = self.head.take().and_then(|head| head.next);
        while let Some(mut node) = rest {
            rest = node.next.take();
        }
        self.sz = 0;
    }

    /// Iterate over shared references to the elements, head first.
    pub fn iter<'a>(&'a self) -> Iter<'a, T>
    {
        Iter { current: self.head.as_ref() }
    }

    /// Iterate over mutable references to the elements, head first.
    pub fn iter_mut<'a>(&'a mut self) -> IterMut<'a, T>
    {
        IterMut { current: self.head.as_mut() }
    }

    /// Remove and return the element at index `at`, counted from the head in iteration
    /// order.
    ///
    /// # Panics
    ///
    /// Panics if `at >= self.len()`.
    pub fn remove(&mut self, at: usize) -> T
    {
        assert!(at < self.sz, "Cannot remove at element > than size");
        if at == 0 {
            return self.pop().expect("list is non-empty because `at < sz`");
        }

        // Walk to the node directly in front of the one being removed.
        let mut prev = self.head.as_mut().expect("list is non-empty because `at < sz`");
        for _ in 1..at {
            prev = prev.next.as_deref_mut().expect("`sz` nodes are linked from the head");
        }

        // Unlink the target and splice its tail onto the predecessor.
        let target = prev.next.take().expect("`sz` nodes are linked from the head");
        let Cons { item, next } = *target;
        prev.next = next;
        self.sz -= 1;
        item
    }
}

impl<T> Drop for LinkedList<T>
{
    /// Tear the chain down iteratively (see [`LinkedList::clear`]) instead of recursively.
    fn drop(&mut self)
    {
        self.clear();
    }
}

impl<T> IntoIterator for LinkedList<T>
{
    type Item = T;
    type IntoIter = IntoIter<T>;

    /// Consume the list, yielding its elements by value, head first.
    fn into_iter(mut self) -> IntoIter<T>
    {
        self.sz = 0;
        IntoIter { current: self.head.take().map(Box::new) }
    }
}

impl<'a, T> Iterator for Iter<'a, T>
{
    type Item = &'a T;

    fn next(&mut self) -> Option<Self::Item>
    {
        let node = self.current.take()?;
        self.current = node.next.as_deref();
        Some(&node.item)
    }
}

impl<'a, T> Iterator for IterMut<'a, T>
{
    type Item = &'a mut T;

    fn next(&mut self) -> Option<Self::Item>
    {
        let node = self.current.take()?;
        // `next` and `item` are disjoint fields, so both may be borrowed for `'a`.
        self.current = node.next.as_deref_mut();
        Some(&mut node.item)
    }
}

impl<T> Iterator for IntoIter<T>
{
    type Item = T;

    fn next(&mut self) -> Option<Self::Item>
    {
        let node = self.current.take()?;
        let Cons { item, next } = *node;
        self.current = next;
        Some(item)
    }
}

impl<T> Drop for IntoIter<T>
{
    /// Drop any elements that were not consumed, one node at a time, to avoid recursion.
    fn drop(&mut self)
    {
        let mut rest = self.current.take();
        while let Some(mut node) = rest {
            rest = node.next.take();
        }
    }
}

@ -93,6 +93,9 @@ fn find_back(haystack: &[u8], needle: &[u8]) -> Option<usize> {
const MARKER_BASE64_BEGIN: &[u8] = b"base64,"; const MARKER_BASE64_BEGIN: &[u8] = b"base64,";
const MARKER_BASE64_END: &[u8] = b"' /><p"; //Search from end here with .rev() const MARKER_BASE64_END: &[u8] = b"' /><p"; //Search from end here with .rev()
const MARKER_TAGS_BEGIN: &[u8] = br"<p id='tags'>";
const MARKER_TAGS_END: &[u8] = br"</p><a id='about'";
/// Find the base64 page bounds in this array /// Find the base64 page bounds in this array
pub(super) fn find_bounds(from: impl AsRef<[u8]>) -> Result<Range<usize>, error::DecodeError> pub(super) fn find_bounds(from: impl AsRef<[u8]>) -> Result<Range<usize>, error::DecodeError>
{ {
@ -107,7 +110,28 @@ pub(super) fn find_bounds(from: impl AsRef<[u8]>) -> Result<Range<usize>, error:
end: end + start, end: end + start,
}) })
} else { } else {
//println!("WAAAAAAH: {:?}", std::str::from_utf8(&from[start..])); Err(error::DecodeError::Bounds(error::Bound::End))
}
} else {
Err(error::DecodeError::Bounds(error::Bound::Start))
}
}
/// Find the tag bounds in this array
// We should pass this a slice from `base64bounds.end..`
pub(super) fn find_tag_bounds(from: impl AsRef<[u8]>) -> Result<Range<usize>, error::DecodeError>
{
let from = from.as_ref();
if let Some(start) = find(from, MARKER_TAGS_BEGIN) {
let start = start + MARKER_TAGS_BEGIN.len();
if let Some(end) = find_back(&from[start..], MARKER_TAGS_END) {//find_back(from, MARKER_TAGS_END) {
let end = end - MARKER_TAGS_END.len();
Ok(Range {
start,
end: end + start,
})
} else {
Err(error::DecodeError::Bounds(error::Bound::End)) Err(error::DecodeError::Bounds(error::Bound::End))
} }
} else { } else {

@ -77,10 +77,12 @@ impl BasedLoli
{ {
let bound = encoding::find_bounds(self.as_ref())?; let bound = encoding::find_bounds(self.as_ref())?;
let image_type = self.try_get_type(bound.start)?; let image_type = self.try_get_type(bound.start)?;
let tags_range = encoding::find_tag_bounds(&self.map[bound.end..]).ok();
Ok(LoliBounds { Ok(LoliBounds {
loli: self, loli: self,
image_type, image_type,
range:bound, range:bound,
tags_range,
}) })
} }
@ -143,15 +145,23 @@ impl<'a> LoliBounds<'a>
image_type, image_type,
map: unsafe { MmapOptions::new().map_mut(&file).map_err(|e| error::DecodeError::Map(e, path.into()))? }, map: unsafe { MmapOptions::new().map_mut(&file).map_err(|e| error::DecodeError::Map(e, path.into()))? },
file: file, file: file,
tags: if let Some(range) = &self.tags_range {
let bytes = self.loli.bytes();
let range = &bytes[(self.range.end+range.start)..(self.range.end+range.end)];
if let Ok(string) = std::str::from_utf8(range) {
string.split(' ').map(|x| x.to_owned()).collect()
} else {
Vec::default()
}
} else { Vec::default() },
}) })
} }
/// Attempt to decode to a child container of our owner /// Attempt to decode to a child container of our owner
#[inline] #[inline]
pub fn decode(&self, loli: &mut Loli) -> Result<usize, error::Error> pub fn decode(&self, loli: &mut Loli) -> Result<usize, error::Error>
{ {
let bytes = self.loli.bytes(); let bytes = self.loli.bytes();
decode(&bytes[self.range.clone()], loli) decode(&bytes[self.range.clone()], loli)
} }
} }
@ -171,6 +181,7 @@ pub struct Loli
image_type: encoding::ImageType, image_type: encoding::ImageType,
map: MmapMut, map: MmapMut,
file: File, file: File,
tags: Vec<String>,
} }
impl AsMut<[u8]> for Loli impl AsMut<[u8]> for Loli
@ -186,5 +197,14 @@ pub struct LoliBounds<'a>
{ {
loli: &'a BasedLoli, loli: &'a BasedLoli,
range: Range<usize>, range: Range<usize>,
tags_range: Option<Range<usize>>,
image_type: encoding::ImageType, image_type: encoding::ImageType,
} }
impl Loli {
    /// Borrow the tags stored on this loli as a slice.
    pub fn tags(&self) -> &[String] //TODO: Tags filter
    {
        self.tags.as_slice()
    }
}

@ -1,5 +1,6 @@
#![feature(linked_list_remove)] #![feature(linked_list_remove)]
#![feature(label_break_value)] #![feature(label_break_value)]
#![allow(dead_code)] #![allow(dead_code)]
use termprogress::{ use termprogress::{
@ -22,6 +23,8 @@ mod work_async;
#[cfg(not(feature="async"))] #[cfg(not(feature="async"))]
mod work; mod work;
//mod list;
pub fn parse_args() -> Result<config::Config, args::Error> pub fn parse_args() -> Result<config::Config, args::Error>
{ {
match args::parse_args()? { match args::parse_args()? {

Loading…
Cancel
Save