diff --git a/src/error.rs b/src/error.rs index 857aabe..891ebd9 100644 --- a/src/error.rs +++ b/src/error.rs @@ -5,6 +5,7 @@ use std::{ }; use super::{ tempfile, + loli, }; #[derive(Debug)] @@ -15,6 +16,7 @@ pub enum Error HTTP(reqwest::Error), HTTPStatus(reqwest::StatusCode), TempFile(tempfile::error::Error), + Loli(loli::error::Error), } impl error::Error for Error @@ -23,6 +25,7 @@ impl error::Error for Error { match &self { Error::IO(io) => Some(io), + Error::Loli(lo) => Some(lo), _ => None } } @@ -37,6 +40,7 @@ impl fmt::Display for Error Error::IO(io) => write!(f, "io: {}", io), Error::HTTP(http) => write!(f, "http internal error: {}", http), Error::HTTPStatus(status) => write!(f, "response returned status code {}", status), + Error::Loli(loli) => write!(f, "loli interpretation: {}", loli), _ => write!(f, "unknown error"), } } @@ -68,3 +72,19 @@ impl From for Error Self::TempFile(er) } } + +impl From for Error +{ + fn from(er: loli::error::Error) -> Self + { + Self::Loli(er) + } +} + +impl From for Error +{ + fn from(er: loli::error::DecodeError) -> Self + { + Self::Loli(er.into()) + } +} diff --git a/src/loli/encoding.rs b/src/loli/encoding.rs index e49c420..ab9966d 100644 --- a/src/loli/encoding.rs +++ b/src/loli/encoding.rs @@ -7,12 +7,12 @@ use std::{ }, }; -const HEADER_BASE64_JPEG: &'static str = "/9j/"; -const HEADER_BASE64_PNG: &'static str = "iVBO"; -const HEADER_BASE64_GIF: &'static str = "R0lG"; +const HEADER_BASE64_JPEG: &'static [u8] = b"/9j/"; +const HEADER_BASE64_PNG: &'static [u8] = b"iVBO"; +const HEADER_BASE64_GIF: &'static [u8] = b"R0lG"; /// An image type header -#[derive(Debug, PartialEq, Eq, Hash)] +#[derive(Debug, PartialEq, Eq, Hash, Clone)] pub enum ImageType { Png, @@ -28,13 +28,17 @@ impl Default for ImageType } } -impl str::FromStr for ImageType +impl ImageType { - type Err = error::Error; - - /// Determine image type from base64 - fn from_str(from: &str) -> Result + /// From base64 byte slice + /// + /// # Notes + /// + /// To parse from `str`, use the `FromStr` trait instead. + pub fn from_base64(from: T) -> Result + where T: AsRef<[u8]> { + let from = from.as_ref(); if from.len() > 4 { Ok(match &from[..4] { HEADER_BASE64_GIF => Self::Gif, @@ -48,8 +52,49 @@ impl str::FromStr for ImageType } } +impl str::FromStr for ImageType +{ + type Err = error::Error; + + /// Determine image type from base64 + fn from_str(from: &str) -> Result + { + let from = from.as_bytes(); + Self::from_base64(from) + } +} + /// Calculate the required data size from base64 input size pub const fn data_size(base64: usize) -> usize { ((4 * base64 / 3) + 3) & !3 } + +#[inline] +fn find(haystack: &[u8], needle: &[u8]) -> Option { + haystack.windows(needle.len()).position(|window| window == needle) +} + +#[inline] +fn find_back(haystack: &[u8], needle: &[u8]) -> Option { + haystack.windows(needle.len()).rev().position(|window| window == needle) +} + +const MARKER_BASE64_BEGIN: &[u8] = b"base64,"; +const MARKER_BASE64_END: &[u8] = b"' />"; //Search from end here with .rev() + +/// Find the base64 page bounds in this array +pub(super) fn find_bounds(from: impl AsRef<[u8]>) -> Result, error::DecodeError> +{ + let from = from.as_ref(); + + if let Some(start) = find(from, MARKER_BASE64_BEGIN) { + if let Some(end) = find_back(from, MARKER_BASE64_END) { + return Ok(Range { + start, + end, + }); + } + } + Err(error::DecodeError::Bounds) +} diff --git a/src/loli/error.rs b/src/loli/error.rs index 7a6ea9f..38c3689 100644 --- a/src/loli/error.rs +++ b/src/loli/error.rs @@ -3,8 +3,48 @@ use std::{ error, fmt, io, + path::PathBuf, }; +#[derive(Debug)] +pub enum DecodeError +{ + /// Map failed + Map(io::Error, PathBuf), + /// Map contained invalid UTF-8 + Corrupt, + /// Map contained invalid base64 + Base64, + /// Couldn't find base64 bounds + Bounds, + /// Bad size + Size, +} + +impl error::Error for DecodeError +{ + fn source(&self) -> Option<&(dyn error::Error + 'static)> + { + Some(match &self { + Self::Map(io, _) => io, + _ => return None, + }) + } +} +impl fmt::Display for DecodeError +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + match self { + Self::Map(io, path) => write!(f, "mmap (file {:?}) failed: {}", path, io), + Self::Corrupt => write!(f, "data was corrupt (invalid utf-8)"), + Self::Base64 => write!(f, "data was corrupt (invalid base64)"), + Self::Bounds => write!(f, "couldn't find base64 bounds"), + Self::Size => write!(f, "bad size"), + } + } +} + #[derive(Debug)] pub enum Error { @@ -12,8 +52,8 @@ pub enum Error UnknownFormat, /// Image is not valid InvalidFormat, - /// Image decode failedb - DecodeError, + /// Image decode failed + DecodeError(DecodeError), // Internals /// IO error @@ -31,6 +71,7 @@ impl error::Error for Error Some(match &self { Self::IO(io) => io, Self::Formatter(fmt) => fmt, + Self::DecodeError(de) => de, _ => return None, }) } @@ -44,7 +85,7 @@ impl fmt::Display for Error match self { Self::UnknownFormat => write!(f, "could not determine image format"), Self::InvalidFormat => write!(f, "image is not valid"), - Self::DecodeError => write!(f, "image decode failed"), + Self::DecodeError(m) => write!(f, "image decode failed: {}", m), Self::IO(io) => write!(f, "io: {}", io), Self::Formatter(fmt) => write!(f, "formatting: {}", fmt), _ => write!(f, "unknown error"), @@ -72,6 +113,48 @@ impl From for Error { fn from(_er: base64::DecodeError) -> Self { - Self::DecodeError + Self::DecodeError(DecodeError::Base64) + } +} + +impl From for Error +{ + fn from(_er: std::str::Utf8Error) -> Self + { + Self::DecodeError(DecodeError::Corrupt) + } +} + + +impl From for Error +{ + fn from(_er: std::num::TryFromIntError) -> Self + { + Self::DecodeError(DecodeError::Size) + } +} + + +impl From for Error +{ + fn from(de: DecodeError) -> Self + { + Self::DecodeError(de) + } +} + +impl From for DecodeError +{ + fn from(_er: std::str::Utf8Error) -> Self + { + DecodeError::Corrupt + } +} + +impl From for DecodeError +{ + fn from(_er: std::num::TryFromIntError) -> Self + { + Self::Size } } diff --git a/src/loli/mod.rs b/src/loli/mod.rs index 61be524..b57204a 100644 --- a/src/loli/mod.rs +++ b/src/loli/mod.rs @@ -1,9 +1,24 @@ - +use std::{ + path::Path, + convert::TryInto, + fs::{ + File, + OpenOptions, + }, + ops::{ + Range, + }, +}; +use memmap::{ + MmapOptions, + Mmap, + MmapMut, +}; pub mod error; pub mod encoding; /// Attempt to decode an image -pub fn decode(input: S, mut output: W) -> Result +fn decode(input: S, mut output: W) -> Result where S: AsRef<[u8]>, W: AsMut<[u8]> { @@ -14,15 +29,146 @@ where S: AsRef<[u8]>, } /// Calculate the size for a base64 inpue -pub fn calc_size(input: T) -> usize +fn calc_size(input: T) -> usize where T: AsRef<[u8]> { encoding::data_size(input.as_ref().len()) } /// Try to get the image format from a `str` slice. -pub fn attempt_get_format(input: T) -> Result +#[inline] +fn attempt_get_format(input: T) -> Result where T: AsRef { - input.as_ref().parse() + match input.as_ref().parse() { + Ok(v) => Ok(v), + Err(error::Error::UnknownFormat) => Ok(Default::default()), + Err(e) => Err(e), + } +} + +/// An encoded loli image +#[derive(Debug)] +pub struct BasedLoli +{ + size_decoded: usize, + map: Mmap, // We should probably have this dropped before `file` + file: File, +} + +impl BasedLoli +{ + /// Create a new map to basedloli. + pub fn map(file: impl AsRef) -> Result + { + let file = File::open(file.as_ref())?; + + let meta = file.metadata()?; + Ok(Self { + size_decoded: meta.len().try_into()?, + map: unsafe { MmapOptions::new().map(&file)? }, + file, + }) + } + + /// Find bounds for base64 data + pub fn calculate_bounds<'a>(&'a self) -> Result, error::Error> + { + let bound = encoding::find_bounds(self.as_ref())?; + let image_type = self.try_get_type(bound.start)?; + Ok(LoliBounds { + loli: self, + image_type, + range:bound, + }) + } + + /// Get the raw bytes of this map + pub fn bytes(&self) -> &[u8] + { + self.map.as_ref() + } + + /// Try to get as a str + pub fn try_as_str(&self) -> Result<&str, error::DecodeError> + { + Ok(std::str::from_utf8(self.map.as_ref())?) + } + + /// Try to get the image type from encoded data + pub fn try_get_type(&self, start: usize) -> Result + { + attempt_get_format(&(self.try_as_str()?)[start..]) + } + + /// The calculated size + pub fn decoded_size(&self) -> usize + { + self.size_decoded + } + + /// Attempt to decode to a child container + #[inline] + pub fn decode(&self, loli: &mut Loli) -> Result + { + decode(self, loli) + } +} + +impl<'a> LoliBounds<'a> +{ + /// Try to create a decoding container for this `BasedLoli` to the file in `path`. + pub fn create_child(&self, path: impl AsRef) -> Result + { + let size = self.loli.size_decoded; + let image_type = self.image_type.clone(); + let path = path.as_ref(); + + let file = OpenOptions::new() + .create(true) + .write(true) + .read(true) + .open(path)?; + file.set_len(size.try_into()?)?; + + Ok(Loli { + size, + image_type, + map: unsafe { MmapOptions::new().map_mut(&file).map_err(|e| error::DecodeError::Map(e, path.into()))? }, + file: file, + }) + } +} + +impl AsRef<[u8]> for BasedLoli +{ + fn as_ref(&self) -> &[u8] + { + self.bytes() + } +} + +#[derive(Debug)] +pub struct Loli +{ + size: usize, + image_type: encoding::ImageType, + map: MmapMut, + file: File, +} + +impl AsMut<[u8]> for Loli +{ + fn as_mut(&mut self) -> &mut [u8] + { + self.map.as_mut() + } +} + +#[derive(Debug)] +pub struct LoliBounds<'a> +{ + loli: &'a BasedLoli, + range: Range, + image_type: encoding::ImageType, } diff --git a/src/work_async.rs b/src/work_async.rs index 6701424..1d2b31f 100644 --- a/src/work_async.rs +++ b/src/work_async.rs @@ -14,18 +14,32 @@ mod tasklist; #[macro_use] mod progress; +/// Decode a loli from path +pub async fn decode(from: impl AsRef, to: impl AsRef, progress: &mut progress::CommandSender) -> Result +{ + prog_send!(link progress.println("Mapping child")); + let base = loli::BasedLoli::map(from)?; + let bounds = base.calculate_bounds()?; // If server is returning error code, this will fail. + prog_send!(link progress.println(format!("Decoding {} -> {} bytes", base.as_ref().len(), base.decoded_size()))); + let mut decoded = bounds.create_child(to)?; + prog_send!(progress.println("Decoding...")); + let sz = base.decode(&mut decoded)?; + prog_send!(link progress.println(format!("Decode complete ({} bytes)", sz))); + + Ok(decoded) +} + /// Download a loli async -pub async fn perform(url: impl AsRef, path: impl AsRef, progress: &mut progress::CommandSender) -> Result +pub async fn perform(url: impl AsRef, path: impl AsRef, progress: &mut progress::CommandSender) -> Result<(), error::Error> { let url = url.as_ref(); let path = path.as_ref(); - let task = format!("{:?}", path); //TODO: Real task name let mut resp = reqwest::get(url).await?; let len = resp.content_length(); - prog_send!(link progress.push_task(&task)); + //prog_send!(link progress.push_task(&task)); if let Some(len) = len { prog_send!(progress.bump_max(len)); } else { @@ -56,7 +70,7 @@ pub async fn perform(url: impl AsRef, path: impl AsRef, progress: &mu prog_send!(progress.println(format!("done for {}", url))); //prog_send!(link progress.pop_task(task)); - Ok(task) + Ok(()) } pub async fn work(conf: config::Config) -> Result<(), Box> @@ -74,26 +88,27 @@ pub async fn work(conf: config::Config) -> Result<(), Box let url = url::parse(&rating); let mut prog = prog_writer.clone_with(format!("-> {:?}", path)); children.push(tokio::task::spawn(async move { - - //println!("Starting download ({})...", url); prog.println(format!("Starting download ({})...", url)).await.expect("fatal"); - /*let path = match path { - config::OutputType::File(file) => file, //TODO: Download to temp file, memmap to str slice and then base64 decode. Also, determine if we need .png or .jpg from header. - config::OutputType::Directory(dir) => { - //TODO: Implement downloading to temp and renaming to hash - unimplemented!(); - }, - };*/ + + let task = format!("{:?}", path); //TODO: Real task name + prog_send!(link unwind prog.push_task(&task)); + let temp = tempfile::TempFile::new(); match perform(&url, &temp, &mut prog).await { Err(e) => panic!("Failed downloading {} -> {:?}: {}", url, temp, e), //TODO: Make real error handler - Ok(task) => { - //TODO: memmap `temp` and decode base64 into new file `path`. also determine the encoding. - prog_send!(link prog.pop_task(task)); + Ok(_) => { + let path = match path { + config::OutputType::File(file) => file, + config::OutputType::Directory(dir) => unimplemented!(), //TODO: implement get hash to file + }; + let loli = match decode(&temp, &path, &mut prog).await { + Ok(v) => v, + Err(e) => panic!("Failed decoding: {}", e), + }; }, } - + prog_send!(link prog.pop_task(task)); //TODO: Make sure we don't return gracefully before this is called. })); } diff --git a/src/work_async/progress.rs b/src/work_async/progress.rs index e768284..ce6f0d5 100644 --- a/src/work_async/progress.rs +++ b/src/work_async/progress.rs @@ -105,6 +105,11 @@ macro_rules! prog_send { let _ =$expression.await?; } }; + (link unwind $expression:expr) => { + { + $expression.await.expect("mpsc fatal").wait().await.expect("mpsc fatal"); + } + }; (link $expression:expr) => { { if let Ok(ok) = $expression.await { diff --git a/src/work_async/tasklist.rs b/src/work_async/tasklist.rs index bc58ee3..49202df 100644 --- a/src/work_async/tasklist.rs +++ b/src/work_async/tasklist.rs @@ -8,7 +8,7 @@ use std::{ pub struct TaskList(LinkedList<(usize, String)>, String, usize); fn find(list: &LinkedList, mut fun: F) -> Option - where F: FnMut(&T) -> bool +where F: FnMut(&T) -> bool { for (i, x) in (0..).zip(list.iter()) {