/// A single input line that may or may not be valid UTF-8.
///
/// `Debug` is implemented by hand (rather than derived) so that every variant
/// prints as one quoted, escaped string instead of `Variant("...")`; the
/// original author's note says this keeps the output identical when the
/// `output-quoted` feature is enabled.
enum MaybeUTF8 {
    /// An owned string already known to be valid UTF-8.
    UTF8(String),
    /// Owned raw bytes, stored as an `OsString`; may contain invalid UTF-8.
    Raw(OsString),
    /// Borrowed bytes with `'static` lifetime; no allocation is needed to hold them.
    Static(&'static [u8]),
}

impl std::fmt::Debug for MaybeUTF8 {
    /// Formats the payload exactly as the wrapped string type would format itself.
    ///
    /// `Static` bytes are viewed as an `OsStr`, so they are rendered the same way
    /// as `Raw`. An empty slice needs no special case: `OsStr`'s `Debug` output for
    /// empty input is already `""`.
    #[inline]
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Exhaustive on purpose: adding a variant must force a decision here.
        match self {
            Self::UTF8(string) => std::fmt::Debug::fmt(string, f),
            Self::Raw(raw) => std::fmt::Debug::fmt(raw, f),
            Self::Static(bytes) => std::fmt::Debug::fmt(OsStr::from_bytes(bytes), f),
        }
    }
}
/// Unwraps an I/O result, substituting a fallback value on failure.
///
/// * `res` - the I/O result to unwrap.
/// * `or`  - produces the fallback value; only called when `res` is an `Err`.
///
/// Returns the `Ok` value, or `or()` on error. Unless the `ignore-output-errors`
/// feature is enabled, the error is first reported on stderr.
///
/// NOTE(review): callers also pass *read* results (e.g. from `read_until`), for
/// which the word "write" in the message is inaccurate. The text is left as-is
/// so the emitted output does not change.
// Presumably unused under some feature combinations, hence the allow — TODO confirm.
#[allow(dead_code)]
#[cfg_attr(feature = "ignore-output-errors", inline)]
fn handle_fmt_err_or<F, T>(res: std::io::Result<T>, or: F) -> T
where
    F: FnOnce() -> T,
{
    res.unwrap_or_else(|err| {
        // `cfg!` keeps a single code path that is type-checked in both
        // configurations; the branch is constant-folded away when disabled.
        if !cfg!(feature = "ignore-output-errors") {
            eprintln!("[!] failed to write line: {err}");
        }
        or()
    })
}