Added feature (default) `byte-strings`: Operate on raw byte strings instead of UTF8 strings.

Fortune for reverse's current commit: Blessing − 吉
master
Avril 3 years ago
parent 7c85310ca5
commit 67aee7c96e
Signed by: flanchan
GPG Key ID: 284488987C31F630

@ -7,7 +7,7 @@ edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[features]
default = ["output-lines", "buffer-output", "ignore-output-errors", "ignore-invalid-args"]
default = ["output-lines", "buffer-output", "ignore-output-errors", "ignore-invalid-args", "byte-strings"]
# Output as lines instead of `["2", "1", "0"]`
output-lines = []
@ -25,7 +25,8 @@ ignore-output-errors = []
# Ignore invalid arguments instead of removing invalid UTF8 characters if they exist in the argument
ignore-invalid-args = []
# Operate on OsString/byte arrays instead of strings; so non-utf8 characters will be preserved.
# Operate on raw input byte arrays instead of strings, so non-UTF-8 bytes are preserved in both input and output.
# NOTE: `ignore-invalid-args` will do nothing if this is enabled.
byte-strings = []
[profile.release]

@ -39,11 +39,29 @@ fn collect_input() -> Box<dyn Iterator<Item= impl Input + 'static> + 'static>
os::unix::ffi::*,
};
/// One item of input, held either as a `String` or as raw bytes.
///
/// `Debug` is implemented by hand below rather than derived: a derived impl
/// would wrap the payload in the variant name, and a derive cannot coexist
/// with the manual impl (conflicting implementations).
enum MaybeUTF8
{
    /// Owned `String` payload.
    UTF8(String),
    /// Owned `OsString` payload; may hold bytes that are not valid UTF-8.
    Raw(OsString),
    /// Borrowed `'static` byte slice.
    Static(&'static [u8]),
}

impl std::fmt::Debug for MaybeUTF8
{
    /// Formats only the inner payload through its own `Debug` impl, without
    /// the variant name.
    // Custom Debug impl to ensure output is identical when `output-quoted` is enabled.
    #[inline]
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result
    {
        match self {
            Self::UTF8(string) => std::fmt::Debug::fmt(string, f),
            Self::Raw(raw) => std::fmt::Debug::fmt(raw, f),
            // An empty static slice is written directly as an empty quoted string.
            Self::Static(&[]) => f.write_str("\"\""),
            // Non-empty static bytes are viewed as an `OsStr` so they format like `Raw`.
            Self::Static(bytes) => std::fmt::Debug::fmt(OsStr::from_bytes(bytes), f),
        }
    }
}
impl AsRef<[u8]> for MaybeUTF8
@ -54,6 +72,7 @@ fn collect_input() -> Box<dyn Iterator<Item= impl Input + 'static> + 'static>
match self {
Self::UTF8(string) => string.as_bytes(),
Self::Raw(raw) => raw.as_bytes(),
Self::Static(bytes) => bytes,
}
}
}
@ -76,16 +95,20 @@ fn collect_input() -> Box<dyn Iterator<Item= impl Input + 'static> + 'static>
}
}
#[allow(dead_code)]
impl MaybeUTF8
{
/// Wraps a `'static` byte slice in the `Static` variant, storing the
/// reference as given.
#[inline(always)]
pub const fn from_static_bytes(bytes: &'static [u8]) -> Self
{
Self::Static(bytes)
}
/// Builds a `Raw` value that owns a copy of `bytes`.
#[inline(always)]
pub fn from_raw_bytes(bytes: &[u8]) -> Self
{
    // Copy the borrowed bytes into an owned buffer, then hand it to `OsString`.
    let owned: Vec<u8> = bytes.to_vec();
    Self::Raw(OsString::from_vec(owned))
}
#[inline(always)]
#[deprecated(note="XXX: TODO: Only use this if the read_until() into vec does not add the '\n' into the vec as well. Otherwise, *always* use this.")]
//#[deprecated(note="XXX: TODO: Only use this if the read_until() into vec does not add the '\n' into the vec as well. Otherwise, *always* use this.")]
pub fn from_raw_vec(vec: Vec<u8>) -> Self
{
Self::Raw(OsString::from_vec(vec))
@ -116,10 +139,16 @@ fn collect_input() -> Box<dyn Iterator<Item= impl Input + 'static> + 'static>
{
Some(match handle_fmt_err_or(self.0.read_until(b'\n', &mut self.1), || 0) {
0 => return None,
1 if self.1[0] == b'\n' => MaybeUTF8::from_static_bytes(&[]),
read_sz => {
let line = MaybeUTF8::from_raw_bytes(&self.1[..]); //TODO: XXX: If self.1 here does not have the '\n' added into it by read_until(); use from_raw_vec(self.1.clone()) instead; it'll be more efficient.
let line = if self.1[read_sz-1] == b'\n' {
MaybeUTF8::from_raw_bytes(&self.1[..(read_sz-1)])
} else {
MaybeUTF8::from_raw_vec(self.1.clone())
};
debug_assert_ne!(line.as_ref().iter().last().copied(), Some(b'\n'), "Deliminator still in output");
self.1.clear();
//TODO: todo!("Do we need read_sz ({read_sz}) at all here? Will the `\n` be inside the read string?");
line
},
})
@ -141,19 +170,25 @@ fn collect_input() -> Box<dyn Iterator<Item= impl Input + 'static> + 'static>
}
}
/// Unwraps an I/O result, falling back to `or()` when it is an error.
///
/// Unless the `ignore-output-errors` feature is enabled, the error is first
/// reported on stderr. With the feature enabled the error is dropped silently.
///
/// * `res` - the result to unwrap.
/// * `or`  - produces the substitute value; called only when `res` is `Err`.
// NOTE(review): this helper is also applied to `read_until` results, so the
// word "write" in the message can be misleading for read failures.
#[allow(dead_code)]
#[cfg_attr(feature="ignore-output-errors", inline)]
fn handle_fmt_err_or<F, T>(res: std::io::Result<T>, or: F) -> T
where F: FnOnce() -> T
{
    match res {
        Ok(value) => value,
        // Underscore-prefixed so the binding is not flagged as unused when the
        // reporting line below is compiled out.
        Err(_e) => {
            #[cfg(not(feature="ignore-output-errors"))]
            eprintln!("[!] failed to write line: {_e}");
            or()
        }
    }
}
#[allow(dead_code)]
#[cfg_attr(feature="ignore-output-errors", inline(always))]
fn handle_fmt_err<T>(res: std::io::Result<T>)
{

Loading…
Cancel
Save