Added ergonomics for `-n`/`--no-consume`: input is still consumed when it is possible to *start* consuming it without blocking. Added TTY checks for the input/output chain files.

Fortune for genmarkov's current commit: Future small blessing − 末小吉
cli
Avril 2 weeks ago
parent 19b5f322dc
commit 72101413f0
Signed by: flanchan
GPG Key ID: 284488987C31F630

5
Cargo.lock generated

@ -376,9 +376,9 @@ dependencies = [
[[package]]
name = "libc"
version = "0.2.169"
version = "0.2.171"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6"
[[package]]
name = "linked-hash-map"
@ -394,6 +394,7 @@ dependencies = [
"bytes",
"clap",
"futures",
"libc",
"markov 1.1.0",
"num_cpus",
"os_pipe",

@ -20,10 +20,11 @@ lto = "fat"
strip = false
[features]
default = ["threads"]
default = ["posix_io", "threads"]
posix_io = ["dep:libc"]
threads = ["zstd/zstdmt", "dep:num_cpus"]
unstable = ["smallvec/specialization", "smallvec/may_dangle"]
unstable = ["smallvec/specialization", "smallvec/may_dangle", "libc?/const-extern-fn"]
#io_uring = ["dep:tokio-uring", "dep:async-compression", "dep:futures", "dep:tokio", "dep:os_pipe"]
[dependencies]
@ -32,6 +33,7 @@ bytes = { version = "1.10.0", features = ["serde"] }
chain = {package = "markov", version = "1.1.0" }
clap = { version = "4.5.29", features = ["derive"] }
futures = { version = "0.3.31", default-features = false, optional = true, features = ["alloc", "async-await", "std"] }
libc = { version = "0.2.171", features = ["use_std", "align", "extra_traits"], optional = true }
num_cpus = { version = "1.16.0", optional = true }
os_pipe = { version = "1.2.1", optional = true }
serde = { version = "1.0.217", features = ["derive"] }

@ -0,0 +1,77 @@
//! I/O utilities.
//! For working with `ioctl()` & other lower-level file-descriptor operations like peeking & terminal queries.
use std::os::unix::prelude::*;
use std::os::fd::AsFd;
use std::ffi::{
c_int,
};
use std::{
io,
num::NonZeroUsize,
};
/// Check if open stream `file` refers to a terminal.
///
/// The query is delegated to the standard library's [`std::io::IsTerminal`]
/// implementation for the borrowed file descriptor, so no hand-written FFI
/// declaration is needed and an unexpected return value from the platform can
/// never cause a panic.
///
/// # Returns
/// `true` if the descriptor behind `file` is attached to a terminal, `false`
/// otherwise.
pub fn is_tty<T: ?Sized>(file: &T) -> bool
where T: AsFd
{
    // Imported locally so this function does not depend on the module's import list.
    use std::io::IsTerminal;

    file.as_fd().is_terminal()
}
/// Query whether the stream `file` has bytes that can be read right now.
///
/// # Returns
/// - `Err` with kind `ErrorKind::Unsupported`: the check could not be performed
///   (the crate was built without the `posix_io` feature).
/// - `Err` with any other kind: `ioctl(FIONREAD)` failed on `file`'s fd and the
///   OS error is passed through, or the call reported a negative byte count
///   (`ErrorKind::InvalidData`).
/// - `Ok(None)`: there are 0 bytes pending.
/// - `Ok(Some(x))`: there are at least `x` bytes pending.
///
/// __NOTE__: An `Ok()` value is not guaranteed; callers should treat errors from
/// this function as non-fatal.
pub fn has_known_available_data<T: ?Sized>(file: &T) -> io::Result<Option<NonZeroUsize>>
where T: AsFd
{
    /// Ask the OS, through `ioctl(FIONREAD)`, how many bytes are pending on `fd`.
    #[cfg(feature="posix_io")]
    fn pending_bytes(fd: impl AsRawFd) -> io::Result<c_int>
    {
        let mut pending: c_int = 0;
        // SAFETY: FIONREAD takes `int*`, and `pending` is a live `c_int` for the whole call.
        let status = unsafe {
            libc::ioctl(fd.as_raw_fd(), libc::FIONREAD, &raw mut pending)
        };
        // Read the OS error immediately after the failing call.
        if status == 0 {
            Ok(pending)
        } else {
            Err(io::Error::last_os_error())
        }
    }

    /// Fallback used when `posix_io` is disabled: always reports the operation as unsupported.
    #[cfg(not(feature="posix_io"))]
    #[inline(always)]
    fn pending_bytes(_fd: impl AsRawFd) -> io::Result<c_int>
    {
        Err(io::Error::new(io::ErrorKind::Unsupported, "Operation unsupported"))
    }

    let pending = pending_bytes(file.as_fd())?;
    // The conversion fails exactly when the reported count is negative.
    match usize::try_from(pending) {
        Ok(count) => Ok(NonZeroUsize::new(count)),
        Err(_) => Err(io::Error::new(io::ErrorKind::InvalidData, "ioctl(FIONREAD) returned an invalid invariant")),
    }
}

@ -18,6 +18,8 @@ use std::{
},
};
mod iou;
#[derive(Debug, clap::Args)]
#[group(required=false,multiple=true)]
struct ChainFile {
@ -48,7 +50,10 @@ pub struct Cli {
#[arg(short, long)]
force: bool,
/// Do not read into chain from `stdin` if there is a loaded chain that is not empty.
/// Do not block to read into chain from `stdin` if there is a loaded chain that is not empty.
///
/// If `stdin` is detected as not empty, then it will attempt to consume the buffer still; this prevents the requirement to explicitly close `stdin` when running from a terminal that does not pipe anything into the process input.
/// To ignore input entirely, use `--write`.
#[arg(short, long)]
no_consume: bool,
@ -139,6 +144,13 @@ fn create_chain(cli: &Cli) -> Chain<String>
let mut input = std::fs::OpenOptions::new()
.read(true)
.open(load).expect("Failed to open chain load file");
if iou::is_tty(&input) {
if cli.force {
eprintln!("Warning: Reading binary data from TTY!");
} else {
panic!("Cannot read chain from file {:?}: Refusing to read binary data from TTY, if this is intended pass `--force`", load);
}
}
load_chain(&mut input).expect("Failed to load chain from file")
} else {
Chain::new()
@ -160,6 +172,13 @@ fn complete_chain(cli: &Cli, chain: Chain<String>, is_unmodified: bool) -> io::R
.write(true)
.truncate(force)
.open(save).expect("Failed to open chain save file");
if iou::is_tty(&output) {
if cli.force {
eprintln!("Warning: Writing binary data to TTY!");
} else {
return Err(io::Error::new(io::ErrorKind::InvalidInput, "Refusing to write binary data to TTY, if this is intended pass `--force`"));
}
}
save_chain(&mut output, &chain).expect("Failed to save chain to file") // TODO: Error type
}
} else if cfg!(debug_assertions) {
@ -177,18 +196,35 @@ fn main() {
let stdin = io::stdin();
let mut stdin = stdin.lock();
let mut chain = create_chain(&cli); // TODO: If `self.force_overwrite() && self.is_append()`, keep the file open for the process duration, and `lseek(0)` it before writing instead of opening it again. (XXX: Should we add `O_EXCL` to the opened file if )
if !(cli.write_only || (cli.no_consume && ! chain.is_empty())) {
// TODO: If there are 0 lines fed, set a flag for `unmodified` so if `cli.is_append()`, the file isn't overwritten for no reason.
let has_potential_pending = |stdin| match iou::has_known_available_data(stdin) {
Err(x) if x.kind() == io::ErrorKind::Unsupported => None,
Err(err) => {
if cfg!(debug_assertions) {
eprintln!("Warning: Failed to test for pending input: {err}");
}
None
},
Ok(None) => Some(false),
Ok(Some(_)) => Some(true),
}.unwrap_or(false); // If we cannot see if there is pending data on the stream, assume there is not for the purpose of `no_consume`. (NOTE: `no_consume`'s directive is to only consume *IFF* we **know** there is data to read.)
if cli.write_only || (cli.no_consume && !(chain.is_empty() || has_potential_pending(&stdin))) {
if cfg!(debug_assertions) {
use std::os::fd::AsFd;
eprintln!("Warning: Skipping attempt to read from stdin. write_only: {}, no_consume: {}, is_chain_empty = {}, data_pending_on_p({:?}) = {}", cli.write_only, cli.no_consume, chain.is_empty(), &stdin.as_fd(), has_potential_pending(&stdin));
}
} else {
buffered_read_all_lines(&mut stdin, |string| {
let words = string.split_whitespace()
.filter(|word| !word.is_empty())
.map(|s| s.to_owned()).collect::<SmallVec<[_; 16]>>();
.filter(|word| !word.is_empty())
.map(|s| s.to_owned()).collect::<SmallVec<[_; 16]>>();
if !words.is_empty() {
unmodified = false;
chain.feed(&words);
}
Ok(())
}).expect("Failed to read from stdin");
}
@ -205,5 +241,5 @@ fn main() {
};
}
complete_chain(&cli, chain, unmodified).unwrap();
complete_chain(&cli, chain, unmodified).expect("Failed to save chain");
}

Loading…
Cancel
Save