From 72101413f0d657e267c3f9d60ae1e49fe5f6b28f Mon Sep 17 00:00:00 2001 From: Avril Date: Tue, 18 Mar 2025 17:31:08 +0000 Subject: [PATCH] Added ergonomics for `-n`/`--no-consume`: Will consume data if it is non-blocking to *start* consuming. Added TTY checks for input/output chain file. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fortune for genmarkov's current commit: Future small blessing − 末小吉 --- Cargo.lock | 5 ++-- Cargo.toml | 6 +++-- src/iou.rs | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 52 ++++++++++++++++++++++++++++++------ 4 files changed, 128 insertions(+), 12 deletions(-) create mode 100644 src/iou.rs diff --git a/Cargo.lock b/Cargo.lock index b769835..8152af5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -376,9 +376,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.169" +version = "0.2.171" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" +checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" [[package]] name = "linked-hash-map" @@ -394,6 +394,7 @@ dependencies = [ "bytes", "clap", "futures", + "libc", "markov 1.1.0", "num_cpus", "os_pipe", diff --git a/Cargo.toml b/Cargo.toml index faa114a..17e3d0e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,10 +20,11 @@ lto = "fat" strip = false [features] -default = ["threads"] +default = ["posix_io", "threads"] +posix_io = ["dep:libc"] threads = ["zstd/zstdmt", "dep:num_cpus"] -unstable = ["smallvec/specialization", "smallvec/may_dangle"] +unstable = ["smallvec/specialization", "smallvec/may_dangle", "libc?/const-extern-fn"] #io_uring = ["dep:tokio-uring", "dep:async-compression", "dep:futures", "dep:tokio", "dep:os_pipe"] [dependencies] @@ -32,6 +33,7 @@ bytes = { version = "1.10.0", features = ["serde"] } chain = {package = "markov", version = "1.1.0" } clap = { version = "4.5.29", features = ["derive"] } 
futures = { version = "0.3.31", default-features = false, optional = true, features = ["alloc", "async-await", "std"] } +libc = { version = "0.2.171", features = ["use_std", "align", "extra_traits"], optional = true } num_cpus = { version = "1.16.0", optional = true } os_pipe = { version = "1.2.1", optional = true } serde = { version = "1.0.217", features = ["derive"] } diff --git a/src/iou.rs b/src/iou.rs new file mode 100644 index 0000000..593ea14 --- /dev/null +++ b/src/iou.rs @@ -0,0 +1,77 @@ +//! I/O utilities. +//! For working with `ioctl()` & other lower-level file-descriptor operations like peeking & terminal queries. +use std::os::unix::prelude::*; +use std::os::fd::AsFd; +use std::ffi::{ + c_int, +}; +use std::{ + io, + num::NonZeroUsize, +}; + +/// Check if open stream `file` refers to a terminal. +pub fn is_tty(file: &T) -> bool +where T: AsFd +{ + // SAFETY: This function is (essentially) pure. + unsafe extern "C" { + safe fn isatty(fd: c_int) -> c_int; + } + + let file = file.as_fd(); + match isatty(file.as_raw_fd()) { + 1 => true, + 0 => false, + bug => unreachable!("BUG: isatty({:?}) invalid return value: {:?}", file, bug), + } +} + +/// Check if stream `file` has data ready to read. +/// +/// # Returns +/// If `Err` is returned and the kind is `ErrorKind::Unsupported`, the operation could not be carried out. +/// If `ioctl(FIONREAD)` fails on `file`'s fd, the `ioctl()` call's error is returned. +/// +/// Otherwise: +/// - If `Ok(None)` is returned, there are 0 bytes pending. +/// - If `Ok(Some(x))` is returned, there are at least `x` bytes pending. +/// +/// __NOTE__: It is not guaranteed that this function will be able to return an `Ok()` value; such errors are not fatal.
+pub fn has_known_available_data(file: &T) -> io::Result> +where T: AsFd +{ + #[cfg(feature="posix_io")] + fn check_fd_raw(file: impl AsRawFd) -> io::Result + { + use libc::{ + ioctl, + FIONREAD, + }; + + let mut num: c_int = 0; + // SAFETY: FIONREAD takes `int*` + match unsafe { + ioctl(file.as_raw_fd(), FIONREAD, &raw mut num) + } { + 0 => Ok(num), + _ => Err(io::Error::last_os_error()), + } + } + + #[cfg(not(feature="posix_io"))] + #[inline(always)] + fn check_fd_raw(_file: impl AsRawFd) -> io::Result + { + Err(io::Error::new(io::ErrorKind::Unsupported, "Operation unsupported")) + } + + let file = file.as_fd(); + + let num = check_fd_raw(file)?; + if num < 0 { + Err(io::Error::new(io::ErrorKind::InvalidData, "ioctl(FIONREAD) returned an invalid invariant")) + } else { + Ok(NonZeroUsize::new(num as usize)) + } +} diff --git a/src/main.rs b/src/main.rs index 5b37de4..ee8d4c3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -18,6 +18,8 @@ use std::{ }, }; +mod iou; + #[derive(Debug, clap::Args)] #[group(required=false,multiple=true)] struct ChainFile { @@ -48,7 +50,10 @@ pub struct Cli { #[arg(short, long)] force: bool, - /// Do not read into chain from `stdin` if there is a loaded chain that is not empty. + /// Do not block to read into chain from `stdin` if there is a loaded chain that is not empty. + /// + /// If `stdin` is detected as not empty, then it will attempt to consume the buffer still; this prevents the requirement to explicitly close `stdin` when running from a terminal that does not pipe anything into the process input. + /// To ignore input entirely, use `--write`. 
#[arg(short, long)] no_consume: bool, @@ -139,6 +144,13 @@ fn create_chain(cli: &Cli) -> Chain let mut input = std::fs::OpenOptions::new() .read(true) .open(load).expect("Failed to open chain load file"); + if iou::is_tty(&input) { + if cli.force { + eprintln!("Warning: Reading binary data from TTY!"); + } else { + panic!("Cannot read chain from file {:?}: Refusing to read binary data from TTY, if this is intended pass `--force`", load); + } + } load_chain(&mut input).expect("Failed to load chain from file") } else { Chain::new() @@ -160,6 +172,13 @@ fn complete_chain(cli: &Cli, chain: Chain, is_unmodified: bool) -> io::R .write(true) .truncate(force) .open(save).expect("Failed to open chain save file"); + if iou::is_tty(&output) { + if cli.force { + eprintln!("Warning: Writing binary data to TTY!"); + } else { + return Err(io::Error::new(io::ErrorKind::InvalidInput, "Refusing to write binary data to TTY, if this is intended pass `--force`")); + } + } save_chain(&mut output, &chain).expect("Failed to save chain to file") // TODO: Error type } } else if cfg!(debug_assertions) { @@ -177,18 +196,35 @@ fn main() { let stdin = io::stdin(); let mut stdin = stdin.lock(); let mut chain = create_chain(&cli); // TODO: If `self.force_overwrite() && self.is_append()`, keep the file open for the process duration, and `lseek(0)` it before writing instead of opening it again. (XXX: Should we add `O_EXCL` to the opened file if ) - - if !(cli.write_only || (cli.no_consume && ! chain.is_empty())) { - // TODO: If there are 0 lines fed, set a flag for `unmodified` so if `cli.is_append()`, the file isn't overwritten for no reason. 
+ + let has_potential_pending = |stdin| match iou::has_known_available_data(stdin) { + Err(x) if x.kind() == io::ErrorKind::Unsupported => None, + + Err(err) => { + if cfg!(debug_assertions) { + eprintln!("Warning: Failed to test for pending input: {err}"); + } + None + }, + Ok(None) => Some(false), + Ok(Some(_)) => Some(true), + + }.unwrap_or(false); // If we cannot see if there is pending data on the stream, assume there is not for the purpose of `no_consume`. (NOTE: `no_consume`'s directive is to only consume *IFF* we **know** there is data to read.) + + if cli.write_only || (cli.no_consume && !(chain.is_empty() || has_potential_pending(&stdin))) { + if cfg!(debug_assertions) { + use std::os::fd::AsFd; + eprintln!("Warning: Skipping attempt to read from stdin. write_only: {}, no_consume: {}, is_chain_empty = {}, data_pending_on_p({:?}) = {}", cli.write_only, cli.no_consume, chain.is_empty(), &stdin.as_fd(), has_potential_pending(&stdin)); + } + } else { buffered_read_all_lines(&mut stdin, |string| { let words = string.split_whitespace() - .filter(|word| !word.is_empty()) - .map(|s| s.to_owned()).collect::>(); + .filter(|word| !word.is_empty()) + .map(|s| s.to_owned()).collect::>(); if !words.is_empty() { unmodified = false; chain.feed(&words); } - Ok(()) }).expect("Failed to read from stdin"); } @@ -205,5 +241,5 @@ fn main() { }; } - complete_chain(&cli, chain, unmodified).unwrap(); + complete_chain(&cli, chain, unmodified).expect("Failed to save chain"); }