From bc2357c6b6246bf062e3783e43bcf6e31678edca Mon Sep 17 00:00:00 2001 From: Avril Date: Sun, 22 May 2022 23:21:33 +0100 Subject: [PATCH] Attempted to add new feature flag: `memfile-size-output`: Which will pre- or post- set the size of `stdout` to the correct buffer size, allowing consumers of `collect`"s stdout pipe to know the size just like a file. However, `ftruncate()` always fails on stdout. Before re-enabling the feature, we must find out how to set the size of a pipe, if/when you even can, and what syscall(s) you need to do it with. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fortune for collect's current commit: Small blessing − 小吉 --- Cargo.toml | 8 +++ src/ext.rs | 133 ++++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 130 +++++++++++++++++++++++++++++++++++++++++++- src/memfile/fd.rs | 11 +++- 4 files changed, 280 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ace9a49..13554a6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,6 +48,14 @@ memfile = ["bitflags", "lazy_static"] # *NOTE*: Requires `getpagesz()` to be available in libc. memfile-preallocate = ["memfile"] +# Set the size of `stdout` when it is known, so consumers can know exactly the size of the input. +# XXX: Currently doesn't work. TODO: Find out how to make `stdout` `ftruncate()`-able; or some other way to set its size. +memfile-size-output = ["memfile"] + +# Pre-set the `memfile-size-output`. +# TODO: Maybe make this a seperate feature? See comment about pre-setting in `work::memfd()`... +# memfile-size-output-preset = ["memfile-size-output"] + # Use jemalloc instead of system malloc. # # Decreases memory-handling function calls, resulting in less "used" memory and faster allocation speeds at the "cost" of mapping a huge amount of virtual memory. diff --git a/src/ext.rs b/src/ext.rs index 3d6e34d..6e1730f 100644 --- a/src/ext.rs +++ b/src/ext.rs @@ -1,5 +1,13 @@ //! Extensions use super::*; +use std::{ + mem::{ + self, + ManuallyDrop, + }, + marker::PhantomData, + ops, +}; /// Essentially equivelant bound as `eyre::StdError` (private trait) /// @@ -108,3 +116,128 @@ impl FlattenEyreResult for Result, E> } } } + +#[derive(Debug)] +enum RunOnceInternal +{ + Live(ManuallyDrop), + Dead, +} + +impl Clone for RunOnceInternal +{ + #[inline] + fn clone(&self) -> Self { + match self { + Self::Live(l) => Self::Live(l.clone()), + _ => Self::Dead + } + } +} + +impl RunOnceInternal +{ + /// Take `F` now, unless it doesn't need to be dropped. + /// + /// # Returns + /// * if `!needs_drop::()`, `None` is always returned. + /// * if `self` is `Dead`, `None` is returned. + /// * if `self` is `Live(f)`, `Some(f)` is returned, and `self` is set to `Dead`. + #[inline(always)] + fn take_now_for_drop(&mut self) -> Option + { + if mem::needs_drop::() { + self.take_now() + } else { + None + } + } + + /// If `Live`, return the value inside and set to `Dead`. + /// Otherwise, return `None`. + #[inline(always)] + fn take_now(&mut self) -> Option + { + if let Self::Live(live) = self { + let val = unsafe { + ManuallyDrop::take(live) + }; + *self = Self::Dead; + Some(val) + } else { + None + } + } +} + +impl ops::Drop for RunOnceInternal +{ + #[inline] + fn drop(&mut self) { + if mem::needs_drop::() { + if let Self::Live(func) = self { + unsafe { ManuallyDrop::drop(func) }; + } + } + } +} + +/// Holds a 0 argument closure that will only be ran *once*. +#[derive(Debug, Clone)] +pub struct RunOnce(PhantomData T>, RunOnceInternal); + +unsafe impl Send for RunOnce +where F: FnOnce() -> T + Send {} + +impl RunOnce +where F: FnOnce() -> T +{ + pub const fn new(func: F) -> Self + { + Self(PhantomData, RunOnceInternal::Live(ManuallyDrop::new(func))) + } + + pub const fn never() -> Self + { + Self(PhantomData, RunOnceInternal::Dead) + } + + #[inline] + pub fn try_take(&mut self) -> Option + { + match &mut self.1 { + RunOnceInternal::Live(func) => { + Some(unsafe { ManuallyDrop::take(func) }) + }, + _ => None + } + } + + #[inline] + pub fn try_run(&mut self) -> Option + { + self.try_take().map(|func| func()) + } + + #[inline] + pub fn run(mut self) -> T + { + self.try_run().expect("Function has already been consumed") + } + + #[inline] + pub fn take(mut self) -> F + { + self.try_take().expect("Function has already been consumed") + } + + #[inline] + pub fn is_runnable(&self) -> bool + { + if let RunOnceInternal::Dead = &self.1 { + false + } else { + true + } + } +} diff --git a/src/main.rs b/src/main.rs index d7e760f..df51799 100644 --- a/src/main.rs +++ b/src/main.rs @@ -201,12 +201,135 @@ mod work { .unwrap_or_else(|e| format!("")) } + + #[cfg_attr(feature="logging", instrument(skip_all, err, fields(i = ?i.as_raw_fd())))] + #[inline] + fn truncate_file(i: impl AsRawFd, to: S) -> eyre::Result<()> + where S: TryInto, + >::Error: EyreError + { + truncate_file_raw(i, to.try_into().wrap_err(eyre!("Size too large"))?)?; + Ok(()) + } + + fn truncate_file_raw(i: impl AsRawFd, to: impl Into) -> io::Result<()> + { + use libc::ftruncate; + let fd = i.as_raw_fd(); + let to = { + let to = to.into(); + #[cfg(feature="logging")] + let span_size_chk = debug_span!("chk_size", size = ?to); + #[cfg(feature="logging")] + let _span = span_size_chk.enter(); + + if_trace!{ + if to > i64::MAX as u64 { + error!("Size too large (over max by {}) (max {})", to - (i64::MAX as u64), i64::MAX); + } else { + trace!("Setting {fd} size to {to}"); + } + } + + if cfg!(debug_assertions) { + i64::try_from(to).map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "Size too large for ftruncate() offset"))? + } else { + to as i64 + } + }; + + match unsafe { ftruncate(fd, to) } { + -1 => Err(io::Error::last_os_error()), + _ => Ok(()) + } + } + + //TODO: How to `ftruncate()` stdout only once... If try_get_size succeeds, we want to do it then. If it doesn't, we want to do it when `stdin` as been consumed an we know the size of the memory-file... `RunOnce` won't work unless we can give it an argument.... + #[allow(unused_mut)] + let mut set_stdout_len = { + cfg_if! { + if #[cfg(feature="memfile-size-output")] { + if_trace!(warn!("Feature `memfile-size-output` is not yet stable and will cause crash.")); + + const STDOUT: memfile::fd::RawFileDescriptor = unsafe { memfile::fd::RawFileDescriptor::new_unchecked(libc::STDOUT_FILENO) }; //TODO: Get this from `std::io::Stdout.as_raw_fd()` instead. + + use std::sync::atomic::{self, AtomicUsize}; + #[cfg(feature="logging")] + let span_ro = debug_span!("run_once", stdout = ?STDOUT); + + static LEN_HOLDER: AtomicUsize = AtomicUsize::new(0); + + let mut set_len = RunOnce::new(move || { + #[cfg(feature="logging")] + let _span = span_ro.enter(); + + let len = LEN_HOLDER.load(atomic::Ordering::Acquire); + + if_trace!(debug!("Attempting single `ftruncate()` on `STDOUT_FILENO` -> {len}")); + truncate_file(STDOUT, len) + .wrap_err(eyre!("Failed to set length of stdout ({STDOUT}) to {len}")) + + }); + + move |len: usize| { + #[cfg(feature="logging")] + let span_ssl = info_span!("set_stdout_len", len = ?len); + + #[cfg(feature="logging")] + let _span = span_ssl.enter(); + + if_trace!(trace!("Setting static-stored len for RunOnce")); + + LEN_HOLDER.store(len, atomic::Ordering::Release); + + if_trace!(trace!("Calling RunOnce for `set_stdout_len`")); + match set_len.try_run() { + Some(result) => result + .with_section(|| len.header("Attempted length set was")) + .with_warning(|| libc::off_t::MAX.header("Max length is")) + .with_note(|| STDOUT.header("STDOUT_FILENO is")), + None => { + if_trace!(warn!("Already called `set_stdout_len()`")); + Ok(()) + }, + } + } + } else { + |len: usize| -> Result<(), std::convert::Infallible> { + #[cfg(feature="logging")] + let span_ssl = info_span!("set_stdout_len", len = ?len); + #[cfg(feature="logging")] + let _span = span_ssl.enter(); + + if_trace!(info!("Feature `memfile-size-output` is disabled; ignoring.")); + let _ = len; + Ok(()) + } + } + } + }; + let (mut file, read) = { let stdin = io::stdin(); let buffsz = try_get_size(&stdin); if_trace!(debug!("Attempted determining input size: {:?}", buffsz)); - let buffsz = buffsz.or_else(DEFAULT_BUFFER_SIZE); + let buffsz = if cfg!(feature="memfile-size-output") { + //TODO: XXX: Even if this actually works, is it safe to do this? Won't the consumer try to read `value` bytes before we've written them? Perhaps remove pre-setting entirely... + match buffsz { + y @ Some(ref value) => { + let value = value.get(); + + set_stdout_len(value).wrap_err("Failed to set stdout len to that of stdin") + .with_section(|| value.header("Stdin len was calculated as")) + .with_warning(|| "This is a pre-setting")?; + + y + }, + n => n, + } + } else { buffsz }.or_else(DEFAULT_BUFFER_SIZE); + if_trace!(if let Some(buf) = buffsz.as_ref() { trace!("Failed to determine input size: preallocating to {}", buf); } else { @@ -305,6 +428,11 @@ mod work { }; if_trace!(info!("collected {} from stdin. starting write.", read)); + // TODO: XXX: Currently causes crash. But if we can get this to work, leaving this in is definitely safe (as opposed to the pre-setting (see above.)) + set_stdout_len(read) + .wrap_err(eyre!("Failed to `ftruncate()` stdout after collection of {read} bytes")) + .with_note(|| "Was not pre-set")?; + let written = io::copy(&mut file, &mut io::stdout().lock()) .with_section(|| read.header("Bytes read from stdin")) diff --git a/src/memfile/fd.rs b/src/memfile/fd.rs index 1973a5f..3eb69bd 100644 --- a/src/memfile/fd.rs +++ b/src/memfile/fd.rs @@ -109,10 +109,19 @@ impl fmt::Display for BadFDError pub type FileNo = RawFd; -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] #[repr(transparent)] pub struct RawFileDescriptor(NonNegativeI32); +impl fmt::Debug for RawFileDescriptor +{ + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + write!(f, "RawFileDescriptor({})", self.0.get()) + } +} + impl RawFileDescriptor { pub const STDIN: Self = Self(unsafe { NonNegativeI32::new_unchecked(0) });