From a5ad02a6de7b18da2738dba49f385d9f9ec3c711 Mon Sep 17 00:00:00 2001 From: Avril Date: Fri, 8 Apr 2022 04:02:49 +0100 Subject: [PATCH 01/15] Working primitive, slow, prototype. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fortune for collect's current commit: Small blessing − 小吉 --- .gitignore | 10 +- Cargo.lock | 57 +++++++++++ Cargo.toml | 33 ++++++ collect.c | 289 ---------------------------------------------------- src/main.rs | 64 ++++++++++++ 5 files changed, 163 insertions(+), 290 deletions(-) create mode 100644 Cargo.lock create mode 100644 Cargo.toml delete mode 100644 collect.c create mode 100644 src/main.rs diff --git a/.gitignore b/.gitignore index c30ad65..667042e 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,9 @@ -collect +/target +*~ + +# Profiling +*.png +*.svg +perf.* +vgcore.* + diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..5d26709 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,57 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "bytes" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8" + +[[package]] +name = "cc" +version = "1.0.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" + +[[package]] +name = "collect" +version = "0.1.0" +dependencies = [ + "bytes", + "jemallocator", + "libc", +] + +[[package]] +name = "fs_extra" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394" + +[[package]] +name = "jemalloc-sys" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d3b9f3f5c9b31aa0f5ed3260385ac205db665baa41d49bb8338008ae94ede45" +dependencies = [ + "cc", + "fs_extra", + "libc", +] + +[[package]] +name = "jemallocator" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43ae63fcfc45e99ab3d1b29a46782ad679e98436c3169d15a167a1108a724b69" +dependencies = [ + "jemalloc-sys", + "libc", +] + +[[package]] +name = "libc" +version = "0.2.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec647867e2bf0772e28c8bcde4f0d19a9216916e890543b5a03ed8ef27b8f259" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..8cc6b51 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,33 @@ +[package] +name = "collect" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[features] +default = ["jemalloc"] + +# TODO: mmap, memfd_create() ver + +# TODO: make `bytes` optional, and use Vec by default instead? idk... + +# Use jemalloc instead of system malloc. +# Seems to reduce overall memory usage at the cost of a very small speed drop. +jemalloc = ["jemallocator"] + +[profile.release] +opt-level = 3 +lto = "fat" +codegen-units = 1 +strip=true + +[profile.symbols] +inherits="release" +#incremental=true +strip=false + +[dependencies] +bytes = "1.1.0" +jemallocator = { version = "0.3.2", optional = true } +libc = "0.2.122" diff --git a/collect.c b/collect.c deleted file mode 100644 index fb45138..0000000 --- a/collect.c +++ /dev/null @@ -1,289 +0,0 @@ -// Collect all stdin into memory, then -#define _GNU_SOURCE - -#include -#include -#include -#include - -#include -#include -#include -#include - -#include - -#define LIKELY(expr) __builtin_expect(!!(expr), true) -#define UNLIKELY(expr) __builtin_expect(!!(expr), false) - -#define _$_if__L true -#define _$_if__U false -#define $if(l, expr) if(__builtin_expect(!!(expr), _$_if__ ## l)) - -#define F_STDIN 0 -#define F_STDOUT 1 -#define F_STDERR 2 - -typedef union arguments { - struct { - off_t pages_per_buffer; - } sized; - struct { - size_t buffsz; - } unsized; -} option_t; - -#define DEFAULT_OPTION ((option_t){ .sized = { .pages_per_buffer = 8 } }) - -static bool has_size(int fd, off_t* restrict size) -{ - struct stat st; - if( fstat(fd, &st) < 0 ) { perror("failed to stat stdin"); return false; } - else if (st.st_size > 0) { - // Non-zero size - *size = st.st_size; - return true; - } - fprintf(stderr, "returned sz (fd %d): %ld\n", fd, st.st_size); - return false; -} - -int collect_sized(off_t sz, const option_t* opt); -int collect_unsized(const option_t* opt); - -int main(void) -{ - off_t sz; - option_t args = DEFAULT_OPTION; - if(has_size(F_STDIN, &sz)) { - return collect_sized((size_t)sz, &args); - } else { - return collect_unsized(&args); - } -} - - -inline static -const void* map_input_buffer(int fd, size_t sz) -{ - void* map = mmap(NULL, sz, PROT_READ, MAP_PRIVATE, fd, 0); - - if(UNLIKELY(map == MAP_FAILED)) { - perror("input mmap()"); - return NULL; - } - - return map; -} - -inline static -bool unmap_mem(void* mem, size_t len) -{ - if(UNLIKELY( munmap(mem, len) != 0 )) { - perror("munmap()"); - return false; - } - return true; -} - -static int page_size() -{ - static int _page_size=0; - if(UNLIKELY(!_page_size)) return _page_size = getpagesize(); - return _page_size; -} - -inline static -bool alloc_pages(off_t pages, int *restrict _fd, size_t* restrict _size) -{ - int fd = memfd_create("collect-sized-buffer", O_RDWR); - $if(U, fd < 0) goto _e_memfd; - $if(U, fallocate(fd, 0, 0, __builtin_constant_p(_size) && !_size - ? pages * page_size() - : _size ? (off_t)( *_size = pages * page_size() ) - : pages * page_size()) != 0) goto _e_fallocate; - $if(L, _fd) *_fd = fd; - else close(fd); - - return true; - // +Unwind+ // -_e_fallocate: - perror("fallocate()"); - close(fd); - if(0) -_e_memfd: - perror("memfd_create()"); - // -Unwind- // - return false; -} - -struct map_fd { - void* map; - size_t len; - int fd; -}; - -static -bool map_pages(off_t pages, struct map_fd* restrict out) -{ - $if(U, !out) return alloc_pages(pages, NULL, NULL); - - $if(U, !alloc_pages(pages, &out->fd, &out->len)) goto _e_ap; - $if(U, (out->map = mmap(NULL, out->len, PROT_READ|PROT_WRITE, MAP_PRIVATE, out->fd, 0)) == MAP_FAILED) goto _e_map; - $if(U, madvise(out->map, out->len, MADV_MERGEABLE | MADV_WILLNEED)) goto _e_madv; - - return true; - - // +Unwind+ // -_e_madv: - perror("madv()"); - munmap(out->map, out->len); - if(0) -_e_map: - perror("mmap()"); - close(out->fd); - if(0) -_e_ap: - (void)0; // no perror() needed - // -Unwind- // - return false; -} - -inline static -void unmap_pages(struct map_fd in, int *restrict keep_fd) -{ - $if(U, munmap(in.map, in.len)) perror("munmap()"); - if(__builtin_constant_p(keep_fd) && keep_fd) *keep_fd = in.fd; - else { - if(!keep_fd) { - $if(U, close(in.fd)) perror("close()"); - } else *keep_fd = in.fd; - } -} - -int collect_sized(off_t isz, const option_t* gopt) -{ - register int rc=0; - __auto_type opt = gopt->sized; - const off_t real_max_size = page_size() * opt.pages_per_buffer; -// const off_t pages_per_isz = isz % page_size(); -// const off_t page_leftover_isz = isz / page_size(); - - struct map_fd buffer; - if(!map_pages(opt.pages_per_buffer, &buffer)) return 1; - - if(isz > real_max_size) { - // Multiple buffers needed - } else $if(U, isz == real_max_size) { - // Exactly one buffer (unlikely, but possible) - ssize_t r = splice(F_STDIN, NULL, - buffer.fd, NULL, - (size_t)isz, - SPLICE_F_MOVE); - switch(r) { - case -1: goto _e_splice; - case 0: /* TODO: splice reported end-of-input, should we ignore this? */ - rc = 10; - goto _cleanup_splice; - default: { - fprintf(stderr, "splice()'d %lu bytes into buffer (%ld size @ %d)\n", r, buffer.len, buffer.fd); - } - break; - } - //TODO: splice() all bytes from that buffer into STDOUT - rc = 0; - } else { - // Less than one buffer - ssize_t r = splice(F_STDIN, NULL, // TODO: XXX: WHY does splice() **ALWAYS** fail??? it literally never works??? - buffer.fd, NULL, - (size_t)isz, - SPLICE_F_MOVE); - switch(r) { - case -1: goto _e_splice; - case 0: /* TODO: splice reported end-of-input, should we ignore this? */ - rc = 10; - goto _cleanup_splice; - default: { - fprintf(stderr, "splice()'d %lu bytes into buffer (%ld size @ %d)\n", r, buffer.len, buffer.fd); - } - break; - } - // TODO: splice() isz bytes from buffer into stdout - rc = 0; - } - - // +Cleanup+ // -_cleanup_splice: if(0) -_e_splice: rc = (perror("splice()"), -1); - unmap_pages(buffer, NULL); - // -Cleanup- // - return rc; -} - -int collect_unsized(const option_t* opt) -{ - return 0; -} - -#if 0 -int collect_sized(off_t isz, const option_t* opt) -{ - const size_t sz = (size_t)isz; - fprintf(stderr, "size of input: %lu, max size of mapping: %lu (buffers %lu / lo %lu)\n", sz, opt->sized.maxsz, - sz % opt->sized.maxsz, - sz / opt->sized.maxsz); - - //fcntl(F_STDOUT, ... SOMETHING to make splice() work here... - //TODO :: XXX: : WHY can't we splice() here???? w/e.. -#if 1 - if( fallocate(F_STDOUT, 0 /* | FALLOC_FL_KEEP_SIZE*/, 0, isz) != 0) { - perror("fallocate(STDOUT)"); -// return 1; - } -#endif - - if( fcntl(F_STDOUT, F_SETFL, fcntl(F_STDOUT, F_GETFL) & ~O_APPEND) < 0 ) - { - perror("fcntl(stdout) + O_APPEND"); - return -O_APPEND; - } - ssize_t sprc = splice(F_STDIN, NULL, - F_STDOUT, NULL, //TODO: XXX: Why does this always fail? I've seen splice(1, 2) work before... - sz, - SPLICE_F_MOVE); - switch(sprc) { - case -1: perror("splice() whole buffer failed"); - return 1; - case 0: - fprintf(stderr, "splice() reported end-of-input. TODO: continue splicing, or ignore?\n"); - return 2; - default: - if((size_t)sprc == sz) return 0; - else if (sprc < sz) { - fprintf(stderr, "splice() moved only %ld / %lu bytes. TODO: move the other %lu bytes\n", - sprc, sz, - sz - (size_t)sprc); - return 3; - } else if(sprc > sz) fprintf(stderr, "splice() somehow moved %ld / %lu (+ %ld bytes more)\n", - sprc, sz, - (size_t)sprc - sz); - return -1; - } -#if 0 - // Map stdin - const void* stdin_map = map_input_buffer(F_STDIN, sz); - if(!stdin_map) goto e_map_input; - - - -cleanup: - - unmap_mem((void*)stdin_map, sz); - if(0) -e_map_input: - { fprintf(stderr, "failed to map stdin (%lu)\n", sz); rc = 1; } - return rc; -#endif -} - -#endif diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..34f4525 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,64 @@ +#[cfg(feature="jemalloc")] +extern crate jemallocator; + +#[cfg(feature="jemalloc")] +const _:() = { + #[global_allocator] + static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; +}; + +use std::{ + io, + mem::MaybeUninit, + os::unix::prelude::*, + num::NonZeroUsize, +}; + +use bytes::{ + BytesMut, + Buf, + BufMut, +}; + +fn try_get_size(reader: &R) -> Option +where R: AsRawFd +{ + let fd = reader.as_raw_fd(); + use libc::{ + fstat64, + stat64, + }; + if fd < 0 { + return None; + } + let mut st: MaybeUninit = MaybeUninit::uninit(); + unsafe { + match fstat64(fd, st.as_mut_ptr()) { + 0 => { + NonZeroUsize::new(st.assume_init().st_size as usize) + }, + _ => None, + } + } +} + +fn main() -> io::Result<()> { + let (bytes, read) = { + let stdin = io::stdin(); + let mut bytes = match try_get_size(&stdin) { + Some(sz) => BytesMut::with_capacity(sz.into()), + None => BytesMut::new(), + }; + + let read = io::copy(&mut stdin.lock(), &mut (&mut bytes).writer())?; + (bytes.freeze(), read as usize) + }; + + let written = io::copy(&mut bytes.slice(..read).reader() , &mut io::stdout().lock())?; + + if read != written as usize { + return Err(io::Error::new(io::ErrorKind::BrokenPipe, format!("read {read} bytes, but only wrote {written}"))); + } + + Ok(()) +} From 1cdd5b218784aa10e19bf8bbb1c5b932e94df19a Mon Sep 17 00:00:00 2001 From: Avril Date: Sat, 9 Apr 2022 20:32:28 +0100 Subject: [PATCH 02/15] Made `bytes` an optional feature. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fortune for collect's current commit: Future small blessing − 末小吉 --- Cargo.toml | 4 +- src/buffers.rs | 336 +++++++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 13 +- 3 files changed, 345 insertions(+), 8 deletions(-) create mode 100644 src/buffers.rs diff --git a/Cargo.toml b/Cargo.toml index 8cc6b51..687f510 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ default = ["jemalloc"] # TODO: mmap, memfd_create() ver -# TODO: make `bytes` optional, and use Vec by default instead? idk... +# bytes: use `bytes` crate for collecting instead of `std::vec` # Use jemalloc instead of system malloc. # Seems to reduce overall memory usage at the cost of a very small speed drop. @@ -28,6 +28,6 @@ inherits="release" strip=false [dependencies] -bytes = "1.1.0" +bytes = {version = "1.1.0", optional = true } jemallocator = { version = "0.3.2", optional = true } libc = "0.2.122" diff --git a/src/buffers.rs b/src/buffers.rs new file mode 100644 index 0000000..d652530 --- /dev/null +++ b/src/buffers.rs @@ -0,0 +1,336 @@ +//! Buffers and helpers +use super::*; +use std::num::NonZeroUsize; + +#[cfg(feature="bytes")] +/// Default mutable buffer +#[allow(dead_code)] +pub type DefaultMut = bytes::BytesMut; + +#[cfg(not(feature="bytes"))] +/// Default mutable buffer +#[allow(dead_code)] +pub type DefaultMut = Vec; + +/// Default immutable buffer +#[allow(dead_code)] +pub type Default = ::Frozen; + + +/// Reader from a mutable reference of a `Buffer`. +#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub struct BufferReader<'a, B: ?Sized>(&'a mut B, usize); + + +/// Writer to a mutable reference of a `MutBuffer`. +#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub struct BufferWriter<'a, B: ?Sized>(&'a mut B, usize); + +#[allow(dead_code)] +const _: () = { + impl<'a, B: ?Sized + Buffer> BufferReader<'a, B> + { + #[inline(always)] + pub fn get(&self) -> &B + { + &self.0 + } + #[inline(always)] + pub fn get_mut(&mut self) -> &B + { + &mut self.0 + } + #[inline(always)] + pub fn amount_read(&self) -> usize + { + self.1 + } + } + impl<'a, 'b: 'a, B: Buffer + 'b> BufferReader<'a, B> + { + #[inline] + pub fn unsize(self) -> BufferReader<'a, (dyn Buffer + 'b)> + { + BufferReader(self.0, self.1) + } + } + + impl<'a, B: ?Sized + Buffer> BufferWriter<'a, B> + { + #[inline(always)] + pub fn get(&self) -> &B + { + &self.0 + } + #[inline(always)] + pub fn get_mut(&mut self) -> &B + { + &mut self.0 + } + #[inline(always)] + pub fn amount_written(&self) -> usize + { + self.1 + } + } + impl<'a, 'b: 'a, B: Buffer + 'b> BufferWriter<'a, B> + { + #[inline] + pub fn unsize(self) -> BufferWriter<'a, (dyn Buffer + 'b)> + { + BufferWriter(self.0, self.1) + } + } +}; + +impl<'a, B: ?Sized + Buffer> io::Read for BufferReader<'a, B> +{ + #[inline] + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let adv = self.0.copy_to_slice(self.1, buf); + self.1 += adv; + Ok(adv) + } +} + +impl<'a, B: ?Sized + MutBuffer> io::Write for BufferWriter<'a, B> +{ + #[inline] + fn write(&mut self, buf: &[u8]) -> io::Result { + let adv = self.0.copy_from_slice(self.1, buf); + self.1 += adv; + Ok(adv) + } + #[inline(always)] + fn flush(&mut self) -> io::Result<()> { + Ok(()) + } +} + +/// An immutable contiguous buffer +pub trait Buffer: AsRef<[u8]> +{ + #[inline] + fn copy_to_slice(&self, st: usize, slice: &mut [u8]) -> usize + { + let by = self.as_ref(); + if st >= by.len() { + return 0; + } + + let by = &by[st..]; + let len = std::cmp::min(by.len(), slice.len()); + // SAFETY: We know `self`'s AsRef impl cannot overlap with `slice`, since `slice` is a mutable reference. + if len > 0 { + unsafe { + std::ptr::copy_nonoverlapping(by.as_ptr(), slice.as_mut_ptr(), len) + } + } + len + } + +} +pub trait BufferExt: Buffer +{ + #[inline(always)] + fn reader_from(&mut self, st: usize) -> BufferReader<'_, Self> + { + BufferReader(self, st) + } + #[inline] + fn reader(&mut self) -> BufferReader<'_, Self> + { + self.reader_from(0) + } +} +impl BufferExt for B{} + +impl Buffer for T +where T: AsRef<[u8]> +{} + +/// A mutable contiguous buffer +pub trait MutBuffer: AsMut<[u8]> +{ + type Frozen: Sized + Buffer; + + /// Make immutable + fn freeze(self) -> Self::Frozen; + + #[inline] + fn copy_from_slice(&mut self, st: usize, slice: &[u8]) -> usize + { + let by = self.as_mut(); + if st >= by.len() { + return 0; + } + + let by = &mut by[st..]; + let len = std::cmp::min(by.len(), slice.len()); + + if len > 0 { + // SAFETY: We know `self`'s AsRef impl cannot overlap with `slice`, since `slice` is a mutable reference. + unsafe { + std::ptr::copy_nonoverlapping(slice.as_ptr(), by.as_mut_ptr(), len); + } + } + len + } +} + +pub trait MutBufferExt: MutBuffer +{ + #[inline(always)] + fn writer_from(&mut self, st: usize) -> BufferWriter<'_, Self> + { + BufferWriter(self, st) + } + #[inline] + fn writer(&mut self) -> BufferWriter<'_, Self> + { + self.writer_from(0) + } +} +impl MutBufferExt for B{} + +#[cfg(feature="bytes")] +impl MutBuffer for bytes::BytesMut +{ + type Frozen = bytes::Bytes; + #[inline(always)] + fn freeze(self) -> Self::Frozen { + bytes::BytesMut::freeze(self) + } +} + +impl MutBuffer for Vec +{ + type Frozen = Box<[u8]>; + #[inline] + fn freeze(self) -> Self::Frozen { + self.into_boxed_slice() + } +} + +/// A trait for buffers that can be allocated with a capacity +pub trait WithCapacity: Sized +{ + fn wc_new() -> Self; + fn wc_with_capacity(_: usize) -> Self; +} + +impl WithCapacity for Box<[u8]> +{ + #[inline(always)] + fn wc_new() -> Self { + Vec::wc_new().into_boxed_slice() + } + #[inline(always)] + fn wc_with_capacity(cap: usize) -> Self { + Vec::wc_with_capacity(cap).into_boxed_slice() + } +} + +pub trait WithCapExt: WithCapacity +{ + fn maybe_with_capacity(maybe: Option) -> Self; + #[inline(always)] + fn try_with_capacity(cap: usize) -> Self + { + Self::maybe_with_capacity(NonZeroUsize::new(cap)) + } +} + +/// A type that can be used as a size for creating a `WithCapacity` buffer +pub trait TryCreateBuffer +{ + fn create_buffer(&self) -> T; +} + +impl TryCreateBuffer for Option +{ + #[inline(always)] + fn create_buffer(&self) -> T { + T::maybe_with_capacity(*self) + } +} + +impl TryCreateBuffer for usize +{ + #[inline(always)] + fn create_buffer(&self) -> T { + T::try_with_capacity(*self) + } +} + +impl WithCapExt for T +{ + #[inline] + fn maybe_with_capacity(maybe: Option) -> Self { + match maybe { + Some(sz) => Self::wc_with_capacity(sz.into()), + None => Self::wc_new() + } + } + +} + +/// Implement `WithCapacity` for a type that supports it. +macro_rules! cap_buffer { + ($name:ty) => { + impl $crate::buffers::WithCapacity for $name + { + #[inline(always)] + fn wc_new() -> Self + { + Self::new() + } + #[inline(always)] + fn wc_with_capacity(cap: usize) -> Self + { + Self::with_capacity(cap) + } + } + }; +} + +pub mod prelude +{ + /// Export these items anonymously. + macro_rules! export_anon { + ($($name:ident),+ $(,)?) => { + $( + pub use super::$name as _; + )* + }; + } + + // Causes conflicts for `.writer()`, so remove them from prelude. + #[cfg(feature="bytes")] + export_anon!{ + WithCapExt, + //BufferExt, + //MutBufferExt, + WithCapExt, + } + + #[cfg(not(feature="bytes"))] + export_anon!{ + WithCapExt, + BufferExt, + MutBufferExt, + WithCapExt, + } + + pub use super::{ + WithCapacity, + TryCreateBuffer, + MutBuffer, + Buffer, + }; +} + +pub(crate) use cap_buffer; + +#[cfg(feature="bytes")] buffers::cap_buffer!(bytes::BytesMut); +cap_buffer!(Vec); diff --git a/src/main.rs b/src/main.rs index 34f4525..52ae69d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -14,8 +14,11 @@ use std::{ num::NonZeroUsize, }; +mod buffers; +use buffers::prelude::*; + +#[cfg(feature="bytes")] use bytes::{ - BytesMut, Buf, BufMut, }; @@ -45,16 +48,14 @@ where R: AsRawFd fn main() -> io::Result<()> { let (bytes, read) = { let stdin = io::stdin(); - let mut bytes = match try_get_size(&stdin) { - Some(sz) => BytesMut::with_capacity(sz.into()), - None => BytesMut::new(), - }; + let mut bytes: buffers::DefaultMut = try_get_size(&stdin).create_buffer(); let read = io::copy(&mut stdin.lock(), &mut (&mut bytes).writer())?; (bytes.freeze(), read as usize) }; - let written = io::copy(&mut bytes.slice(..read).reader() , &mut io::stdout().lock())?; + let written = + io::copy(&mut (&bytes[..read]).reader() , &mut io::stdout().lock())?; if read != written as usize { return Err(io::Error::new(io::ErrorKind::BrokenPipe, format!("read {read} bytes, but only wrote {written}"))); From bea5cda4a1e35ee057ee23ce078ca2f992bc50d8 Mon Sep 17 00:00:00 2001 From: Avril Date: Sun, 10 Apr 2022 23:27:05 +0100 Subject: [PATCH 03/15] Fixed not copying to buffer when not using feature `bytes`. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fortune for collect's current commit: Curse − 凶 --- Cargo.lock | 360 +++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 5 + src/buffers.rs | 38 +++++- src/main.rs | 65 ++++++++- 4 files changed, 459 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5d26709..4e8e978 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,51 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "addr2line" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9ecd88a8c8378ca913a680cd98f0f13ac67383d35993f86c90a70e3f137816b" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "ansi_term" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +dependencies = [ + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "backtrace" +version = "0.3.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e121dee8023ce33ab248d9ce1493df03c3b38a659b240096fcbd7048ff9c31f" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + [[package]] name = "bytes" version = "1.1.0" @@ -14,13 +59,60 @@ version = "1.0.73" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + [[package]] name = "collect" version = "0.1.0" dependencies = [ "bytes", + "color-eyre", "jemallocator", "libc", + "tracing", + "tracing-error", + "tracing-subscriber", +] + +[[package]] +name = "color-eyre" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ebf286c900a6d5867aeff75cfee3192857bb7f24b547d4f0df2ed6baa812c90" +dependencies = [ + "backtrace", + "color-spantrace", + "eyre", + "indenter", + "once_cell", + "owo-colors", + "tracing-error", +] + +[[package]] +name = "color-spantrace" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ba75b3d9449ecdccb27ecbc479fdc0b87fa2dd43d2f8298f9bf0e59aacc8dce" +dependencies = [ + "once_cell", + "owo-colors", + "tracing-core", + "tracing-error", +] + +[[package]] +name = "eyre" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c2b6b5a29c02cdc822728b7d7b8ae1bab3e3b05d44522770ddd49722eeac7eb" +dependencies = [ + "indenter", + "once_cell", ] [[package]] @@ -29,6 +121,18 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394" +[[package]] +name = "gimli" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78cc372d058dcf6d5ecd98510e7fbc9e5aec4d21de70f65fea8fecebcd881bd4" + +[[package]] +name = "indenter" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683" + [[package]] name = "jemalloc-sys" version = "0.3.2" @@ -50,8 +154,264 @@ dependencies = [ "libc", ] +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + [[package]] name = "libc" version = "0.2.122" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec647867e2bf0772e28c8bcde4f0d19a9216916e890543b5a03ed8ef27b8f259" + +[[package]] +name = "log" +version = "0.4.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6389c490849ff5bc16be905ae24bc913a9c8892e19b2341dbc175e14c341c2b8" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata", +] + +[[package]] +name = "memchr" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" + +[[package]] +name = "miniz_oxide" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a92518e98c078586bc6c934028adcca4c92a53d6a958196de835170a01d84e4b" +dependencies = [ + "adler", + "autocfg", +] + +[[package]] +name = "object" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67ac1d3f9a1d3616fd9a60c8d74296f22406a238b6a72f5cc1e6f314df4ffbf9" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9" + +[[package]] +name = "owo-colors" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e72e30578e0d0993c8ae20823dd9cff2bc5517d2f586a8aef462a581e8a03eb" + +[[package]] +name = "pin-project-lite" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e280fbe77cc62c91527259e9442153f4688736748d24660126286329742b4c6c" + +[[package]] +name = "proc-macro2" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec757218438d5fda206afc041538b2f6d889286160d649a86a24d37e1235afd1" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "632d02bff7f874a36f33ea8bb416cd484b90cc66c1194b1a1110d067a7013f58" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286" +dependencies = [ + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" + +[[package]] +name = "rustc-demangle" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" + +[[package]] +name = "sharded-slab" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "smallvec" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" + +[[package]] +name = "syn" +version = "1.0.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b683b2b825c8eef438b77c36a06dc262294da3d5a5813fac20da149241dcd44d" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "thread_local" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180" +dependencies = [ + "once_cell", +] + +[[package]] +name = "tracing" +version = "0.1.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80b9fa4360528139bc96100c160b7ae879f5567f49f1782b0b02035b0358ebf3" +dependencies = [ + "cfg-if", + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e65ce065b4b5c53e73bb28912318cb8c9e9ad3921f1d669eb0e68b4c8143a2b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90442985ee2f57c9e1b548ee72ae842f4a9a20e3f417cc38dbc5dc684d9bb4ee" +dependencies = [ + "lazy_static", + "valuable", +] + +[[package]] +name = "tracing-error" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d686ec1c0f384b1277f097b2f279a2ecc11afe8c133c1aabf036a27cb4cd206e" +dependencies = [ + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "tracing-log" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6923477a48e41c1951f1999ef8bb5a3023eb723ceadafe78ffb65dc366761e3" +dependencies = [ + "lazy_static", + "log", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bc28f93baff38037f64e6f43d34cfa1605f27a49c34e8a04c5e78b0babf2596" +dependencies = [ + "ansi_term", + "lazy_static", + "matchers", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "unicode-xid" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" + +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml index 687f510..5cb7b24 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,6 +10,7 @@ default = ["jemalloc"] # TODO: mmap, memfd_create() ver +# XXX: without bytes, it completely fails?? # bytes: use `bytes` crate for collecting instead of `std::vec` # Use jemalloc instead of system malloc. @@ -29,5 +30,9 @@ strip=false [dependencies] bytes = {version = "1.1.0", optional = true } +color-eyre = { version = "0.6.1", features = ["capture-spantrace"] } jemallocator = { version = "0.3.2", optional = true } libc = "0.2.122" +tracing = { version = "0.1.33", features = ["attributes"] } +tracing-error = "0.2.0" +tracing-subscriber = { version = "0.3.11", features = ["tracing", "env-filter"] } diff --git a/src/buffers.rs b/src/buffers.rs index d652530..aecb246 100644 --- a/src/buffers.rs +++ b/src/buffers.rs @@ -21,7 +21,6 @@ pub type Default = ::Frozen; #[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] pub struct BufferReader<'a, B: ?Sized>(&'a mut B, usize); - /// Writer to a mutable reference of a `MutBuffer`. #[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] pub struct BufferWriter<'a, B: ?Sized>(&'a mut B, usize); @@ -96,10 +95,13 @@ impl<'a, B: ?Sized + Buffer> io::Read for BufferReader<'a, B> impl<'a, B: ?Sized + MutBuffer> io::Write for BufferWriter<'a, B> { #[inline] + #[instrument(skip(self))] fn write(&mut self, buf: &[u8]) -> io::Result { let adv = self.0.copy_from_slice(self.1, buf); + self.1 += adv; - Ok(adv) + Ok(adv) + } #[inline(always)] fn flush(&mut self) -> io::Result<()> { @@ -157,13 +159,17 @@ pub trait MutBuffer: AsMut<[u8]> /// Make immutable fn freeze(self) -> Self::Frozen; - #[inline] + #[inline] + #[instrument(skip(self))] fn copy_from_slice(&mut self, st: usize, slice: &[u8]) -> usize { let by = self.as_mut(); + dbg!(&by); + if st >= by.len() { return 0; } + dbg!(st); let by = &mut by[st..]; let len = std::cmp::min(by.len(), slice.len()); @@ -180,12 +186,14 @@ pub trait MutBuffer: AsMut<[u8]> pub trait MutBufferExt: MutBuffer { - #[inline(always)] + #[inline(always)] + #[instrument(skip(self))] fn writer_from(&mut self, st: usize) -> BufferWriter<'_, Self> { BufferWriter(self, st) } - #[inline] + #[inline] + #[instrument(skip(self))] fn writer(&mut self) -> BufferWriter<'_, Self> { self.writer_from(0) @@ -201,15 +209,35 @@ impl MutBuffer for bytes::BytesMut fn freeze(self) -> Self::Frozen { bytes::BytesMut::freeze(self) } + //TODO: XXX: Impl copy_from_slice() as is done in impl for Vec } impl MutBuffer for Vec { type Frozen = Box<[u8]>; #[inline] + #[instrument] fn freeze(self) -> Self::Frozen { self.into_boxed_slice() } + + #[instrument] + fn copy_from_slice(&mut self, st: usize, buf: &[u8]) -> usize + { + if (st + buf.len()) <= self.len() { + // We can put `buf` in st..buf.len() + self[st..].copy_from_slice(buf); + } else if st <= self.len() { + // The start is lower but the end is not + let rem = self.len() - st; + self[st..].copy_from_slice(&buf[..rem]); + self.extend_from_slice(&buf[rem..]); + } else { + // it is past the end, extend. + self.extend_from_slice(buf); + } + buf.len() + } } /// A trait for buffers that can be allocated with a capacity diff --git a/src/main.rs b/src/main.rs index 52ae69d..33f5f61 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,6 @@ + +#[macro_use] extern crate tracing; + #[cfg(feature="jemalloc")] extern crate jemallocator; @@ -14,6 +17,17 @@ use std::{ num::NonZeroUsize, }; +#[allow(unused_imports)] +use color_eyre::{ + eyre::{ + self, + eyre, + WrapErr, + }, + Section, + SectionExt, Help, +}; + mod buffers; use buffers::prelude::*; @@ -45,20 +59,63 @@ where R: AsRawFd } } -fn main() -> io::Result<()> { + +fn init() -> eyre::Result<()> +{ + fn install_tracing() + { + //! Install spantrace handling + + use tracing_error::ErrorLayer; + use tracing_subscriber::prelude::*; + use tracing_subscriber::{fmt, EnvFilter}; + + let fmt_layer = fmt::layer().with_target(false); + let filter_layer = EnvFilter::try_from_default_env() + .or_else(|_| EnvFilter::try_new("info")) + .unwrap(); + + tracing_subscriber::registry() + .with(filter_layer) + .with(fmt_layer) + .with(ErrorLayer::default()) + .init(); + } + + install_tracing(); + + color_eyre::install() +} + +#[instrument] +fn main() -> eyre::Result<()> { + init()?; + //info!("Initialised"); + let (bytes, read) = { let stdin = io::stdin(); let mut bytes: buffers::DefaultMut = try_get_size(&stdin).create_buffer(); - let read = io::copy(&mut stdin.lock(), &mut (&mut bytes).writer())?; + let read = io::copy(&mut stdin.lock(), &mut bytes.writer()) + .with_section(|| bytes.len().header("Buffer size is")) + .with_section(|| bytes.capacity().header("Buffer cap is")) + .with_section(|| format!("{:?}", bytes).header("Buffer is")) + .wrap_err("Failed to read into buffer")?; + (bytes.freeze(), read as usize) }; let written = - io::copy(&mut (&bytes[..read]).reader() , &mut io::stdout().lock())?; + io::copy(&mut (&bytes[..read]).reader() , &mut io::stdout().lock()) + .with_section(|| read.header("Bytes read")) + .with_section(|| bytes.len().header("Buffer length (frozen)")) + .with_section(|| format!("{:?}", &bytes[..read]).header("Read Buffer")) + .with_section(|| format!("{:?}", bytes).header("Full Buffer")) + .wrap_err("Failed to write from buffer")?; if read != written as usize { - return Err(io::Error::new(io::ErrorKind::BrokenPipe, format!("read {read} bytes, but only wrote {written}"))); + return Err(io::Error::new(io::ErrorKind::BrokenPipe, format!("read {read} bytes, but only wrote {written}"))) + .wrap_err("Writing failed: size mismatch"); } Ok(()) From cc37f604f1e3447d51a19baa7b2851f9cbcde50a Mon Sep 17 00:00:00 2001 From: Avril Date: Wed, 13 Apr 2022 04:09:06 +0100 Subject: [PATCH 04/15] Fixed tracing causing huge unneeded bottleneck in non `bytes` builds. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fortune for collect's current commit: Future small blessing − 末小吉 --- .gitignore | 1 + Cargo.toml | 7 ++++-- src/buffers.rs | 58 +++++++++++++++++++++++++++++++++++++++----------- src/main.rs | 27 ++++++++++++++++------- 4 files changed, 71 insertions(+), 22 deletions(-) diff --git a/.gitignore b/.gitignore index 667042e..d9a10ac 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ perf.* vgcore.* +collect-* diff --git a/Cargo.toml b/Cargo.toml index 5cb7b24..65a2653 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [features] -default = ["jemalloc"] +default = ["jemalloc", "tracing/release_max_level_warn"] # TODO: mmap, memfd_create() ver @@ -17,6 +17,9 @@ default = ["jemalloc"] # Seems to reduce overall memory usage at the cost of a very small speed drop. jemalloc = ["jemallocator"] +# Remove all tracing points +no-logging = ["tracing/max_level_off"] + [profile.release] opt-level = 3 lto = "fat" @@ -29,7 +32,7 @@ inherits="release" strip=false [dependencies] -bytes = {version = "1.1.0", optional = true } +bytes = { version = "1.1.0", optional = true } color-eyre = { version = "0.6.1", features = ["capture-spantrace"] } jemallocator = { version = "0.3.2", optional = true } libc = "0.2.122" diff --git a/src/buffers.rs b/src/buffers.rs index aecb246..0c8328e 100644 --- a/src/buffers.rs +++ b/src/buffers.rs @@ -85,6 +85,7 @@ const _: () = { impl<'a, B: ?Sized + Buffer> io::Read for BufferReader<'a, B> { #[inline] + #[instrument(level="trace", skip_all, fields(buf = ?buf.len()))] fn read(&mut self, buf: &mut [u8]) -> io::Result { let adv = self.0.copy_to_slice(self.1, buf); self.1 += adv; @@ -95,7 +96,7 @@ impl<'a, B: ?Sized + Buffer> io::Read for BufferReader<'a, B> impl<'a, B: ?Sized + MutBuffer> io::Write for BufferWriter<'a, B> { #[inline] - #[instrument(skip(self))] + #[instrument(level="trace", skip_all, fields(buf = ?buf.len()))] fn write(&mut self, buf: &[u8]) -> io::Result { let adv = self.0.copy_from_slice(self.1, buf); @@ -112,7 +113,8 @@ impl<'a, B: ?Sized + MutBuffer> io::Write for BufferWriter<'a, B> /// An immutable contiguous buffer pub trait Buffer: AsRef<[u8]> { - #[inline] + #[inline] + #[instrument(level="trace", skip_all, fields(buf = ?slice.len()))] fn copy_to_slice(&self, st: usize, slice: &mut [u8]) -> usize { let by = self.as_ref(); @@ -160,7 +162,7 @@ pub trait MutBuffer: AsMut<[u8]> fn freeze(self) -> Self::Frozen; #[inline] - #[instrument(skip(self))] + #[instrument(level="debug", skip_all, fields(st, buflen = ?slice.len()))] fn copy_from_slice(&mut self, st: usize, slice: &[u8]) -> usize { let by = self.as_mut(); @@ -187,13 +189,14 @@ pub trait MutBuffer: AsMut<[u8]> pub trait MutBufferExt: MutBuffer { #[inline(always)] - #[instrument(skip(self))] + #[instrument(level="info", skip(self))] fn writer_from(&mut self, st: usize) -> BufferWriter<'_, Self> { + debug!("creating writer at start {st}"); BufferWriter(self, st) } #[inline] - #[instrument(skip(self))] + //#[instrument(level="info", skip(self))] fn writer(&mut self) -> BufferWriter<'_, Self> { self.writer_from(0) @@ -205,23 +208,44 @@ impl MutBufferExt for B{} impl MutBuffer for bytes::BytesMut { type Frozen = bytes::Bytes; - #[inline(always)] + + #[inline(always)] + #[instrument(level="trace")] fn freeze(self) -> Self::Frozen { bytes::BytesMut::freeze(self) } //TODO: XXX: Impl copy_from_slice() as is done in impl for Vec + /*#[instrument] + fn copy_from_slice(&mut self, st: usize, buf: &[u8]) -> usize + { + //TODO: Special case for `st == 0` maybe? No slicing of the BytesMut might increase perf? Idk. + if (st + buf.len()) <= self.len() { + // We can put `buf` in st..buf.len() + self[st..].copy_from_slice(buf); +} else if st <= self.len() { + // The start is lower but the end is not + let rem = self.len() - st; + self[st..].copy_from_slice(&buf[..rem]); + self.extend_from_slice(&buf[rem..]); +} else { + // it is past the end, extend. + self.extend_from_slice(buf); +} + buf.len() +}*/ } impl MutBuffer for Vec { type Frozen = Box<[u8]>; + #[inline] - #[instrument] + #[instrument(level="trace")] fn freeze(self) -> Self::Frozen { self.into_boxed_slice() } - #[instrument] + #[instrument(level="trace", skip_all, fields(st, buflen = ?buf.len()))] fn copy_from_slice(&mut self, st: usize, buf: &[u8]) -> usize { if (st + buf.len()) <= self.len() { @@ -249,12 +273,16 @@ pub trait WithCapacity: Sized impl WithCapacity for Box<[u8]> { - #[inline(always)] + #[inline(always)] + #[instrument(level="info", fields(cap = "(unbound)"))] fn wc_new() -> Self { + info!("creating new boxed slice with size 0"); Vec::wc_new().into_boxed_slice() } - #[inline(always)] + #[inline(always)] + #[instrument(level="info")] fn wc_with_capacity(cap: usize) -> Self { + info!("creating new boxed slice with size {cap}"); Vec::wc_with_capacity(cap).into_boxed_slice() } } @@ -308,14 +336,18 @@ macro_rules! cap_buffer { ($name:ty) => { impl $crate::buffers::WithCapacity for $name { - #[inline(always)] + #[inline(always)] + #[instrument(level="info", fields(cap = "(unbound)"))] fn wc_new() -> Self { + info!("creating {} with no cap", std::any::type_name::()); Self::new() } - #[inline(always)] + #[inline(always)] + #[instrument(level="info")] fn wc_with_capacity(cap: usize) -> Self { + info!("creating {} with {cap}", std::any::type_name::()); Self::with_capacity(cap) } } @@ -360,5 +392,7 @@ pub mod prelude pub(crate) use cap_buffer; +// cap_buffer impls + #[cfg(feature="bytes")] buffers::cap_buffer!(bytes::BytesMut); cap_buffer!(Vec); diff --git a/src/main.rs b/src/main.rs index 33f5f61..94b6954 100644 --- a/src/main.rs +++ b/src/main.rs @@ -37,6 +37,7 @@ use bytes::{ BufMut, }; +#[instrument(level="debug", skip(reader), fields(reader = ?std::any::type_name::()))] fn try_get_size(reader: &R) -> Option where R: AsRawFd { @@ -70,33 +71,43 @@ fn init() -> eyre::Result<()> use tracing_subscriber::prelude::*; use tracing_subscriber::{fmt, EnvFilter}; - let fmt_layer = fmt::layer().with_target(false); + let fmt_layer = fmt::layer() + .with_target(false) + .with_writer(io::stderr); + let filter_layer = EnvFilter::try_from_default_env() - .or_else(|_| EnvFilter::try_new("info")) + .or_else(|_| EnvFilter::try_new(if cfg!(debug_assertions) { + "info" + } else if cfg!(feature="no-logging") { + "off" + } else { + "warn" + })) .unwrap(); tracing_subscriber::registry() - .with(filter_layer) .with(fmt_layer) + .with(filter_layer) .with(ErrorLayer::default()) .init(); } - - install_tracing(); + + //if !cfg!(feature="no-logging") { + install_tracing(); + //} color_eyre::install() } -#[instrument] +#[instrument(err)] fn main() -> eyre::Result<()> { init()?; - //info!("Initialised"); let (bytes, read) = { let stdin = io::stdin(); let mut bytes: buffers::DefaultMut = try_get_size(&stdin).create_buffer(); - let read = io::copy(&mut stdin.lock(), &mut bytes.writer()) + let read = io::copy(&mut stdin.lock(), &mut (&mut bytes).writer()) .with_section(|| bytes.len().header("Buffer size is")) .with_section(|| bytes.capacity().header("Buffer cap is")) .with_section(|| format!("{:?}", bytes).header("Buffer is")) From 3af34dc78f066d0bce7e99947792bb0c75990898 Mon Sep 17 00:00:00 2001 From: Avril Date: Wed, 13 Apr 2022 04:20:44 +0100 Subject: [PATCH 05/15] Better documented `jemalloc` feature. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unsure if the massive page mapping is an issue or not... Fortune for collect's current commit: Blessing − 吉 --- Cargo.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 65a2653..a7a6442 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,8 @@ default = ["jemalloc", "tracing/release_max_level_warn"] # bytes: use `bytes` crate for collecting instead of `std::vec` # Use jemalloc instead of system malloc. -# Seems to reduce overall memory usage at the cost of a very small speed drop. +# +# Decreases memory-handling function calls, resulting in less "used" memory and faster allocation speeds at the cost of mapping a huge amount of virtual memory. jemalloc = ["jemallocator"] # Remove all tracing points From 80a2d652628ab3229de2ac899af099d58fe523d3 Mon Sep 17 00:00:00 2001 From: Avril Date: Wed, 13 Apr 2022 07:37:42 +0100 Subject: [PATCH 06/15] Made spantrace capture optional at the compilation level. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added `logging` (default) feature for enabling/disabling spantrace captures entirely on build. Fortune for collect's current commit: Half curse − 半凶 --- Cargo.lock | 41 ++++++++++++ Cargo.toml | 26 +++++--- src/buffers.rs | 162 ++++++++++++++++++++++++++++++++++++++++------ src/main.rs | 172 +++++++++++++++++++++++++++++++++++++++---------- 4 files changed, 340 insertions(+), 61 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4e8e978..524f8e6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,6 +26,17 @@ dependencies = [ "winapi", ] +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + [[package]] name = "autocfg" version = "1.1.0" @@ -70,9 +81,13 @@ name = "collect" version = "0.1.0" dependencies = [ "bytes", + "cfg-if", "color-eyre", "jemallocator", + "lazy_format", "libc", + "memchr", + "recolored", "tracing", "tracing-error", "tracing-subscriber", @@ -127,6 +142,15 @@ version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78cc372d058dcf6d5ecd98510e7fbc9e5aec4d21de70f65fea8fecebcd881bd4" +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + [[package]] name = "indenter" version = "0.3.3" @@ -154,6 +178,12 @@ dependencies = [ "libc", ] +[[package]] +name = "lazy_format" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05662be9cd63006934464f935195ae936460edb75de7b9a07e0509795afbdc3" + [[package]] name = "lazy_static" version = "1.4.0" @@ -245,6 +275,17 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "recolored" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1584c92dd8a87686229f766bb3a62d263a90c47c81e45a49f1a6d684a1b7968d" +dependencies = [ + "atty", + "lazy_static", + "winapi", +] + [[package]] name = "regex" version = "1.5.5" diff --git a/Cargo.toml b/Cargo.toml index a7a6442..fcfff04 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,11 +6,10 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [features] -default = ["jemalloc", "tracing/release_max_level_warn"] +default = ["jemalloc", "logging", "tracing/release_max_level_warn"] # TODO: mmap, memfd_create() ver -# XXX: without bytes, it completely fails?? # bytes: use `bytes` crate for collecting instead of `std::vec` # Use jemalloc instead of system malloc. @@ -18,8 +17,15 @@ default = ["jemalloc", "tracing/release_max_level_warn"] # Decreases memory-handling function calls, resulting in less "used" memory and faster allocation speeds at the cost of mapping a huge amount of virtual memory. jemalloc = ["jemallocator"] -# Remove all tracing points -no-logging = ["tracing/max_level_off"] +# Remove all runtime logging code. +# +# The capturing of spantraces will still happen if `logging` is enabled. +disable-logging = [] #["tracing/max_level_off"] <-- no longer needed, would enable the `tracing` feature which we don't want. + +# Capture spantraces +# +# Will cause a slowdown, but provide more information in the event of an error or when debugging. +logging = ["tracing", "tracing-subscriber", "tracing-error", "color-eyre/capture-spantrace"] #, "recolored" <- XXX doesn't work in tracing output for some reason...] [profile.release] opt-level = 3 @@ -34,9 +40,13 @@ strip=false [dependencies] bytes = { version = "1.1.0", optional = true } -color-eyre = { version = "0.6.1", features = ["capture-spantrace"] } +cfg-if = { version = "1.0.0" } jemallocator = { version = "0.3.2", optional = true } libc = "0.2.122" -tracing = { version = "0.1.33", features = ["attributes"] } -tracing-error = "0.2.0" -tracing-subscriber = { version = "0.3.11", features = ["tracing", "env-filter"] } +tracing = { version = "0.1.33", features = ["attributes"], optional = true } +tracing-error = {version = "0.2.0", optional = true } +tracing-subscriber = { version = "0.3.11", features = ["tracing", "env-filter"], optional = true } +color-eyre = { version = "0.6.1", default-features=false }#, features = ["capture-spantrace"] } +recolored = { version = "1.9.3", optional = true } +memchr = "2.4.1" +lazy_format = "1.10.0" diff --git a/src/buffers.rs b/src/buffers.rs index 0c8328e..554306e 100644 --- a/src/buffers.rs +++ b/src/buffers.rs @@ -85,10 +85,11 @@ const _: () = { impl<'a, B: ?Sized + Buffer> io::Read for BufferReader<'a, B> { #[inline] - #[instrument(level="trace", skip_all, fields(buf = ?buf.len()))] + #[cfg_attr(feature="logging", instrument(level="trace", skip_all, fields(buf = ?buf.len())))] fn read(&mut self, buf: &mut [u8]) -> io::Result { let adv = self.0.copy_to_slice(self.1, buf); self.1 += adv; + if_trace!(? trace!(" -> reading one buffer +{adv}")); Ok(adv) } } @@ -96,11 +97,13 @@ impl<'a, B: ?Sized + Buffer> io::Read for BufferReader<'a, B> impl<'a, B: ?Sized + MutBuffer> io::Write for BufferWriter<'a, B> { #[inline] - #[instrument(level="trace", skip_all, fields(buf = ?buf.len()))] + #[cfg_attr(feature="logging", instrument(level="trace", skip_all, fields(buf = ?buf.len())))] fn write(&mut self, buf: &[u8]) -> io::Result { let adv = self.0.copy_from_slice(self.1, buf); self.1 += adv; + + if_trace!(? trace!(" <- writing one buffer {adv}")); Ok(adv) } @@ -114,7 +117,7 @@ impl<'a, B: ?Sized + MutBuffer> io::Write for BufferWriter<'a, B> pub trait Buffer: AsRef<[u8]> { #[inline] - #[instrument(level="trace", skip_all, fields(buf = ?slice.len()))] + #[cfg_attr(feature="logging", instrument(level="trace", skip_all, fields(buf = ?slice.len())))] fn copy_to_slice(&self, st: usize, slice: &mut [u8]) -> usize { let by = self.as_ref(); @@ -162,7 +165,7 @@ pub trait MutBuffer: AsMut<[u8]> fn freeze(self) -> Self::Frozen; #[inline] - #[instrument(level="debug", skip_all, fields(st, buflen = ?slice.len()))] + #[cfg_attr(feature="logging", instrument(level="debug", skip_all, fields(st, buflen = ?slice.len())))] fn copy_from_slice(&mut self, st: usize, slice: &[u8]) -> usize { let by = self.as_mut(); @@ -189,10 +192,10 @@ pub trait MutBuffer: AsMut<[u8]> pub trait MutBufferExt: MutBuffer { #[inline(always)] - #[instrument(level="info", skip(self))] + #[cfg_attr(feature="logging", instrument(level="info", skip(self)))] fn writer_from(&mut self, st: usize) -> BufferWriter<'_, Self> { - debug!("creating writer at start {st}"); + if_trace!(debug!("creating writer at start {st}")); BufferWriter(self, st) } #[inline] @@ -210,7 +213,7 @@ impl MutBuffer for bytes::BytesMut type Frozen = bytes::Bytes; #[inline(always)] - #[instrument(level="trace")] + #[cfg_attr(feature="logging", instrument(level="trace"))] fn freeze(self) -> Self::Frozen { bytes::BytesMut::freeze(self) } @@ -222,7 +225,7 @@ impl MutBuffer for bytes::BytesMut if (st + buf.len()) <= self.len() { // We can put `buf` in st..buf.len() self[st..].copy_from_slice(buf); -} else if st <= self.len() { +} else if st < self.len() { // The start is lower but the end is not let rem = self.len() - st; self[st..].copy_from_slice(&buf[..rem]); @@ -235,29 +238,150 @@ impl MutBuffer for bytes::BytesMut }*/ } +#[cfg(feature="recolored")] +mod perc { + #[deprecated = "this is absolutely retardedly unsafe and unsound... fuck this shit man lole"] + pub(super) unsafe fn gen_perc_boring(low: f64, high: f64) -> std::pin::Pin<&'static (impl std::fmt::Display + ?Sized + 'static)> + { + use std::{ + cell::RefCell, + mem::MaybeUninit, + pin::Pin, + + }; + thread_local! { + static STRING_BUFFER: RefCell> = RefCell::new(MaybeUninit::uninit()); + } + STRING_BUFFER.try_with(|buffer| -> Result, Box>{ + let mut buffer = buffer.try_borrow_mut()?; + use std::io::Write; + write!(unsafe {&mut buffer.assume_init_mut()[..]}, "{:0.2}", (low / high) * 100f64)?; + let s_ref = unsafe { + #[derive(Debug)] + struct FindFailed; + impl std::error::Error for FindFailed{} + impl std::fmt::Display for FindFailed { + #[inline(always)] + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result + { + f.write_str("boring perc: failed to write whole string into buffer of size 16") + } + } + let buf = buffer.assume_init_mut(); + let spl = memchr::memchr(b'%', &buf[..]).ok_or(FindFailed)?; + std::str::from_utf8_mut(&mut buf[..=spl])? + }; + unsafe { + Ok(Pin::new(std::mem::transmute::<_, &'static _>(s_ref))) + } + }).expect("bad static memory access").expect("failed to calc") + } + + #[inline] + //XXX::: WHY::: TRACING IGNORES MY COLOURS!!! + #[deprecated(note="my colouring is ignored. we'll have to either: figure out why. or, use a different method to highlight abnormal (above 100) percentages")] + pub(super) fn gen_perc(low: f64, high: f64) -> impl std::fmt::Display + { + use std::fmt; + let f = low / match high { + 0f64 => if low != 0f64 { + return Perc::Invalid + } else { + 0f64 + } + x => x, + }; + enum Perc { + Normal(f64), + Goal(String), + High(String), + Zero(String), + Low(String), + + Invalid, + } + + macro_rules! fmt_str { + (%) => ("{:0.2}%"); + () => ("{:0.2}") + } + impl fmt::Display for Perc + { + #[inline(always)] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + use recolored::Colorize; + + write!(f, "{}", match self { + Self::Normal(p) => return write!(f, fmt_str!(%), p), + Self::Goal(p) => p.green(), + Self::High(p) => p.red(), + Self::Zero(p) => p.purple().bold(), + Self::Low(p) => p.on_red().white().underline(), + Self::Invalid => return write!(f, fmt_str!(%), ("0.00%".on_bright_red().white().strikethrough())), + })?; + { + use fmt::Write; + f.write_char('%') + } + } + } + + //TODO: StackStr instead of String + (match f { + 0f64 => Perc::Zero, + 1f64 => Perc::Goal, + 0f64..=1f64 => return Perc::Normal(f * 100f64), + 1f64.. => Perc::High, + _ => Perc::Low, + })(format!(fmt_str!(), f * 100f64)) + } +} + impl MutBuffer for Vec { type Frozen = Box<[u8]>; #[inline] - #[instrument(level="trace")] + #[cfg_attr(feature="logging", instrument(level="trace"))] fn freeze(self) -> Self::Frozen { self.into_boxed_slice() } - #[instrument(level="trace", skip_all, fields(st, buflen = ?buf.len()))] + #[cfg_attr(feature="logging", instrument(level="trace", skip(buf, self), fields(st = ?st, self = ?self.len(), alloc= ?self.capacity())))] fn copy_from_slice(&mut self, st: usize, buf: &[u8]) -> usize { if (st + buf.len()) <= self.len() { // We can put `buf` in st..buf.len() self[st..].copy_from_slice(buf); - } else if st <= self.len() { + } else if st < self.len() { // The start is lower but the end is not let rem = self.len() - st; self[st..].copy_from_slice(&buf[..rem]); + if_trace!(trace!("extending buffer (partial, +{})", buf[rem..].len())); self.extend_from_slice(&buf[rem..]); } else { // it is past the end, extend. + if_trace!(trace!("extending buffer (whole, self + buf = {} / {}: {})" + ,self.len() + buf.len() + , self.capacity() + , { + cfg_if! { + if #[cfg(feature="recolored")] { + use perc::*; + (if cfg!(feature="recolored") { + |x,y| -> Box { Box::new(gen_perc(x,y)) } + } else { + |x,y| -> Box { Box::new(unsafe {gen_perc_boring(x,y)}.get_ref()) } + })((self.len() + buf.len()) as f64, self.capacity() as f64) + } else { + let t= self.len(); + let c= self.capacity(); + let b = buf.len(); + lazy_format::lazy_format!("{:0.2}", ((t + b) as f64 / c as f64) * 100f64) + } + } + })); self.extend_from_slice(buf); } buf.len() @@ -274,15 +398,15 @@ pub trait WithCapacity: Sized impl WithCapacity for Box<[u8]> { #[inline(always)] - #[instrument(level="info", fields(cap = "(unbound)"))] + #[cfg_attr(feature="logging", instrument(level="info", fields(cap = "(unbound)")))] fn wc_new() -> Self { - info!("creating new boxed slice with size 0"); + if_trace!(debug!("creating new boxed slice with size 0")); Vec::wc_new().into_boxed_slice() } #[inline(always)] - #[instrument(level="info")] + #[cfg_attr(feature="logging", instrument(level="info"))] fn wc_with_capacity(cap: usize) -> Self { - info!("creating new boxed slice with size {cap}"); + if_trace!(debug!("creating new boxed slice with size {cap}")); Vec::wc_with_capacity(cap).into_boxed_slice() } } @@ -337,17 +461,17 @@ macro_rules! cap_buffer { impl $crate::buffers::WithCapacity for $name { #[inline(always)] - #[instrument(level="info", fields(cap = "(unbound)"))] + #[cfg_attr(feature="logging", instrument(level="info", fields(cap = "(unbound)")))] fn wc_new() -> Self { - info!("creating {} with no cap", std::any::type_name::()); + if_trace! (debug!("creating {} with no cap", std::any::type_name::())); Self::new() } #[inline(always)] - #[instrument(level="info")] + #[cfg_attr(feature="logging", instrument(level="info"))] fn wc_with_capacity(cap: usize) -> Self { - info!("creating {} with {cap}", std::any::type_name::()); + if_trace!(debug!("creating {} with {cap}", std::any::type_name::())); Self::with_capacity(cap) } } diff --git a/src/main.rs b/src/main.rs index 94b6954..3c7f97d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,26 @@ +#[macro_use] extern crate cfg_if; +#[cfg(feature="logging")] #[macro_use] extern crate tracing; +/// Run this statement only if `tracing` is enabled +macro_rules! if_trace { + (? $expr:expr) => { + cfg_if! { + if #[cfg(all(feature="logging", debug_assertions))] { + $expr; + } + } + }; + ($expr:expr) => { + cfg_if! { + if #[cfg(feature="logging")] { + $expr; + } + } + }; +} + #[cfg(feature="jemalloc")] extern crate jemallocator; @@ -37,7 +57,88 @@ use bytes::{ BufMut, }; -#[instrument(level="debug", skip(reader), fields(reader = ?std::any::type_name::()))] +/* TODO: XXX: For colouring buffer::Perc +#[derive(Debug)] +struct StackStr(usize, std::mem::MaybeUninit<[u8; MAXLEN]>); + +impl StackStr +{ + #[inline] + pub const fn new() -> Self + { + Self(0, std::mem::MaybeUninit::uninit()) + } + + #[inline(always)] + pub const unsafe fn slice_mut(&mut self) -> &mut [u8] + { + &mut self.1[self.0..] + } + #[inline] + pub const fn slice(&self) -> &[u8] + { + &self.1[self.0..] + } + + #[inline] + pub const unsafe fn as_str_unchecked(&self) -> &str + { + std::str::from_utf8_unchecked(&self.1[self.0..]) + } + + #[inline] + pub const unsafe fn as_mut_str_unchecked(&mut self) -> &mut str + { + std::str::from_utf8_unchecked_mut(&mut self.1[..self.0]) + } + + #[inline] + #[cfg_attr(feature="logging", instrument(level="debug"))] + pub fn as_str(&self) -> &str + { + std::str::from_utf8(self.slice()).expect("Invalid string") + } + + #[inline(always)] + const fn left(&self) -> usize { + SZ - self.0 + } + + #[inline(always)] + pub fn write_bytes(&mut self, s: &[u8]) -> usize { + let b = &s[..std::cmp::min(match self.left() { + 0 => return 0, + x => x, + }, s.len())]; + unsafe { &mut self.slice_mut() [self.0..] }.copy_from_slice(b); + let v = b.len(); + self.0 += v; + v + } +} + +impl std::fmt::Write for StackStr +{ + #[inline] + fn write_str(&mut self, s: &str) -> std::fmt::Result { + self.write_bytes(s.as_bytes()); + Ok(()) + } + #[inline] + fn write_char(&mut self, c: char) -> std::fmt::Result { + let l = c.len_utf8(); + if l > self.left() { + return Ok(()) + } + self.write_bytes(c.encode_utf8(unsafe { &mut self.slice_mut() [self.0..] })); + self.0 += l; + + Ok(()) + } +} +*/ + +#[cfg_attr(feature="logging", instrument(level="info", skip(reader), fields(reader = ?std::any::type_name::())))] fn try_get_size(reader: &R) -> Option where R: AsRawFd { @@ -63,45 +164,47 @@ where R: AsRawFd fn init() -> eyre::Result<()> { - fn install_tracing() - { - //! Install spantrace handling - - use tracing_error::ErrorLayer; - use tracing_subscriber::prelude::*; - use tracing_subscriber::{fmt, EnvFilter}; - - let fmt_layer = fmt::layer() - .with_target(false) - .with_writer(io::stderr); - - let filter_layer = EnvFilter::try_from_default_env() - .or_else(|_| EnvFilter::try_new(if cfg!(debug_assertions) { - "info" - } else if cfg!(feature="no-logging") { - "off" - } else { - "warn" - })) - .unwrap(); - - tracing_subscriber::registry() - .with(fmt_layer) - .with(filter_layer) - .with(ErrorLayer::default()) - .init(); - } + cfg_if!{ if #[cfg(feature="logging")] { + fn install_tracing() + { + //! Install spantrace handling + + use tracing_error::ErrorLayer; + use tracing_subscriber::prelude::*; + use tracing_subscriber::{fmt, EnvFilter}; + + let fmt_layer = fmt::layer() + .with_target(false) + .with_writer(io::stderr); + + let filter_layer = EnvFilter::try_from_default_env() + .or_else(|_| EnvFilter::try_new(if cfg!(debug_assertions) { + "debug" + } else { + "info" + })) + .unwrap(); + + tracing_subscriber::registry() + .with(fmt_layer) + .with(filter_layer) + .with(ErrorLayer::default()) + .init(); + } - //if !cfg!(feature="no-logging") { - install_tracing(); - //} + if !cfg!(feature="disable-logging") { + install_tracing(); + if_trace!(trace!("installed tracing")); + } + } } color_eyre::install() } -#[instrument(err)] +#[cfg_attr(tracing, instrument(err))] fn main() -> eyre::Result<()> { init()?; + if_trace!(debug!("initialised")); let (bytes, read) = { let stdin = io::stdin(); @@ -112,9 +215,9 @@ fn main() -> eyre::Result<()> { .with_section(|| bytes.capacity().header("Buffer cap is")) .with_section(|| format!("{:?}", bytes).header("Buffer is")) .wrap_err("Failed to read into buffer")?; - (bytes.freeze(), read as usize) }; + if_trace!(info!("collected {read} from stdin. starting write.")); let written = io::copy(&mut (&bytes[..read]).reader() , &mut io::stdout().lock()) @@ -123,6 +226,7 @@ fn main() -> eyre::Result<()> { .with_section(|| format!("{:?}", &bytes[..read]).header("Read Buffer")) .with_section(|| format!("{:?}", bytes).header("Full Buffer")) .wrap_err("Failed to write from buffer")?; + if_trace!(info!("written {written} to stdout.")); if read != written as usize { return Err(io::Error::new(io::ErrorKind::BrokenPipe, format!("read {read} bytes, but only wrote {written}"))) From 65c297b228c1979da53c7c044f26dab80ba3a2f9 Mon Sep 17 00:00:00 2001 From: Avril Date: Sun, 24 Apr 2022 07:46:14 +0100 Subject: [PATCH 07/15] Started adding `memfile` feature: Use a `memfd_create()`d in-memory temporary file. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fortune for collect's current commit: Curse − 凶 --- Cargo.toml | 3 +- src/buffers.rs | 2 +- src/main.rs | 2 + src/memfile.rs | 224 +++++++++++++++++++++++++++++++++++++++++++ src/memfile/error.rs | 112 ++++++++++++++++++++++ src/memfile/fd.rs | 198 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 539 insertions(+), 2 deletions(-) create mode 100644 src/memfile.rs create mode 100644 src/memfile/error.rs create mode 100644 src/memfile/fd.rs diff --git a/Cargo.toml b/Cargo.toml index fcfff04..ab2aeea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,9 +6,10 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [features] -default = ["jemalloc", "logging", "tracing/release_max_level_warn"] +default = ["jemalloc", "memfile", "logging", "tracing/release_max_level_warn"] # TODO: mmap, memfd_create() ver +memfile = [] # bytes: use `bytes` crate for collecting instead of `std::vec` diff --git a/src/buffers.rs b/src/buffers.rs index 554306e..620d08d 100644 --- a/src/buffers.rs +++ b/src/buffers.rs @@ -217,7 +217,7 @@ impl MutBuffer for bytes::BytesMut fn freeze(self) -> Self::Frozen { bytes::BytesMut::freeze(self) } - //TODO: XXX: Impl copy_from_slice() as is done in impl for Vec + //TODO: XXX: Impl copy_from_slice() as is done in impl for Vec? Or change how `.writer()` works for us to return the BytesMut writer which seems more efficient. /*#[instrument] fn copy_from_slice(&mut self, st: usize, buf: &[u8]) -> usize { diff --git a/src/main.rs b/src/main.rs index 3c7f97d..f5be30a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -51,6 +51,8 @@ use color_eyre::{ mod buffers; use buffers::prelude::*; +#[cfg(feature="memfile")] mod memfile; + #[cfg(feature="bytes")] use bytes::{ Buf, diff --git a/src/memfile.rs b/src/memfile.rs new file mode 100644 index 0000000..24b01d8 --- /dev/null +++ b/src/memfile.rs @@ -0,0 +1,224 @@ +//! Memory file handling +use super::*; +use std::os::unix::io::*; +use std::{ + mem, + ops, + fs, + io, + path::Path, + borrow::{ + Borrow, + BorrowMut, + }, +}; + +mod fd; +pub mod error; + +#[derive(Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[repr(transparent)] +pub struct RawFile(fd::RawFileDescriptor); + +impl RawFile +{ + /// Get the raw fd for this raw file + #[inline(always)] + pub const fn fileno(&self) -> RawFd + { + self.0.get() + } + + #[inline(always)] + pub fn into_fileno(self) -> RawFd + { + self.into_raw_fd() + } + + #[inline(always)] + pub unsafe fn from_fileno(fd: RawFd) -> Self + { + Self::from_raw_fd(fd) + } + + /// Attempt to link this instance's fd to another container over an fd + /// + /// This is a safe wrapper around `dup2()`, as `clone()` is a safe wrapper around `dup()`. + /// + /// # Note + /// If `T` is a buffered container (e.g. `std::fs::File`), make sure the buffer is flushed *before* calling this method on it, or the buffered data will be lost. + pub fn try_link<'o, T: ?Sized>(&self, other: &'o mut T) -> Result<&'o mut T, error::DuplicateError> + where T: AsRawFd + { + if unsafe { + libc::dup2(self.fileno(), other.as_raw_fd()) + } < 0 { + Err(error::DuplicateError::new_dup2(self, other)) + } else { + Ok(other) + } + } + + /// Consume a managed file into a raw file, attempting to synchronise it first. + /// + /// # Note + /// This method attempts to sync the file's data. + /// To also attempt to sync the file's metadata, set `metadata` to true. + /// + /// # Returns + /// If the sync should fail, the original file is returned, along with the error from the sync. + #[inline(always)] + pub fn try_from_file_synced(file: fs::File, metadata: bool) -> Result + { + match if metadata { + file.sync_all() + } else { + file.sync_data() + } { + Ok(()) => unsafe { + Ok(Self::from_raw_fd(file.into_raw_fd())) + }, + Err(ioe) => Err((file, ioe)) + } + } + + /// Consume a managed fd type into a raw file + #[inline(always)] + pub fn from_file(file: impl IntoRawFd) -> Self + { + unsafe { + Self::from_raw_fd(file.into_raw_fd()) + } + } + + /// Consume into a managed file + #[inline(always)] + pub fn into_file(self) -> fs::File + { + unsafe { + fs::File::from_raw_fd(self.into_raw_fd()) + } + } + + /// Attempt to open a new raw file with these options + #[inline] + pub fn open(path: impl AsRef, opt: impl Borrow) -> io::Result + { + opt.borrow().open(path).map(Into::into) + } +} + +impl io::Write for RawFile +{ + #[inline] + fn write(&mut self, buf: &[u8]) -> io::Result { + match unsafe { + libc::write(self.fileno(), buf.as_ptr() as *const _, buf.len()) + } { + -1 => Err(io::Error::last_os_error()), + wr => Ok(wr as usize) + } + } + #[inline] + fn flush(&mut self) -> io::Result<()> { + // Not buffered + Ok(()) + } + + #[inline] + fn write_vectored(&mut self, bufs: &[io::IoSlice<'_>]) -> io::Result { + // SAFETY: IoSlice is guaranteed to be ABI-compatible with `struct iovec` + match unsafe { + libc::writev(self.fileno(), bufs.as_ptr() as *const _, bufs.len() as i32) + } { + -1 => Err(io::Error::last_os_error()), + wr => Ok(wr as usize) + } + } +} + +impl io::Read for RawFile +{ + #[inline] + fn read(&mut self, buf: &mut [u8]) -> io::Result { + match unsafe { + libc::read(self.fileno(), buf.as_mut_ptr() as *mut _, buf.len()) + } { + -1 => Err(io::Error::last_os_error()), + wr => Ok(wr as usize) + } + } + + #[inline] + fn read_vectored(&mut self, bufs: &mut [io::IoSliceMut<'_>]) -> io::Result { + // SAFETY: IoSlice is guaranteed to be ABI-compatible with `struct iovec` + match unsafe { + libc::readv(self.fileno(), bufs.as_mut_ptr() as *mut _, bufs.len() as i32) + } { + -1 => Err(io::Error::last_os_error()), + wr => Ok(wr as usize) + } + } +} + +impl From for RawFile +{ + #[inline] + fn from(from: fs::File) -> Self + { + Self::from_file(from) + } +} + +impl From for fs::File +{ + #[inline] + fn from(from: RawFile) -> Self + { + from.into_file() + } +} + + +impl Clone for RawFile +{ + #[inline] + fn clone(&self) -> Self { + unsafe { Self::from_raw_fd(libc::dup(self.0.get())) } + } +} +impl ops::Drop for RawFile +{ + #[inline] + fn drop(&mut self) { + unsafe { + libc::close(self.0.get()); + } + } +} + +impl AsRawFd for RawFile +{ + #[inline] + fn as_raw_fd(&self) -> RawFd { + self.0.get() + } +} + +impl FromRawFd for RawFile +{ + #[inline] + unsafe fn from_raw_fd(fd: RawFd) -> Self { + Self(fd::RawFileDescriptor::new(fd)) + } +} + +impl IntoRawFd for RawFile +{ + #[inline] + fn into_raw_fd(self) -> RawFd { + let fd = self.0.get(); + mem::forget(self); // prevent close + fd + } +} diff --git a/src/memfile/error.rs b/src/memfile/error.rs new file mode 100644 index 0000000..5260a1f --- /dev/null +++ b/src/memfile/error.rs @@ -0,0 +1,112 @@ +//! Errors +use super::*; +use std::{fmt, error}; + +/// The kind of duplicate fd syscall that was attempted +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Copy)] +pub enum DuplicateKind +{ + /// A `dup()` call failed + Duplicate, + /// A `dup2(fd)` call failed + Link(RawFd), +} + +/// Error returned when duplicating a file descriptor fails +#[derive(Debug)] +pub struct DuplicateError { + pub(super) from: RawFd, + pub(super) to: DuplicateKind, + pub(super) inner: io::Error, +} + +impl DuplicateError +{ + #[inline(always)] + pub fn new_dup(from: &T) -> Self + { + Self{ + inner: io::Error::last_os_error(), + from: from.as_raw_fd(), + to: DuplicateKind::Duplicate, + } + } + + #[inline(always)] + pub fn new_dup2(from: &T, to: &U) -> Self + { + Self { + inner: io::Error::last_os_error(), + from: from.as_raw_fd(), + to: DuplicateKind::Link(to.as_raw_fd()), + } + } + + #[inline] + pub fn new(from: &T, kind: DuplicateKind, reason: impl Into) -> Self + { + Self { + from: from.as_raw_fd(), + to: kind, + inner: reason.into() + } + } + + #[inline(always)] + pub fn reason(&self) -> &io::Error + { + &self.inner + } + + #[inline(always)] + pub fn kind(&self) -> &DuplicateKind + { + &self.to + } + + #[inline(always)] + pub fn source_fileno(&self) -> RawFd + { + self.from + } +} + +impl fmt::Display for DuplicateKind +{ + #[inline(always)] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + match self { + Self::Duplicate => f.write_str("dup()"), + Self::Link(fd) => write!(f, "dup2({fd})"), + } + } +} + +impl error::Error for DuplicateError +{ + #[inline] + fn source(&self) -> Option<&(dyn error::Error + 'static)> { + Some(&self.inner) + } +} + +impl std::borrow::Borrow for DuplicateError +{ + #[inline] + fn borrow(&self) -> &io::Error + { + self.reason() + } +} + + +impl fmt::Display for DuplicateError +{ + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + write!(f, "failed to {} fd {}", self.to, self.from) + } +} + diff --git a/src/memfile/fd.rs b/src/memfile/fd.rs new file mode 100644 index 0000000..19f2b04 --- /dev/null +++ b/src/memfile/fd.rs @@ -0,0 +1,198 @@ +//! Managing raw `fd`s +use super::*; +use std::num::NonZeroU32; +use libc::{ + c_int, +}; +use std::{ + fmt, error +}; + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Copy)] +#[repr(transparent)] +struct NonNegativeI32(NonZeroU32); + +impl NonNegativeI32 +{ + pub const MASK: u32 = c_int::MIN as u32; //0b10000000_00000000_00000000_00000000; + + #[inline(always)] + pub const fn new(from: i32) -> Option + { + if from < 0 { + None + } else { + Some(unsafe { + Self::new_unchecked(from) + }) + } + } + + #[inline(always)] + pub const unsafe fn new_unchecked(from: i32) -> Self + { + Self(NonZeroU32::new_unchecked( (from as u32) | Self::MASK )) + } + + #[inline(always)] + pub const fn get(self) -> i32 + { + (self.0.get() & (!Self::MASK)) as i32 + } +} + +impl PartialEq for NonNegativeI32 +{ + #[inline] + fn eq(&self, other: &i32) -> bool + { + self.get() == *other + } +} + +impl PartialOrd for NonNegativeI32 +{ + #[inline] + fn partial_cmp(&self, other: &i32) -> Option { + self.get().partial_cmp(other) + } +} + +impl Default for NonNegativeI32 +{ + #[inline(always)] + fn default() -> Self + { + unsafe { + Self::new_unchecked(0) + } + } +} + +impl From for i32 +{ + #[inline(always)] + fn from(from: NonNegativeI32) -> Self + { + from.get() + } +} + +impl TryFrom for NonNegativeI32 +{ + type Error = std::num::TryFromIntError; + + #[inline(always)] + fn try_from(from: i32) -> Result + { + NonZeroU32::try_from((!from as u32) & Self::MASK)?; + debug_assert!(from >= 0, "Bad check"); + unsafe { + Ok(Self::new_unchecked(from)) + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct BadFDError(()); + +impl error::Error for BadFDError{} +impl fmt::Display for BadFDError +{ + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + f.write_str("invalid file descriptor") + } +} + + +pub type FileNo = RawFd; + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[repr(transparent)] +pub struct RawFileDescriptor(NonNegativeI32); + +impl RawFileDescriptor +{ + pub const STDIN: Self = Self(unsafe { NonNegativeI32::new_unchecked(0) }); + pub const STDOUT: Self = Self(unsafe { NonNegativeI32::new_unchecked(1) }); + pub const STDERR: Self = Self(unsafe { NonNegativeI32::new_unchecked(2) }); + + #[inline(always)] + pub fn try_new(fd: FileNo) -> Result + { + NonNegativeI32::new(fd).ok_or(BadFDError(())).map(Self) + } + + #[inline] + pub fn new(fd: FileNo) -> Self + { + Self::try_new(fd).expect("Invalid fileno") + } + + #[inline(always)] + pub const unsafe fn new_unchecked(fd: FileNo) -> Self + { + Self(NonNegativeI32::new_unchecked(fd)) + } + + #[inline(always)] + pub const fn get(&self) -> FileNo + { + self.0.get() + } +} + +impl PartialEq for RawFileDescriptor +{ + #[inline] + fn eq(&self, other: &FileNo) -> bool + { + self.get() == *other + } +} + +impl PartialOrd for RawFileDescriptor +{ + #[inline] + fn partial_cmp(&self, other: &FileNo) -> Option { + self.get().partial_cmp(other) + } +} + +impl From for RawFileDescriptor +{ + #[inline(always)] + fn from(from: NonNegativeI32) -> Self + { + Self(from) + } +} + +impl TryFrom for RawFileDescriptor +{ + type Error = BadFDError; + + #[inline(always)] + fn try_from(from: FileNo) -> Result + { + Self::try_new(from) + } +} + +impl From for FileNo +{ + #[inline(always)] + fn from(from: RawFileDescriptor) -> Self + { + from.get() + } +} + +impl AsRawFd for RawFileDescriptor +{ + fn as_raw_fd(&self) -> RawFd { + self.get() + } +} From ed957bcec855516a9f44add5b93021a78b092555 Mon Sep 17 00:00:00 2001 From: Avril Date: Sun, 24 Apr 2022 09:18:05 +0100 Subject: [PATCH 08/15] feature memfile: added `RawFile::open_mem()` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Needs testing, and the XXX comment about `memfd_create()` name lifetimes also needs testing. (If they do need to be static; `stackalloc` as a dependancy can be removed entirely, then: `DEFAULT_NAME` will be changed to a CString, function parameter `name` and `error::MemfileCreationStep::Create(name)` will be changed to `Option<&"static CStr>`. Fortune for collect's current commit: Future blessing − 末吉 --- Cargo.lock | 43 ++++++++++++++++ Cargo.toml | 5 +- src/main.rs | 15 ++++++ src/memfile.rs | 119 +++++++++++++++++++++++++++++++++++++------ src/memfile/error.rs | 84 +++++++++++++++++++++++++++++- src/memfile/fd.rs | 20 +++++++- src/memfile/map.rs | 22 ++++++++ 7 files changed, 289 insertions(+), 19 deletions(-) create mode 100644 src/memfile/map.rs diff --git a/Cargo.lock b/Cargo.lock index 524f8e6..8beefb0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -58,6 +58,12 @@ dependencies = [ "rustc-demangle", ] +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + [[package]] name = "bytes" version = "1.1.0" @@ -80,14 +86,17 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" name = "collect" version = "0.1.0" dependencies = [ + "bitflags", "bytes", "cfg-if", "color-eyre", "jemallocator", "lazy_format", + "lazy_static", "libc", "memchr", "recolored", + "stackalloc", "tracing", "tracing-error", "tracing-subscriber", @@ -316,6 +325,30 @@ version = "0.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" +[[package]] +name = "rustc_version" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" +dependencies = [ + "semver", +] + +[[package]] +name = "semver" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" +dependencies = [ + "semver-parser", +] + +[[package]] +name = "semver-parser" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" + [[package]] name = "sharded-slab" version = "0.1.4" @@ -331,6 +364,16 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" +[[package]] +name = "stackalloc" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4f5c9dd3feb8a4adc8eae861e5f48862a92f9a9f38cf8fc99b92fc6ec016121" +dependencies = [ + "cc", + "rustc_version", +] + [[package]] name = "syn" version = "1.0.91" diff --git a/Cargo.toml b/Cargo.toml index ab2aeea..1cfb1c8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,7 @@ edition = "2021" default = ["jemalloc", "memfile", "logging", "tracing/release_max_level_warn"] # TODO: mmap, memfd_create() ver -memfile = [] +memfile = ["bitflags", "lazy_static"] # bytes: use `bytes` crate for collecting instead of `std::vec` @@ -51,3 +51,6 @@ color-eyre = { version = "0.6.1", default-features=false }#, features = ["captur recolored = { version = "1.9.3", optional = true } memchr = "2.4.1" lazy_format = "1.10.0" +bitflags = {version = "1.3.2", optional = true } +stackalloc = "1.1.1" +lazy_static = { version = "1.4.0", optional = true } diff --git a/src/main.rs b/src/main.rs index f5be30a..5857749 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,6 +3,9 @@ #[cfg(feature="logging")] #[macro_use] extern crate tracing; +#[macro_use] extern crate lazy_static; +#[macro_use] extern crate stackalloc; + /// Run this statement only if `tracing` is enabled macro_rules! if_trace { (? $expr:expr) => { @@ -48,6 +51,18 @@ use color_eyre::{ SectionExt, Help, }; +/// Get an `&'static str` of the current function name. +macro_rules! function { + () => {{ + fn f() {} + fn type_name_of(_: T) -> &'static str { + ::std::any::type_name::() + } + let name = type_name_of(f); + &name[..name.len() - 3] + }} +} + mod buffers; use buffers::prelude::*; diff --git a/src/memfile.rs b/src/memfile.rs index 24b01d8..9907f23 100644 --- a/src/memfile.rs +++ b/src/memfile.rs @@ -13,8 +13,12 @@ use std::{ }, }; -mod fd; +pub mod fd; pub mod error; +mod map; + +/// Flags passed to `memfd_create()` when used in this module +const MEMFD_CREATE_FLAGS: libc::c_uint = libc::MFD_CLOEXEC; #[derive(Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] #[repr(transparent)] @@ -24,21 +28,55 @@ impl RawFile { /// Get the raw fd for this raw file #[inline(always)] - pub const fn fileno(&self) -> RawFd + pub const fn fileno(&self) -> &fd::RawFileDescriptor { - self.0.get() + &self.0//.clone_const() } #[inline(always)] - pub fn into_fileno(self) -> RawFd + pub fn into_fileno(self) -> fd::RawFileDescriptor { - self.into_raw_fd() + // SAFETY: We know this is safe since we are just converting the released (valid) fd from `self` + unsafe { + fd::RawFileDescriptor::new_unchecked(self.into_raw_fd()) + } } #[inline(always)] - pub unsafe fn from_fileno(fd: RawFd) -> Self + pub unsafe fn from_fileno(fd: fd::RawFileDescriptor) -> Self { - Self::from_raw_fd(fd) + Self::from_raw_fd(fd.get()) + } + + #[inline(always)] + const fn take_ownership_of_unchecked(fd: RawFd) -> Self + { + //! **Internal**: Non-`unsafe` and `const` version of `take_ownership_of_raw_unchecked()` + //! : assumes `fd` is `>= 0` + //! + //! For use in `memfile` functions where `fd` has already been checked for validation (since `unsafe fn`s aren't first-class :/) + unsafe { + Self(fd::RawFileDescriptor::new_unchecked(fd)) + } + } + + #[inline] + pub fn take_ownership_of(fd: impl Into) -> Self + { + Self(fd.into()) + } + + #[inline] + pub fn take_ownership_of_raw(fd: impl Into) -> Result + { + let fd = fd.into(); + Ok(Self(fd.try_into().map_err(|_| fd)?)) + } + + #[inline] + pub unsafe fn take_ownership_of_raw_unchecked(fd: impl Into) -> Self + { + Self(fd::RawFileDescriptor::new_unchecked(fd.into())) } /// Attempt to link this instance's fd to another container over an fd @@ -51,7 +89,7 @@ impl RawFile where T: AsRawFd { if unsafe { - libc::dup2(self.fileno(), other.as_raw_fd()) + libc::dup2(self.0.get(), other.as_raw_fd()) } < 0 { Err(error::DuplicateError::new_dup2(self, other)) } else { @@ -91,12 +129,12 @@ impl RawFile } } - /// Consume into a managed file + /// Consume into another managed file type container #[inline(always)] - pub fn into_file(self) -> fs::File + pub fn into_file(self) -> T { unsafe { - fs::File::from_raw_fd(self.into_raw_fd()) + T::from_raw_fd(self.into_raw_fd()) } } @@ -106,14 +144,65 @@ impl RawFile { opt.borrow().open(path).map(Into::into) } + + /// Open a new in-memory (W+R) file with an optional name and a fixed size. + pub fn open_mem(name: Option<&str>, len: usize) -> Result + { + lazy_static! { + static ref DEFAULT_NAME: String = format!(concat!("", "{}", ":", line!(), "-", column!(), ">"), function!()); //TODO: If it turns out memfd_create() requires an `&'static str`; remove the use of stackalloc, and have this variable be a nul-terminated CString instead. + } + + use libc::{ + memfd_create, + fallocate, + }; + use error::MemfileCreationStep::*; + + let rname = name.unwrap_or(&DEFAULT_NAME); + + stackalloc::alloca_zeroed(rname.len()+1, move |bname| { //XXX: Isn't the whole point of making `name` `&'static` that I don't know if `memfd_create()` requires static-lifetime name strings? TODO: Check this + macro_rules! attempt_call + { + ($errcon:literal, $expr:expr, $step:expr) => { + match unsafe { + $expr + } { + $errcon => Err($step), + x => Ok(x) + } + } + } + + let bname = { + unsafe { + std::ptr::copy_nonoverlapping(rname.as_ptr(), bname.as_mut_ptr(), rname.len()); + } + debug_assert_eq!(bname[rname.len()], 0, "Copied name string not null-terminated?"); + bname.as_ptr() + }; + + let fd = attempt_call!(-1, memfd_create(bname as *const _, MEMFD_CREATE_FLAGS), Create(name.map(str::to_owned), MEMFD_CREATE_FLAGS)) + .map(Self::take_ownership_of_unchecked)?; // Ensures `fd` is dropped if any subsequent calls fail + + attempt_call!(-1 + , fallocate(fd.0.get(), 0, 0, len.try_into() + .map_err(|_| Allocate(fd.fileno().clone(), len))?) + , Allocate(fd.fileno().clone(), len))?; + + Ok(fd) + + }) + } } + + impl io::Write for RawFile { #[inline] fn write(&mut self, buf: &[u8]) -> io::Result { match unsafe { - libc::write(self.fileno(), buf.as_ptr() as *const _, buf.len()) + libc::write(self.0.get(), buf.as_ptr() as *const _, buf.len()) } { -1 => Err(io::Error::last_os_error()), wr => Ok(wr as usize) @@ -129,7 +218,7 @@ impl io::Write for RawFile fn write_vectored(&mut self, bufs: &[io::IoSlice<'_>]) -> io::Result { // SAFETY: IoSlice is guaranteed to be ABI-compatible with `struct iovec` match unsafe { - libc::writev(self.fileno(), bufs.as_ptr() as *const _, bufs.len() as i32) + libc::writev(self.0.get(), bufs.as_ptr() as *const _, bufs.len() as i32) } { -1 => Err(io::Error::last_os_error()), wr => Ok(wr as usize) @@ -142,7 +231,7 @@ impl io::Read for RawFile #[inline] fn read(&mut self, buf: &mut [u8]) -> io::Result { match unsafe { - libc::read(self.fileno(), buf.as_mut_ptr() as *mut _, buf.len()) + libc::read(self.0.get(), buf.as_mut_ptr() as *mut _, buf.len()) } { -1 => Err(io::Error::last_os_error()), wr => Ok(wr as usize) @@ -153,7 +242,7 @@ impl io::Read for RawFile fn read_vectored(&mut self, bufs: &mut [io::IoSliceMut<'_>]) -> io::Result { // SAFETY: IoSlice is guaranteed to be ABI-compatible with `struct iovec` match unsafe { - libc::readv(self.fileno(), bufs.as_mut_ptr() as *mut _, bufs.len() as i32) + libc::readv(self.0.get(), bufs.as_mut_ptr() as *mut _, bufs.len() as i32) } { -1 => Err(io::Error::last_os_error()), wr => Ok(wr as usize) diff --git a/src/memfile/error.rs b/src/memfile/error.rs index 5260a1f..662cfad 100644 --- a/src/memfile/error.rs +++ b/src/memfile/error.rs @@ -22,7 +22,7 @@ pub struct DuplicateError { impl DuplicateError { - #[inline(always)] + #[inline] pub fn new_dup(from: &T) -> Self { Self{ @@ -32,7 +32,7 @@ impl DuplicateError } } - #[inline(always)] + #[inline] pub fn new_dup2(from: &T, to: &U) -> Self { Self { @@ -110,3 +110,83 @@ impl fmt::Display for DuplicateError } } + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum MemfileCreationStep +{ + /// `memfd_create()` call + Create(Option, libc::c_uint), + /// `fallocate()` call + Allocate(fd::RawFileDescriptor, usize), + /// `mmap()` call + Map { + addr: usize, + size: usize, + prot: map::MapProtection, + flags: libc::c_int, + fd: Option, + offset: libc::off_t, + }, +} + +#[derive(Debug)] +pub struct MemfileError +{ + step: MemfileCreationStep, + inner: io::Error, +} + +impl fmt::Display for MemfileCreationStep +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + match self { + Self::Create(None, 0 | MEMFD_CREATE_FLAGS) => f.write_str("memfd_create()"), + Self::Create(None, flags) => write!(f, "memfd_create(, {flags})"), + Self::Create(Some(name), flag) => write!(f, "memfd_create({name}, {flag})"), + Self::Allocate(fd, size) => write!(f, "fallocate({fd}, 0, 0, {size})"), + Self::Map{ addr: 0, size, prot, flags, fd: Some(fd), offset } => write!(f, "mmap(NULL, {size}, {prot:?}, {flags}, {fd}, {offset})"), + Self::Map{ addr: 0, size, prot, flags, fd: None, offset } => write!(f, "mmap(NULL, {size}, {prot:?}, {flags}, -1, {offset})"), + Self::Map{ addr, size, prot, flags, fd: Some(fd), offset } => write!(f, "mmap(0x{addr:x}, {size}, {prot:?}, {flags}, {fd}, {offset})"), + Self::Map{ addr, size, prot, flags, fd: None, offset } => write!(f, "mmap(0x{addr:x}, {size}, {prot:?}, {flags}, -1, {offset})"), + } + } +} + +impl error::Error for MemfileError +{ + #[inline] + fn source(&self) -> Option<&(dyn error::Error + 'static)> { + Some(&self.inner) + } +} +impl fmt::Display for MemfileError +{ + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + write!(f, "failed to create in-memory file: `{}` failed", self.step) + } +} + +impl MemfileError +{ + #[inline] + pub fn from_step(step: MemfileCreationStep) -> Self + { + Self { + step, + inner: io::Error::last_os_error() + } + } +} + +impl From for MemfileError +{ + #[inline] + fn from(from: MemfileCreationStep) -> Self + { + Self::from_step(from) + } +} + diff --git a/src/memfile/fd.rs b/src/memfile/fd.rs index 19f2b04..1973a5f 100644 --- a/src/memfile/fd.rs +++ b/src/memfile/fd.rs @@ -118,7 +118,7 @@ impl RawFileDescriptor pub const STDIN: Self = Self(unsafe { NonNegativeI32::new_unchecked(0) }); pub const STDOUT: Self = Self(unsafe { NonNegativeI32::new_unchecked(1) }); pub const STDERR: Self = Self(unsafe { NonNegativeI32::new_unchecked(2) }); - + #[inline(always)] pub fn try_new(fd: FileNo) -> Result { @@ -142,8 +142,26 @@ impl RawFileDescriptor { self.0.get() } + + #[inline(always)] + pub(super) const fn clone_const(&self) -> Self + { + //! **Internal**: `clone()` but useable in `memfile`-local `const fn`s + //! : since this type is essentially a `Copy` type, but without implicit copying. + Self(self.0) + } } +impl fmt::Display for RawFileDescriptor +{ + #[inline(always)] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + write!(f, "{}", self.get()) + } +} + + impl PartialEq for RawFileDescriptor { #[inline] diff --git a/src/memfile/map.rs b/src/memfile/map.rs new file mode 100644 index 0000000..8300d88 --- /dev/null +++ b/src/memfile/map.rs @@ -0,0 +1,22 @@ +//! Memory mapping +use super::*; +use libc::{ + c_int, + + PROT_NONE, + PROT_READ, + PROT_WRITE, + PROT_EXEC, +}; + +//TODO: Make this a `bitflags` struct. +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Copy, Default)] +#[repr(i32)] +pub enum MapProtection +{ + #[default] + None = PROT_NONE, + Read = PROT_READ, + Write = PROT_WRITE, + Execute = PROT_EXEC, +} From b3dbb30deb32dc88f154d9fc58cde5023ecc7433 Mon Sep 17 00:00:00 2001 From: Avril Date: Sun, 24 Apr 2022 09:28:41 +0100 Subject: [PATCH 09/15] Fix imports for when feature `memfile` is disabled. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fortune for collect's current commit: Blessing − 吉 --- Cargo.toml | 4 ++-- src/main.rs | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1cfb1c8..671a13f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,7 @@ edition = "2021" default = ["jemalloc", "memfile", "logging", "tracing/release_max_level_warn"] # TODO: mmap, memfd_create() ver -memfile = ["bitflags", "lazy_static"] +memfile = ["bitflags", "lazy_static", "stackalloc"] # bytes: use `bytes` crate for collecting instead of `std::vec` @@ -52,5 +52,5 @@ recolored = { version = "1.9.3", optional = true } memchr = "2.4.1" lazy_format = "1.10.0" bitflags = {version = "1.3.2", optional = true } -stackalloc = "1.1.1" +stackalloc = {version = "1.1.1", optional = true } lazy_static = { version = "1.4.0", optional = true } diff --git a/src/main.rs b/src/main.rs index 5857749..022092b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,7 +3,9 @@ #[cfg(feature="logging")] #[macro_use] extern crate tracing; +#[cfg(feature="memfile")] #[macro_use] extern crate lazy_static; +#[cfg(feature="memfile")] #[macro_use] extern crate stackalloc; /// Run this statement only if `tracing` is enabled From edb57c7c9558e2f4f620443b965b6b533c0e3068 Mon Sep 17 00:00:00 2001 From: Avril Date: Sun, 24 Apr 2022 13:27:05 +0100 Subject: [PATCH 10/15] memfile: Disambiguated memfile errors resulting in `fallocate()` sizes being too large or the call itself failing. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fortune for collect's current commit: Half blessing − 半吉 --- Cargo.toml | 4 +-- src/main.rs | 47 ++++++++++++++++++++----- src/memfile.rs | 84 +++++++++++++++++++++++++++++++++++++------- src/memfile/error.rs | 10 ++++-- 4 files changed, 120 insertions(+), 25 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 671a13f..ed743dc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,12 +1,12 @@ [package] name = "collect" -version = "0.1.0" +version = "0.2.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [features] -default = ["jemalloc", "memfile", "logging", "tracing/release_max_level_warn"] +default = ["jemalloc", "logging", "tracing/release_max_level_warn"] #, "memfile" ] # TODO: mmap, memfd_create() ver memfile = ["bitflags", "lazy_static", "stackalloc"] diff --git a/src/main.rs b/src/main.rs index 022092b..fa557bf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -82,10 +82,10 @@ struct StackStr(usize, std::mem::MaybeUninit<[u8; MAXLEN]>) impl StackStr { - #[inline] - pub const fn new() -> Self - { - Self(0, std::mem::MaybeUninit::uninit()) +#[inline] +pub const fn new() -> Self +{ +Self(0, std::mem::MaybeUninit::uninit()) } #[inline(always)] @@ -220,10 +220,11 @@ fn init() -> eyre::Result<()> color_eyre::install() } -#[cfg_attr(tracing, instrument(err))] -fn main() -> eyre::Result<()> { - init()?; - if_trace!(debug!("initialised")); +#[cfg_attr(feature="logging", instrument(err))] +#[inline] +fn non_map_work() -> eyre::Result<()> +{ + if_trace!(trace!("strategy: allocated buffer")); let (bytes, read) = { let stdin = io::stdin(); @@ -254,3 +255,33 @@ fn main() -> eyre::Result<()> { Ok(()) } + +#[cfg_attr(feature="logging", instrument(err))] +#[inline] +#[cfg(feature="memfile")] +fn map_work() -> eyre::Result<()> +{ + + if_trace!(trace!("strategy: mapped memory file")); + + let file = memfile::create_memfile(Some("this is a test file"), 4096)?; + unimplemented!("Feature not yet implemented") +} + +#[cfg_attr(feature="logging", instrument(err))] +fn main() -> eyre::Result<()> { + init()?; + if_trace!(debug!("initialised")); + + cfg_if!{ + if #[cfg(feature="memfile")] { + map_work() + .wrap_err(eyre!("Operation failed").with_note(|| "With mapped memfd algorithm"))?; + } else { + non_map_work() + .wrap_err(eyre!("Operation failed").with_note(|| "With alloc-buf (non-mapped) algorithm"))?; + } + } + + Ok(()) +} diff --git a/src/memfile.rs b/src/memfile.rs index 9907f23..f41559c 100644 --- a/src/memfile.rs +++ b/src/memfile.rs @@ -24,6 +24,25 @@ const MEMFD_CREATE_FLAGS: libc::c_uint = libc::MFD_CLOEXEC; #[repr(transparent)] pub struct RawFile(fd::RawFileDescriptor); +/// Create an in-memory `File`, with an optional name +#[cfg_attr(feature="logging", instrument(level="info", err))] +pub fn create_memfile(name: Option<&str>, size: usize) -> eyre::Result +{ + if_trace!(debug!("Attempting to allocate {size} bytes of contiguous physical memory for memory file named {:?}", name.unwrap_or(""))); + RawFile::open_mem(name, size).map(Into::into) + .wrap_err(eyre!("Failed to open in-memory file") + .with_section(move || format!("{:?}", name).header("Proposed name")) + .with_section(|| size.header("Requested physical memory buffer size"))) +} + +impl Clone for RawFile +{ + #[inline] + fn clone(&self) -> Self { + self.try_clone().expect("failed to duplicate raw fd") + } +} + impl RawFile { /// Get the raw fd for this raw file @@ -97,6 +116,16 @@ impl RawFile } } + /// Attempt to duplicate this raw file + pub fn try_clone(&self) -> Result + { + match unsafe { libc::dup(self.0.get()) } + { + -1 => Err(error::DuplicateError::new_dup(self)), + fd => Ok(Self::take_ownership_of_unchecked(fd)) + } + } + /// Consume a managed file into a raw file, attempting to synchronise it first. /// /// # Note @@ -146,6 +175,8 @@ impl RawFile } /// Open a new in-memory (W+R) file with an optional name and a fixed size. + + #[cfg_attr(feature="logging", instrument(err))] pub fn open_mem(name: Option<&str>, len: usize) -> Result { lazy_static! { @@ -160,19 +191,29 @@ impl RawFile let rname = name.unwrap_or(&DEFAULT_NAME); - stackalloc::alloca_zeroed(rname.len()+1, move |bname| { //XXX: Isn't the whole point of making `name` `&'static` that I don't know if `memfd_create()` requires static-lifetime name strings? TODO: Check this + stackalloc::alloca_zeroed(rname.len()+1, move |bname| { //XXX: Isn't the whole point of making `name` `&'static` that I don't know if `memfd_create()` requires static-lifetime name strings? TODO: Check this + #[cfg(feature="logging")] + let _span = info_span!("stack_name_cpy", size = bname.len()); + #[cfg(feature="logging")] + let _span_lock = _span.enter(); + macro_rules! attempt_call { ($errcon:literal, $expr:expr, $step:expr) => { + //if_trace!(debug!("attempting systemcall")); match unsafe { $expr } { - $errcon => Err($step), + $errcon => { + if_trace!(warn!("systemcall failed: {}", error::raw_errno())); + Err($step) + }, x => Ok(x) } } } + if_trace!(trace!("copying {rname:p} `{rname}' (sz: {}) -> nul-terminated {:p}", rname.len(), bname)); let bname = { unsafe { std::ptr::copy_nonoverlapping(rname.as_ptr(), bname.as_mut_ptr(), rname.len()); @@ -186,11 +227,10 @@ impl RawFile attempt_call!(-1 , fallocate(fd.0.get(), 0, 0, len.try_into() - .map_err(|_| Allocate(fd.fileno().clone(), len))?) - , Allocate(fd.fileno().clone(), len))?; + .map_err(|_| Allocate(None, len))?) + , Allocate(Some(fd.fileno().clone()), len))?; Ok(fd) - }) } } @@ -268,14 +308,6 @@ impl From for fs::File } } - -impl Clone for RawFile -{ - #[inline] - fn clone(&self) -> Self { - unsafe { Self::from_raw_fd(libc::dup(self.0.get())) } - } -} impl ops::Drop for RawFile { #[inline] @@ -311,3 +343,29 @@ impl IntoRawFd for RawFile fd } } + +#[cfg(test)] +mod tests +{ + use super::*; + #[test] + fn memory_mapping() -> eyre::Result<()> + { + use std::io::*; + const STRING: &[u8] = b"Hello world!"; + let mut file = { + let mut file = RawFile::open_mem(None, 4096)?; + file.write_all(STRING)?; + let mut file = fs::File::from(file); + file.seek(SeekFrom::Start(0))?; + file + }; + let v: Vec = stackalloc::alloca_zeroed(STRING.len(), |buf| { + file.read_exact(buf).map(|_| buf.into()) + })?; + + assert_eq!(v.len(), STRING.len(), "Invalid read size."); + assert_eq!(&v[..], &STRING[..], "Invalid read data."); + Ok(()) + } +} diff --git a/src/memfile/error.rs b/src/memfile/error.rs index 662cfad..87ee37e 100644 --- a/src/memfile/error.rs +++ b/src/memfile/error.rs @@ -2,6 +2,11 @@ use super::*; use std::{fmt, error}; +pub(super) fn raw_errno() -> libc::c_int +{ + unsafe { *libc::__errno_location() } +} + /// The kind of duplicate fd syscall that was attempted #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Copy)] pub enum DuplicateKind @@ -117,7 +122,7 @@ pub enum MemfileCreationStep /// `memfd_create()` call Create(Option, libc::c_uint), /// `fallocate()` call - Allocate(fd::RawFileDescriptor, usize), + Allocate(Option, usize), /// `mmap()` call Map { addr: usize, @@ -144,7 +149,8 @@ impl fmt::Display for MemfileCreationStep Self::Create(None, 0 | MEMFD_CREATE_FLAGS) => f.write_str("memfd_create()"), Self::Create(None, flags) => write!(f, "memfd_create(, {flags})"), Self::Create(Some(name), flag) => write!(f, "memfd_create({name}, {flag})"), - Self::Allocate(fd, size) => write!(f, "fallocate({fd}, 0, 0, {size})"), + Self::Allocate(None, size) => write!(f, "checked_cast({size})"), + Self::Allocate(Some(fd), size) => write!(f, "fallocate({fd}, 0, 0, {size})"), Self::Map{ addr: 0, size, prot, flags, fd: Some(fd), offset } => write!(f, "mmap(NULL, {size}, {prot:?}, {flags}, {fd}, {offset})"), Self::Map{ addr: 0, size, prot, flags, fd: None, offset } => write!(f, "mmap(NULL, {size}, {prot:?}, {flags}, -1, {offset})"), Self::Map{ addr, size, prot, flags, fd: Some(fd), offset } => write!(f, "mmap(0x{addr:x}, {size}, {prot:?}, {flags}, {fd}, {offset})"), From 0628853b432605a0bbdcbc34fe29387135711aa0 Mon Sep 17 00:00:00 2001 From: Avril Date: Mon, 25 Apr 2022 08:21:40 +0100 Subject: [PATCH 11/15] Working memfile implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fortune for collect's current commit: Future blessing − 末吉 --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/main.rs | 70 ++++++++++++++++++++++++++++++++++++++++++++++++-- src/memfile.rs | 14 ++++++---- 4 files changed, 79 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8beefb0..5e65c26 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -84,7 +84,7 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "collect" -version = "0.1.0" +version = "0.2.0" dependencies = [ "bitflags", "bytes", diff --git a/Cargo.toml b/Cargo.toml index ed743dc..1022970 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [features] -default = ["jemalloc", "logging", "tracing/release_max_level_warn"] #, "memfile" ] +default = ["jemalloc", "memfile", "logging", "tracing/release_max_level_warn"] #, "memfile" ] # TODO: mmap, memfd_create() ver memfile = ["bitflags", "lazy_static", "stackalloc"] diff --git a/src/main.rs b/src/main.rs index fa557bf..713a194 100644 --- a/src/main.rs +++ b/src/main.rs @@ -261,11 +261,77 @@ fn non_map_work() -> eyre::Result<()> #[cfg(feature="memfile")] fn map_work() -> eyre::Result<()> { + extern "C" { + fn getpagesize() -> libc::c_int; + } + /// 8 pages + const DEFAULT_BUFFER_SIZE: fn () -> Option = || { unsafe { std::num::NonZeroUsize::new((getpagesize() as usize) * 8) } }; if_trace!(trace!("strategy: mapped memory file")); - let file = memfile::create_memfile(Some("this is a test file"), 4096)?; - unimplemented!("Feature not yet implemented") + use std::borrow::Borrow; + + #[inline(always)] + fn tell_file(file: &mut T) -> io::Result + where T: io::Seek + ?Sized + { + file.stream_position() + } + + #[inline(always)] + fn unwrap_int_string(i: impl Borrow>) -> String + where T: std::fmt::Display, + E: std::fmt::Display + { + i.borrow().as_ref().map(ToString::to_string) + .unwrap_or_else(|e| format!("")) + } + + let (mut file, read) = { + let stdin = io::stdin(); + + let buffsz = try_get_size(&stdin).or_else(DEFAULT_BUFFER_SIZE); + if_trace!(trace!("Attempted determining input size: {:?}", buffsz)); + let mut file = memfile::create_memfile(Some("collect-buffer"), + buffsz.map(|x| x.get()).unwrap_or(0)) + .with_section(|| format!("{:?}", buffsz).header("Deduced input buffer size")) + .wrap_err(eyre!("Failed to create in-memory buffer"))?; + + let read = io::copy(&mut stdin.lock(), &mut file) + .with_section(|| format!("{:?}", file).header("Memory buffer file"))?; + + { + use io::*; + file.seek(SeekFrom::Start(0)) + .with_section(|| read.header("Actual read bytes")) + .wrap_err(eyre!("Failed to seek back to start of memory buffer file for output") + .with_section(|| unwrap_int_string(file.stream_position()).header("Memfile position")) + /*.with_section(|| file.stream_len().map(|x| x.to_string()) + .unwrap_or_else(|e| format!("")).header("Memfile full length"))*/)?; + } + + (file, usize::try_from(read) + .wrap_err(eyre!("Failed to convert read bytes to `usize`") + .with_section(|| read.header("Number of bytes was")) + .with_section(|| u128::abs_diff(read.into(), usize::MAX as u128).header("Difference between `read` and `usize::MAX` is")) + .with_suggestion(|| "It is likely you are running on a 32-bit ptr width machine and this input exceeds that of the maximum 32-bit unsigned integer value") + .with_note(|| usize::MAX.header("Maximum value of `usize`")))?) + }; + if_trace!(info!("collected {read} from stdin. starting write.")); + + let written = + io::copy(&mut file, &mut io::stdout().lock()) + .with_section(|| read.header("Bytes read from stdin")) + .with_section(|| unwrap_int_string(tell_file(&mut file)).header("Current buffer position")) + .wrap_err("Failed to write buffer to stdout")?; + if_trace!(info!("written {written} to stdout.")); + + if read != written as usize { + return Err(io::Error::new(io::ErrorKind::BrokenPipe, format!("read {read} bytes, but only wrote {written}"))) + .wrap_err("Writing failed: size mismatch"); + } + + Ok(()) } #[cfg_attr(feature="logging", instrument(err))] diff --git a/src/memfile.rs b/src/memfile.rs index f41559c..18a8bba 100644 --- a/src/memfile.rs +++ b/src/memfile.rs @@ -224,11 +224,15 @@ impl RawFile let fd = attempt_call!(-1, memfd_create(bname as *const _, MEMFD_CREATE_FLAGS), Create(name.map(str::to_owned), MEMFD_CREATE_FLAGS)) .map(Self::take_ownership_of_unchecked)?; // Ensures `fd` is dropped if any subsequent calls fail - - attempt_call!(-1 - , fallocate(fd.0.get(), 0, 0, len.try_into() - .map_err(|_| Allocate(None, len))?) - , Allocate(Some(fd.fileno().clone()), len))?; + + if len > 0 { + attempt_call!(-1 + , fallocate(fd.0.get(), 0, 0, len.try_into() + .map_err(|_| Allocate(None, len))?) + , Allocate(Some(fd.fileno().clone()), len))?; + } else { + if_trace!(trace!("No length provided, skipping fallocate() call")); + } Ok(fd) }) From 23d022b5fe0e153f09ba2194104cde0ddb3fb62f Mon Sep 17 00:00:00 2001 From: Avril Date: Mon, 25 Apr 2022 09:37:14 +0100 Subject: [PATCH 12/15] Added working memfile implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fortune for collect's current commit: Blessing − 吉 --- Cargo.toml | 3 ++ src/main.rs | 97 +++++++++++++++++++++++++++++++++++++++++++------- src/memfile.rs | 26 ++++++++++++++ 3 files changed, 113 insertions(+), 13 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1022970..84b0d2e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,9 @@ default = ["jemalloc", "memfile", "logging", "tracing/release_max_level_warn"] # # TODO: mmap, memfd_create() ver memfile = ["bitflags", "lazy_static", "stackalloc"] +# When unable to determine the size of the input, preallocate the buffer to a multiple of the system page-size before writing to it. This can save extra `ftruncate()` calls, but will also result in the buffer needing to be truncated to the correct size at the end if the sizes as not matched. +memfile-preallocate = ["memfile"] + # bytes: use `bytes` crate for collecting instead of `std::vec` # Use jemalloc instead of system malloc. diff --git a/src/main.rs b/src/main.rs index 713a194..0570690 100644 --- a/src/main.rs +++ b/src/main.rs @@ -260,12 +260,19 @@ fn non_map_work() -> eyre::Result<()> #[inline] #[cfg(feature="memfile")] fn map_work() -> eyre::Result<()> -{ - extern "C" { - fn getpagesize() -> libc::c_int; - } - /// 8 pages - const DEFAULT_BUFFER_SIZE: fn () -> Option = || { unsafe { std::num::NonZeroUsize::new((getpagesize() as usize) * 8) } }; +{ + const DEFAULT_BUFFER_SIZE: fn () -> Option = || { + cfg_if!{ + if #[cfg(feature="memfile-preallocate")] { + extern "C" { + fn getpagesize() -> libc::c_int; + } + unsafe { std::num::NonZeroUsize::new(getpagesize() as usize * 8) } + } else { + std::num::NonZeroUsize::new(0) + } + } + }; if_trace!(trace!("strategy: mapped memory file")); @@ -290,8 +297,15 @@ fn map_work() -> eyre::Result<()> let (mut file, read) = { let stdin = io::stdin(); - let buffsz = try_get_size(&stdin).or_else(DEFAULT_BUFFER_SIZE); - if_trace!(trace!("Attempted determining input size: {:?}", buffsz)); + let buffsz = try_get_size(&stdin); + if_trace!(debug!("Attempted determining input size: {:?}", buffsz)); + let buffsz = buffsz.or_else(DEFAULT_BUFFER_SIZE); + if_trace!(if let Some(buf) = buffsz.as_ref() { + trace!("Failed to determine input size: preallocating to {}", buf); + } else { + trace!("Failed to determine input size: alllocating on-the-fly (no preallocation)"); + }); + let mut file = memfile::create_memfile(Some("collect-buffer"), buffsz.map(|x| x.get()).unwrap_or(0)) .with_section(|| format!("{:?}", buffsz).header("Deduced input buffer size")) @@ -300,15 +314,72 @@ fn map_work() -> eyre::Result<()> let read = io::copy(&mut stdin.lock(), &mut file) .with_section(|| format!("{:?}", file).header("Memory buffer file"))?; - { + let read = if cfg!(any(feature="memfile-preallocate", debug_assertions)) { use io::*; + let sp = file.stream_position(); + let sl = memfile::stream_len(&file); + + if_trace!(trace!("Stream position after read: {:?}", sp)); + if_trace!(trace!("Stream length after read: {:?}", sp)); + let read = match sp.as_ref() { + Ok(&v) if v != read => { + if_trace!(warn!("Reported read value not equal to memfile stream position: expected from `io::copy()`: {v}, got {read}")); + v + }, + Ok(&x) => { + if_trace!(trace!("Reported memfile stream position and copy result equal: {x} == {}", read)); + x + }, + Err(e) => { + if_trace!(error!("Could not report memfile stream position, ignoring check on {read}: {e}")); + read + }, + }; + + let truncate_stream = |bad: u64, good: u64| { + use std::num::NonZeroU64; + use std::borrow::Cow; + file.set_len(good) + .map(|_| good) + .with_section(|| match NonZeroU64::new(bad) {Some (b) => Cow::Owned(b.get().to_string()), None => Cow::Borrowed("") }.header("Original (bad) length")) + .with_section(|| good.header("New (correct) length")) + .wrap_err(eyre!("Failed to truncate stream to correct length") + .with_section(|| format!("{:?}", file).header("Memory buffer file"))) + }; + + let read = match sl.as_ref() { + Ok(&v) if v != read => { + if_trace!(warn!("Reported read value not equal to memfile stream length: expected from `io::copy()`: {read}, got {v}")); + if_trace!(debug!("Attempting to correct memfile stream length from {v} to {read}")); + + truncate_stream(v, read)? + }, + Ok(&v) => { + if_trace!(trace!("Reported memfile stream length and copy result equal: {v} == {}", read)); + v + }, + Err(e) => { + if_trace!(error!("Could not report memfile stream length, ignoring check on {read}: {e}")); + if_trace!(warn!("Attempting to correct memfile stream length anyway")); + if let Err(e) = truncate_stream(0, read) { + if_trace!(error!("Truncate failed: {e}")); + } + + read + } + }; + file.seek(SeekFrom::Start(0)) .with_section(|| read.header("Actual read bytes")) .wrap_err(eyre!("Failed to seek back to start of memory buffer file for output") - .with_section(|| unwrap_int_string(file.stream_position()).header("Memfile position")) + .with_section(|| unwrap_int_string(sp).header("Memfile position")) /*.with_section(|| file.stream_len().map(|x| x.to_string()) - .unwrap_or_else(|e| format!("")).header("Memfile full length"))*/)?; - } + .unwrap_or_else(|e| format!("")).header("Memfile full length"))*/)?; + + read + } else { + read + }; (file, usize::try_from(read) .wrap_err(eyre!("Failed to convert read bytes to `usize`") @@ -317,7 +388,7 @@ fn map_work() -> eyre::Result<()> .with_suggestion(|| "It is likely you are running on a 32-bit ptr width machine and this input exceeds that of the maximum 32-bit unsigned integer value") .with_note(|| usize::MAX.header("Maximum value of `usize`")))?) }; - if_trace!(info!("collected {read} from stdin. starting write.")); + if_trace!(info!("collected {} from stdin. starting write.", read)); let written = io::copy(&mut file, &mut io::stdout().lock()) diff --git a/src/memfile.rs b/src/memfile.rs index 18a8bba..a0e7431 100644 --- a/src/memfile.rs +++ b/src/memfile.rs @@ -24,6 +24,22 @@ const MEMFD_CREATE_FLAGS: libc::c_uint = libc::MFD_CLOEXEC; #[repr(transparent)] pub struct RawFile(fd::RawFileDescriptor); +/// Attempt to get the length of a stream's file descriptor +#[inline] +#[cfg_attr(feature="logging", instrument(level="debug", err, skip_all, fields(from_fd = from.as_raw_fd())))] +pub fn stream_len(from: &(impl AsRawFd + ?Sized)) -> io::Result +{ + let mut stat = std::mem::MaybeUninit::uninit(); + match unsafe { libc::fstat(from.as_raw_fd(), stat.as_mut_ptr()) } { + -1 => Err(io::Error::last_os_error()), + _ => { + let stat = unsafe { stat.assume_init() }; + debug_assert!(stat.st_size >= 0, "bad stat size"); + Ok(stat.st_size as u64) + }, + } +} + /// Create an in-memory `File`, with an optional name #[cfg_attr(feature="logging", instrument(level="info", err))] pub fn create_memfile(name: Option<&str>, size: usize) -> eyre::Result @@ -230,6 +246,16 @@ impl RawFile , fallocate(fd.0.get(), 0, 0, len.try_into() .map_err(|_| Allocate(None, len))?) , Allocate(Some(fd.fileno().clone()), len))?; + if cfg!(debug_assertions) { + if_trace!(trace!("Allocated {len} bytes to memory buffer")); + let seeked; + assert_eq!(attempt_call!(-1 + , { seeked = libc::lseek(fd.0.get(), 0, libc::SEEK_CUR); seeked } + , io::Error::last_os_error()) + .expect("Failed to check seek position in fd") + , 0, "memfd seek position is non-zero after fallocate()"); + if_trace!(if seeked != 0 { warn!("Trace offset is non-zero: {seeked}") } else { trace!("Trace offset verified ok") }); + } } else { if_trace!(trace!("No length provided, skipping fallocate() call")); } From 515a63c3a2de758ea117fc9ff14825a42362fba0 Mon Sep 17 00:00:00 2001 From: Avril Date: Mon, 25 Apr 2022 09:51:09 +0100 Subject: [PATCH 13/15] Fixed bug with `memfile` feature not setting stream length properly without `memfile-preallocate` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fortune for collect's current commit: Future small blessing − 末小吉 --- src/main.rs | 118 ++++++++++++++++++++++++++++------------------------ 1 file changed, 63 insertions(+), 55 deletions(-) diff --git a/src/main.rs b/src/main.rs index 0570690..e3252d1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -314,70 +314,78 @@ fn map_work() -> eyre::Result<()> let read = io::copy(&mut stdin.lock(), &mut file) .with_section(|| format!("{:?}", file).header("Memory buffer file"))?; - let read = if cfg!(any(feature="memfile-preallocate", debug_assertions)) { + let read = { use io::*; - let sp = file.stream_position(); - let sl = memfile::stream_len(&file); - - if_trace!(trace!("Stream position after read: {:?}", sp)); - if_trace!(trace!("Stream length after read: {:?}", sp)); - let read = match sp.as_ref() { - Ok(&v) if v != read => { - if_trace!(warn!("Reported read value not equal to memfile stream position: expected from `io::copy()`: {v}, got {read}")); - v - }, - Ok(&x) => { - if_trace!(trace!("Reported memfile stream position and copy result equal: {x} == {}", read)); - x - }, - Err(e) => { - if_trace!(error!("Could not report memfile stream position, ignoring check on {read}: {e}")); - read - }, - }; - - let truncate_stream = |bad: u64, good: u64| { - use std::num::NonZeroU64; - use std::borrow::Cow; - file.set_len(good) - .map(|_| good) - .with_section(|| match NonZeroU64::new(bad) {Some (b) => Cow::Owned(b.get().to_string()), None => Cow::Borrowed("") }.header("Original (bad) length")) - .with_section(|| good.header("New (correct) length")) - .wrap_err(eyre!("Failed to truncate stream to correct length") - .with_section(|| format!("{:?}", file).header("Memory buffer file"))) - }; - - let read = match sl.as_ref() { - Ok(&v) if v != read => { - if_trace!(warn!("Reported read value not equal to memfile stream length: expected from `io::copy()`: {read}, got {v}")); - if_trace!(debug!("Attempting to correct memfile stream length from {v} to {read}")); - - truncate_stream(v, read)? - }, - Ok(&v) => { - if_trace!(trace!("Reported memfile stream length and copy result equal: {v} == {}", read)); - v - }, - Err(e) => { - if_trace!(error!("Could not report memfile stream length, ignoring check on {read}: {e}")); - if_trace!(warn!("Attempting to correct memfile stream length anyway")); - if let Err(e) = truncate_stream(0, read) { - if_trace!(error!("Truncate failed: {e}")); + use std::borrow::Cow; + + let (read, sp, sl) = if cfg!(any(feature="memfile-preallocate", debug_assertions)) { + let sp = file.stream_position(); + let sl = memfile::stream_len(&file); + + if_trace!(trace!("Stream position after read: {:?}", sp)); + if_trace!(trace!("Stream length after read: {:?}", sp)); + + let read = match sp.as_ref() { + Ok(&v) if v != read => { + if_trace!(warn!("Reported read value not equal to memfile stream position: expected from `io::copy()`: {v}, got {read}")); + v + }, + Ok(&x) => { + if_trace!(trace!("Reported memfile stream position and copy result equal: {x} == {}", read)); + x + }, + Err(e) => { + if_trace!(error!("Could not report memfile stream position, ignoring check on {read}: {e}")); + read + }, + }; + + let truncate_stream = |bad: u64, good: u64| { + use std::num::NonZeroU64; + file.set_len(good) + .map(|_| good) + .with_section(|| match NonZeroU64::new(bad) {Some (b) => Cow::Owned(b.get().to_string()), None => Cow::Borrowed("") }.header("Original (bad) length")) + .with_section(|| good.header("New (correct) length")) + .wrap_err(eyre!("Failed to truncate stream to correct length") + .with_section(|| format!("{:?}", file).header("Memory buffer file"))) + }; + + let read = match sl.as_ref() { + Ok(&v) if v != read => { + if_trace!(warn!("Reported read value not equal to memfile stream length: expected from `io::copy()`: {read}, got {v}")); + if_trace!(debug!("Attempting to correct memfile stream length from {v} to {read}")); + + truncate_stream(v, read)? + }, + Ok(&v) => { + if_trace!(trace!("Reported memfile stream length and copy result equal: {v} == {}", read)); + v + }, + Err(e) => { + if_trace!(error!("Could not report memfile stream length, ignoring check on {read}: {e}")); + if_trace!(warn!("Attempting to correct memfile stream length anyway")); + if let Err(e) = truncate_stream(0, read) { + if_trace!(error!("Truncate failed: {e}")); + } + + read } - - read - } + }; + (read, Some(sp), Some(sl)) + } else { + (read, None, None) }; - + file.seek(SeekFrom::Start(0)) .with_section(|| read.header("Actual read bytes")) .wrap_err(eyre!("Failed to seek back to start of memory buffer file for output") - .with_section(|| unwrap_int_string(sp).header("Memfile position")) + .with_section(move || if let Some(sp) = sp { Cow::Owned(unwrap_int_string(sp)) } + else { Cow::Borrowed("") }.header("Memfile position")) + .with_section(move || if let Some(sp) = sl { Cow::Owned(unwrap_int_string(sp)) } + else { Cow::Borrowed("") }.header("Memfile full length")) /*.with_section(|| file.stream_len().map(|x| x.to_string()) .unwrap_or_else(|e| format!("")).header("Memfile full length"))*/)?; - read - } else { read }; From 8390072309b326da06fd5e1de2d9159e99cb6ec9 Mon Sep 17 00:00:00 2001 From: Avril Date: Wed, 27 Apr 2022 17:43:05 +0100 Subject: [PATCH 14/15] Reached version 1.0.0! MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added `mode-*` feature flags for specific working methods (default: `mode-memfile` [+logging]) Fortune for collect's current commit: Half curse − 半凶 --- Cargo.lock | 2 +- Cargo.toml | 49 +++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 42 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5e65c26..1a2a829 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -84,7 +84,7 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "collect" -version = "0.2.0" +version = "1.0.0" dependencies = [ "bitflags", "bytes", diff --git a/Cargo.toml b/Cargo.toml index 84b0d2e..07114cd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,24 +1,57 @@ [package] name = "collect" -version = "0.2.0" -edition = "2021" +version = "1.0.0" +description = "collect all of stdin until it is closed, then output it all to stdout" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +authors = ["Avril "] +homepage = "https://git.flanchan.moe/flanchan/collect/" +repository="https://github.com/notflan/collect" +edition = "2021" +license = "GPL-3.0-or-later" [features] -default = ["jemalloc", "memfile", "logging", "tracing/release_max_level_warn"] #, "memfile" ] +# Endable default mode (`memfile-preallocate`). +# (Best output appears to come from `memfile-preallocate`, compared to `memfile` and `buffered`) +# +# # Alternatives +# To use a specific mode: `cargo build --release --no-default-features --features mode-{memfile,buffered}[,logging]` +# +# # Logging +# Tracing can be disabled at compile-time for higher performance by disabling the `logging` feature (see above, but remove `,logging` from the features.) +default = ["mode-memfile", "logging"] + +## --- Modes --- ## + +# Mode: default +# Use physical-memory backed kernel file-descriptors. (see feature `memfile`.) +mode-memfile = ["memfile-preallocate"] #, "tracing/release_max_level_warn"] -# TODO: mmap, memfd_create() ver +# Mode: alternative +# Use non-physical memory allocated buffers. +mode-buffered = ["jemalloc", "bytes"] + +## --- Individual features --- ## + +# Use an in-memory file for storage instead of a byte-buffer. +# +# This can draastically improve performance as it allows for the use of `splice()` and `send_file()` syscalls instead of many `read()` and `write()` ones. +# +# # *NOTE*: Requires the Linux `memfd_create()` syscall to be available in libc. +# # **WARNING**: Can potentially cause *full system OOM* if the initial size of the input pipe is: +# * Statically sized (the program can infer the size of standard input.) +# * The standard input file/buffer pipe size is large enough to pre-allocate enough splicing space to use up the rest of your physical RAM. +# (This will very likely not happen unless you're specifically trying to make it happen, however.) memfile = ["bitflags", "lazy_static", "stackalloc"] -# When unable to determine the size of the input, preallocate the buffer to a multiple of the system page-size before writing to it. This can save extra `ftruncate()` calls, but will also result in the buffer needing to be truncated to the correct size at the end if the sizes as not matched. +# `memfile`: When unable to determine the size of the input, preallocate the buffer to a multiple of the system page-size before writing to it. This can save extra `ftruncate()` calls, but will also result in the buffer needing to be truncated to the correct size at the end if the sizes as not matched. +# +# *NOTE*: Requires `getpagesz()` to be available in libc. memfile-preallocate = ["memfile"] -# bytes: use `bytes` crate for collecting instead of `std::vec` # Use jemalloc instead of system malloc. # -# Decreases memory-handling function calls, resulting in less "used" memory and faster allocation speeds at the cost of mapping a huge amount of virtual memory. +# Decreases memory-handling function calls, resulting in less "used" memory and faster allocation speeds at the "cost" of mapping a huge amount of virtual memory. jemalloc = ["jemallocator"] # Remove all runtime logging code. From f05c1466c75ca873760367b95abf6aa0e79cba7e Mon Sep 17 00:00:00 2001 From: Avril Date: Wed, 27 Apr 2022 21:00:16 +0100 Subject: [PATCH 15/15] Added README.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fortune for collect's current commit: Future small blessing − 末小吉 --- README.md | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..821a4b2 --- /dev/null +++ b/README.md @@ -0,0 +1,95 @@ +# `collect` - Collect all input until it's closed, then output it all at once. + +This small tool can be used to ensure all data between pipes is synchronised, and/or to ensure the 2nd program in the pipe doesn't start processing before the first one has finished outputting her data. + +## Usage +For example, in the pipeline `x | collect | y`, where `x` is a program who's output is sporadic (something like a network connection, reading and processing a segmented file, etc) `y` will receive all of `x`s output at once as soon as `x` closes her standard output pipe. So `y` will not start processing until `x` has completed hers. + + +There are no runtime flags (unless logging is enabled, in which case, see below), it simply reads from `stdin` and writes to `stdout`. (When logging is enabled, and the log-level is set to a level that will enabled common info logging, it is written to `stderr` **only** to not interfere with the data collected from `stdin`.) + + +### Logging +When compiled with the `logging` feature (default), you can control the log level with the `RUST_LOG` environment variable (the default for release builds is `info`, for debug builds, `debug`.) + +#### Available levels +To set the level, run with `RUST_LOG=` one of the below values: +* `trace` - The lowest level of logging, all information will be printed. +* `debug` - The 2nd lowest level, debugging-relevent information (such as buffer sizes, file descriptor numbers/names, read/write segment sizes, allocations, etc.) will be printed. (default for `debug` builds.) +* `info` - Will print information when collection has started, finished, and output is over. (default for `release` builds.) +* `warn` - Will print only warnings. Most of these that will be seen will be related to additionally required syscalls for fd-size truncation, which are only efficiency-related and not warnings to the user herself's use of the program. But some will be. +* `error` - Only print error messages. +* `off` - Print no messages at all. + +## Building +Building requires `rust` and `Cargo`. + +To build with the default configuration: +``` shell +$ cargo build --release +``` +Will build the binary into `./target/release/collect`. + +### Debug builds +To create a debug build: +``` shell +$ cargo build +``` +Will build the binary into `./target/debug/collect`. +*NOTE*: when `logging` feature is enabled, the default logging level will be `debug` instead of `info`. + +To create a release build that is not symbol-stripped: +``` shell +$ cargo build --profile symbols +``` +Will build the binary into `./target/symbols/collect`. + +### Modes & features +There are two major operative modes: `mode-memfile` (default [+`logging`]) and `mode-buffered`. +These are collections of features specific to each operating mode. + +#### Modes +Each mode feature can be chosen by building with a `Cargo` incantation in the following format: +``` shell +$ cargo build --release --no-default-features --features mode-[,logging] +``` + +* `mode-memfile` - This is the default used mode, which will use the feature `memfile-preallocate`. *NOTE*: The default enabled features chooses this mode and the `logging` feature. +* `mode-buffered` - This will use `jemalloc` and `bytes`-allocated buffers instead of file-descriptors. + +*NOTE*: If both modes are specified at once, `mode-memfile` will take precidence by the program, and `mode-buffered` will not be used. + +#### Features +The user can also compile the program with individual features specific to her needs. + +They can be specified as such: + +| Feature name | Description | Notes | +| `memfile` | Use an in-memory file-descriptor pointing to unmapped physical pages of memory. This will allow the program to make use of the more efficient `splice()` and `send_file()` syscalls where possible. | **WARNING**: Can potentially cause a *full system OOM* if used incorrectly or irresponsibly. (See below) | +| `memfile-preallocate` | `memfile`, but when unable to determine the size of `stdin`, will pre-allocate it to a multiple of the system page size. | *NOTE*: Requires `int getpagesize()` to be availble in your used `libc` implementation. (It ususally will exist unless you're using some meme implementation of `libc`.) This is enabled by default with the `memfile` mode. | +| `jemalloc` | Use `jemalloc` instead of system `malloc()` when allocating memory. This is only really helpful when *not* using `memfile`, but the program heap is still used for error propagating and log reporting in either mode. | `jemalloc` incorporates a lot of redundant (in this case) locking mechanisms, but causes a generally lower used memory profile than system malloc, however it does allocate far more *virtual memory* pages than is generally needed. This is enabled by default with the `buffered` mode. | +| `bytes` | Use the `bytes` crate to manage memory allocations in `buffered` mode instead of native vector implementations, this can *potentially* save on *some* copying operations. | Some crude benchmarks have shown this to be mildly more efficient in `buffered` mode than without it. | +| `disable-logging` | Removes all **runtime** logging code. Span-traces are still captured, however, they just are never used. | This won't save you much compared to just disabling the `logging` feature (below.) | +| `logging` | Enable the capture and reporting of span-traces and events. (See the section on logging above.) | This does cause a slowdown, but can provide useful information to the user about error locations, warnings, when and where input and output have finished and the sizes of both, etc. If you're only using it in scripts however, it'd be better to disable. (*default enabled*) | + +##### Notes about `memfile` feature/mode +If `memfile` is enabled, and the input size can be determined by the program, it will preallocate the required space for the input. +If this input were to exceed the amount of physical memory available (since this is unpaged memory being allocated,) it could hang and/or then cause the kernel to OOMkill basically everything *except* `collect`. + +Please note however, this would only typically happen in instances where a *file* is passed as input (where the length can be determined, the source it *usually* not segmented at all); in which case `collect` is just going to slow down your pipe. (It is still worth using for scripts where the script doesn't *know* if the standatd input is a file or not.) + +In the current version, this is not yet accounted for, so passing massive files, for example: +``` shell +$ collect <10-gb-file | wc -c +``` +Will try to allocate 10GB of *physical* memory for the collection. + +In future versions, a warning for large known-size inputs will be displayed, and an error for known-size inputs so large they would cause an OOM. (Same for unknown-sized inputs that grow the backing memfd to a size that would start to become an issue or would use too much physical memory.) +But currently, this is a pitfall of the `memfile` mode that, while very unlikely to ever be encountered, could still bite the user if it is encountered. + +If something like this may be a concern for your usecase, please fall-back to using the `buffered` mode instead, which, while significantly slower, will only OOM *itself* if the input is too large and cannot eat *physical* memory directly, only its already-large VM page maps which are, for most instances, mostly empty. + +# License +CPL'd with <3 + +