commit 79f5bb88dedae1ea728b4a52e303f4124500132f Author: Avril Date: Wed Mar 30 22:53:11 2022 +0100 initial commit Started low-level interface. mod loli:: Added binds for relevant gnu-c syscall-wrapper functions; added wrapper types and functions for dealing with errno. added extension traits for `dup()` and `dup2()` of streams (tested with stdin/out/err and /dev/null). Added UStr and UString (unmanaged string) helper types. Fortune for mempipe's current commit: Curse − 凶 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..8c1cb28 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,23 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "bytes" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8" + +[[package]] +name = "libc" +version = "0.2.121" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efaa7b300f3b5fe8eb6bf21ce3895e1751d9665086af2d64b42f19701015ff4f" + +[[package]] +name = "mempipe" +version = "0.1.0" +dependencies = [ + "bytes", + "libc", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..0aea888 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "mempipe" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +bytes = "1.1.0" +libc = "0.2.121" diff --git a/src/ext.rs b/src/ext.rs new file mode 100644 index 0000000..36be6a1 --- /dev/null +++ b/src/ext.rs @@ -0,0 +1,7 @@ +//! Extension traits, macros, types, etc. +use super::*; + +pub use loli::{ + DupExt, + Dup2Ext, +}; diff --git a/src/loli.rs b/src/loli.rs new file mode 100644 index 0000000..ac13fe6 --- /dev/null +++ b/src/loli.rs @@ -0,0 +1,554 @@ +//! LOw Level Interface +//! +//! Provides interfaces to low-level operations and syscalls. +use std::ops::{Deref, DerefMut}; +use std::{ + fmt, + error, +}; +use bytes::{ + Buf, +}; +use std::os::unix::io::*; +use std::ffi::{ + CStr, CString, +}; + +//TODO: Provide adequate wrappers for the extern functions below + +/// Raw exports +mod raw { + pub(super) use std::os::raw::*; + use libc::{ + off64_t as c_off64_t, + off_t as c_off_t, + pid_t as c_pid_t, + size_t as c_size_t, + }; + + + // Function definitions + + extern "C" { + pub fn memfd_create(name: *const c_char, flags: c_int) -> c_int; + + pub fn dup(fd: c_int) -> c_int; + pub fn dup2(from: c_int, to: c_int) -> c_int; + + pub fn ftruncate(fd: c_int, length: c_off_t) -> c_int; + pub fn ftruncate64(fd: c_int, length: c_off64_t) -> c_int; + + pub fn close(fd: c_int) -> c_int; + + pub fn getenv(name: *const c_char) -> *mut c_char; + pub fn getpid() -> c_pid_t; + + pub fn mmap( + addr: *mut c_void, + len: c_size_t, + prot: c_int, + flags: c_int, + fd: c_int, + offset: c_off_t + ) -> *mut c_void; + + pub fn mmap64( + addr: *mut c_void, + len: c_size_t, + prot: c_int, + flags: c_int, + fd: c_int, + offset: c_off64_t + ) -> *mut c_void; + } + + // Re-exports + + pub use libc::MAP_FAILED; + pub use std::ffi::c_void as Opaque; + pub use libc::c_char as Char; +} + +// Errno error handling + +/// A raw `errno` value +pub type RawErrno = raw::c_int; + +fn errno_raw_buffer(errno: RawErrno) -> Result<[u8; BUFF_SIZE], RawErrno> +{ + use std::mem::MaybeUninit; + + let mut buffer: MaybeUninit<[u8; BUFF_SIZE]> = MaybeUninit::uninit(); + + Ok(unsafe { + errno_if_nz(libc::strerror_r(errno, buffer.as_mut_ptr() as *mut raw::c_char, BUFF_SIZE))?; + + buffer.assume_init() + }) +} + +fn errno_raw_ustr(errno: RawErrno, buffer: &mut [u8]) -> Result<&UStr, RawErrno> +{ + unsafe { + errno_if_nz(libc::strerror_r(errno, buffer.as_mut_ptr() as *mut raw::c_char, buffer.len()))?; + } + Ok(UStr::new_mut(buffer)) +} + +#[inline(always)] +fn errno_raw_cstr(errno: RawErrno, buffer: &mut [u8]) -> Result<&CStr, RawErrno> +{ + errno_raw_ustr(errno, buffer).map(|x| unsafe {x.to_c_str()}) +} + +fn errno_ustring_buffer(errno: RawErrno) -> Result +{ + let buf = errno_raw_buffer::(errno)?; + Ok(UString::new_from_buffer(&mut &buf[..], BUFF_SIZE)) +} + +fn errno_cstring_buffer(errno: RawErrno) -> Result +{ + errno_ustring_buffer::(errno).map(|x| unsafe {x.to_c_string()}) +} + +fn errno_lossy_string_buffer(errno: RawErrno) -> Result +{ + errno_ustring_buffer::(errno).map(|x| x.to_string_lossy().into_owned()) +} + +/// An error when `dup()` or `dup2()`ing a stream. +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct DupError(RawErrno); + +impl DupError +{ + /// The `errno` code for this error. + #[inline] + pub fn code(&self) -> RawErrno + { + self.0 + } +} + +impl error::Error for DupError{} +impl fmt::Display for DupError +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + write!(f, "dup() failed with ")?; + + if let Ok(buffer) = errno_raw_buffer::<1024>(self.0) { + let string = String::from_utf8_lossy(buffer.split(|&b| b == 0) + .next() + .unwrap_or(b"")); + write!(f, "{}: {}", self.0, string) + } else { + write!(f, "{} ", self.0) + } + } +} + + +/// Get the current thread's `errno` value +pub fn current_errno() -> RawErrno +{ + unsafe {*libc::__errno_location()} +} + +/// Check FFI function returns for errors and return the appropriate errno value if needed. +/// +/// # Returns +/// If `chk(rv)` returns `true`, `Err()` is returned. Otherwise, `Ok(rv)` is returned. +#[inline(always)] +pub fn errno_if(rv: T, chk: F) -> Result +where F: Fn(T) -> bool +{ + if !chk(rv) { + Ok(rv) + } else { + #[cold] + #[inline(never)] + fn _error() -> RawErrno + { + current_errno() + } + Err(_error()) + } +} +#[inline(always)] +fn errno_if_not(rv: raw::c_int) -> Result +{ + if rv == EXPECTED { + Ok(rv) + } else { + #[cold] + #[inline(never)] + fn _error() -> RawErrno + { + current_errno() + } + Err(_error()) + } +} +#[inline(always)] +fn errno_if_nz(rv: raw::c_int) -> Result<(), RawErrno> +{ + errno_if_not::<0>(rv).map(|_| ()) +} + +// Wrapper traits + +/// Extension methods for `dup()`ing file descriptors +pub trait DupExt: Sized +{ + unsafe fn dup(&self) -> Result; +} + +/// Extension methods for `dup2()`ing file descriptors +pub trait Dup2Ext +{ + unsafe fn dup2(&self, other: &mut U) -> Result<(), DupError>; +} + +impl Dup2Ext for T +where T: AsRawFd, + U: AsRawFd +{ + unsafe fn dup2(&self, other: &mut U) -> Result<(), DupError> + { + errno_if(raw::dup2(self.as_raw_fd(), other.as_raw_fd()), |v| v < 0).map_err(DupError)?; + Ok(()) + } +} + +impl DupExt for T +where T: FromRawFd + AsRawFd +{ + unsafe fn dup(&self) -> Result + { + errno_if(raw::dup(self.as_raw_fd()), |v| v < 0) + .map(|fd| Self::from_raw_fd(fd)) // WHY are `unsafe fn`s not first class in an `unsafe` context?? + .map_err(DupError) + } +} + +//TODO: Document the following, and make it work like CStr/CString + +/// A reference to an unmanaged string that may contain non-utf8 characters. +/// +/// This differs from `CStr` in that it is **not** nul-terminated. +#[repr(transparent)] +pub struct UStr([u8]); + +impl UStr { + #[inline(always)] + pub unsafe fn from_raw_mut<'a>(ptr: *mut u8, size: usize) -> &'a mut Self + { + &mut *(std::ptr::slice_from_raw_parts_mut::(ptr, size) as *mut Self) + } + #[inline(always)] + pub unsafe fn from_raw<'a>(ptr: *const u8, size: usize) -> &'a Self + { + &*(std::ptr::slice_from_raw_parts::(ptr, size) as *const Self) + } + + #[inline] + pub fn new<'a>(bytes: &'a [u8]) -> &'a Self + { + // SAFETY: ustr is transparent. + unsafe { + std::mem::transmute::<&'a [u8], &'a Self>(bytes) + } + } + + #[inline] + pub fn new_mut<'a>(bytes: &'a mut [u8]) -> &'a mut Self + { + // SAFETY: ustr is transparent. + unsafe { + std::mem::transmute::<&'a mut [u8], &'a mut Self>(bytes) + } + } + + /// Convert to a reference to a nul-terminated byte string + /// + /// # Safety + /// The unmanaged string must contain a null byte in its slice. + pub unsafe fn to_c_str(&self) -> &CStr + { + CStr::from_ptr(self.0.as_ptr() as *const raw::c_char) + } + + /// Create a `CStr` from this nul-terminated unmanaged byte string + /// + /// # Returns + /// The final byte in the slice must be `\0`, or `Err` will be returned. + pub fn as_c_str(&self) -> Result<&CStr, std::ffi::FromBytesWithNulError> + { + CStr::from_bytes_with_nul(&self.0[..]) + } + + /// Create an unmanaged string from a nul-terminated string + /// + /// # Nul-terminator + /// The returned unmanaged string reference will contain the nul-terminator as the final byte in the slice. + #[inline] + pub fn from_c_str(raw: &CStr) -> &Self + { + Self::new(raw.to_bytes_with_nul()) + } + + /// Returns `true` if the underlying slice is nul-terminated, and can be safely converted back/forth with `CStr`. + #[inline] + pub fn is_nul_terminated(&self) -> bool + { + self.0.last().map(|&x| x == 0).unwrap_or(false) + } + + #[inline] + pub fn to_str(&self) -> Result<&str, std::str::Utf8Error> + { + std::str::from_utf8(&self.0[..]) + } + + #[inline] + pub fn to_mut_str(&mut self) -> Result<&mut str, std::str::Utf8Error> + { + std::str::from_utf8_mut(&mut self.0[..]) + } + + pub fn to_string_lossy(&self) -> std::borrow::Cow<'_, str> + { + String::from_utf8_lossy(&self.0[..]) + } + + #[inline] + pub fn into_ustring(self: Box) -> UString + { + UString::from_boxed_ustr(self) + } + + #[inline] + pub fn from_ustring(owned: UString) -> Box + { + owned.into_boxed_ustr() + } +} + +/// An owned unmanaged string that may contain non-utf8 characters. +/// +/// This differs from `CString` in that it is **not** nul-terminated. +pub struct UString(Box<[u8]>); + +impl UString +{ + /// Create a new unmanaged string from `len` bytes from a buffer + pub fn new_from_buffer(buffer: &mut B, len: usize) -> Self + { + let mut vec = Vec::with_capacity(len); + buffer.copy_to_slice(&mut vec[..]); + Self(vec.into()) + } + + pub fn new_from_bytes(bytes: impl Into) -> Self + { + Self::new_from_slice(bytes.into()) + } + + /// Create a new unmanaged string by copying it from a slice + #[inline] + pub fn new_from_slice(slice: impl AsRef<[u8]>) -> Self + { + Self::new(slice.as_ref().to_owned()) + } + /// Create a new unmanaged string from a container containing arbitrary bytes + #[inline] + pub fn new(bytes: impl Into>) -> Self + { + Self(bytes.into()) + } + + /// Create an owned (allocated) unmanaged string from a pointer and size + /// + /// # Safety + /// `ptr` *must* have been originally created form a `Box::into_raw()` call. + #[inline] + pub unsafe fn from_raw(ptr: *mut u8, size: usize) -> Self + { + Self(Box::from_raw(std::ptr::slice_from_raw_parts_mut(ptr, size))) + } + + #[inline] + pub fn into_raw(self) -> Box<[u8]> + { + self.0 + } + + pub fn to_mut_bytes(&self) -> bytes::BytesMut + { + bytes::BytesMut::from(&self.0[..]) + } + + pub fn into_bytes(self) -> bytes::Bytes + { + bytes::Bytes::from(self.0) + } + + pub fn to_bytes(&self) -> bytes::Bytes + { + bytes::Bytes::copy_from_slice(&self.0[..]) + } + + pub fn into_boxed_ustr(self) -> Box + { + unsafe { + Box::::from_raw(Box::<[u8]>::into_raw(self.0) as *mut UStr) + } + } + + pub fn from_boxed_ustr(us: Box) -> Self + { + Self(unsafe { + Box::<[u8]>::from_raw(Box::::into_raw(us) as *mut [u8]) + }) + } + + pub fn into_string(self) -> Result + { + let vec = Vec::from(self.0); + String::from_utf8(vec).map_err(|e| { + Self(e.into_bytes().into()) + }) + } + + /// Allocate a new owned `CString` from this nul-terminated unmanaged string. + /// + /// # Safety + /// The underlying slice *must* contain a null byte. + #[inline] + pub unsafe fn to_c_string(&self) -> CString + { + self.deref().to_c_str().into() + } + + /// Convert this nul-terminated `UString` into a `CString`. + /// + /// # Returns + /// If the last bytes of the sequence is not `\0`, `Err` is returned. + #[inline] + pub fn into_c_string(self) -> Result + { + CString::from_vec_with_nul(self.0.into()) + } + + /// Convert this owned `CString` into a nul-terminated `UString`. + #[inline] + pub fn from_c_string(cstring: CString) -> Self + { + Self(cstring.into_bytes_with_nul().into()) + } +} + +impl From for Box +{ + #[inline] + fn from(from: UString) -> Self + { + from.into_boxed_ustr() + } +} + +impl From> for UString +{ + #[inline] + fn from(from: Box) -> Self + { + from.into_ustring() + } +} + +impl AsRef<[u8]> for UStr +{ + #[inline] + fn as_ref(&self) -> &[u8] + { + &self.0[..] + } +} + +/* XXX: Is this needed? +impl Deref for ustr +{ +type Target = [u8]; +#[inline] +fn deref(&self) -> &Self::Target { +&self.0[..] + } +} +XXX: Is DerefMut needed, too? +XXX: What about AsMut? + */ + +impl AsRef for UString +{ + #[inline(always)] + fn as_ref(&self) -> &UStr + { + UStr::new(&self.0[..]) + } +} + + +impl Deref for UString +{ + type Target = UStr; + + #[inline] + fn deref(&self) -> &Self::Target { + self.as_ref() + } +} + +//XXX: Should we allow mutation of the `ustr` here? I think so, because we don't allow mutation of the underlying [u8] +impl AsMut for UString +{ + #[inline(always)] + fn as_mut(&mut self) -> &mut UStr + { + UStr::new_mut(&mut self.0[..]) + } +} +impl DerefMut for UString +{ + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { + self.as_mut() + } +} + +#[cfg(test)] +mod tests +{ + use super::*; + + #[test] + fn dup2() + { + use std::fs; + unsafe { std::io::stderr().dup2(&mut std::io::stdout()) } + .expect("Failed to route stdout to stderr"); + + let null = fs::OpenOptions::new() + .read(true) + .write(true) + .open("/dev/null").unwrap(); + unsafe { + null.dup2(&mut std::io::stdin()) + }.expect("Failed to shunt stdin to /dev/null"); + + unsafe { + null.dup2(&mut std::io::stderr()) + }.expect("Failed to shunt stderr to /dev/null"); + } +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..02967fe --- /dev/null +++ b/src/main.rs @@ -0,0 +1,11 @@ + +#![cfg_attr(debug_assertions, allow(dead_code))] + +#[macro_use] mod ext; use ext::*; +mod loli; + +fn main() { + + println!("Hello, world!"); +} +