mempipe/src/loli.rs

//! LOw Level Interface
//!
//! Provides interfaces to low-level operations and syscalls.
use std::ops::{Deref, DerefMut};
use std::{
    fmt,
    error,
};
use bytes::{
    Buf,
};
use std::os::unix::io::*;
use std::ffi::{
    CStr, CString,
};

//TODO: Provide adequate wrappers for the extern functions below

/// Raw exports
///
/// Hand-written `extern "C"` declarations for the C library functions used by this file,
/// together with re-exports of the C types and constants needed to call them.
/// Everything in here is unchecked FFI: callers are responsible for upholding the C contracts.
mod raw {
    // C primitive types (`c_int`, `c_char`, `c_void`, ...) are made visible to the parent module.
    pub(super) use std::os::raw::*;
    // libc typedefs that `std::os::raw` does not provide, renamed to follow the `c_*` convention.
    use libc::{
	off64_t as c_off64_t,
	off_t as c_off_t,
	pid_t as c_pid_t,
	size_t as c_size_t,
    };


    // Function definitions

    extern "C" {
	// See memfd_create(2).
	pub fn memfd_create(name: *const c_char, flags: c_int) -> c_int;

	// See dup(2) / dup2(2). Wrapped by the `DupExt` / `Dup2Ext` traits in the parent module.
	pub fn dup(fd: c_int) -> c_int;
	pub fn dup2(from: c_int, to: c_int) -> c_int;

	// See ftruncate(2). The `64` variant takes an explicit 64-bit offset type.
	pub fn ftruncate(fd: c_int, length: c_off_t) -> c_int;
	pub fn ftruncate64(fd: c_int, length: c_off64_t) -> c_int;

	// See close(2).
	pub fn close(fd: c_int) -> c_int;

	// See getenv(3) / getpid(2).
	pub fn getenv(name: *const c_char) -> *mut c_char;
	pub fn getpid() -> c_pid_t;

	// See mmap(2). Failure is signalled by returning `MAP_FAILED` (re-exported below).
	pub fn mmap(
	    addr: *mut c_void,
	    len: c_size_t,
	    prot: c_int,
	    flags: c_int,
	    fd: c_int,
	    offset: c_off_t
	) -> *mut c_void;

	// Same as `mmap`, but with an explicit 64-bit offset type.
	pub fn mmap64(
	    addr: *mut c_void,
	    len: c_size_t,
	    prot: c_int,
	    flags: c_int,
	    fd: c_int,
	    offset: c_off64_t
	) -> *mut c_void;
    }

    // Re-exports

    pub use libc::MAP_FAILED;
    // Friendlier aliases for the opaque pointee type and the C character type.
    pub use std::ffi::c_void as Opaque;
    pub use libc::c_char as Char;
}

// Errno error handling

//TODO: Move all errno related stuff into a submodule inside `src/loli/` to un-clutter this file; and add a `RawErrno`-like struct for errors that are *just* an errno value (that implements `SystemError`)

/// A raw `errno` value
///
/// This is an alias for the C `int` type (`raw::c_int`).
pub type RawErrno = raw::c_int;

/// Default max buffer size for formatting `errno` message codes.
///
/// This is the size used by glibc's `perror()`. It should cover all internationalised error messages.
pub const DEFAULT_ERRNO_MESSAGE_BUFFER_SIZE: usize = 1024;

/// Default buffer size for short `errno` message code strings.
///
/// # Internationalisation
/// This size covers all current English `errno` messages. However, if the program is run with a non-ASCII locale, the size may exceed this by far. See `DEFAULT_ERRNO_MESSAGE_BUFFER_SIZE` for those cases.
pub const DEFAULT_ASCII_ERRNO_MESSAGE_BUFFER_SIZE: usize = 64;

/// An error type which also contains an `errno` code.
///
/// Implementors only have to provide `code()`. The message helpers are derived from it and
/// all use the default-sized `ErrnoFormatter` (see `DEFAULT_ERRNO_MESSAGE_BUFFER_SIZE`).
/// Use `SystemErrorExt` to pick a different buffer size.
//TODO: A way to integrate this into eyre::Reports? Maybe in the extension trait? `.to_report_with_errno()` or something?
pub trait SystemError: std::error::Error
{
    /// The raw `errno` value carried by this error.
    fn code(&self) -> RawErrno;

    /// Write the system message for `self.code()` to `f`.
    ///
    /// This goes through the default-sized `ErrnoFormatter` rather than repeating the buffer
    /// size as a literal, so it cannot drift from what `get_message()` / `message()` produce.
    #[inline(always)]
    fn fmt_message(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
    {
	fmt::Display::fmt(&ErrnoFormatter::new_default(self.code()), f)
    }

    /// A `Display`-able formatter for the system message of `self.code()`.
    ///
    /// No message lookup happens until the returned value is formatted.
    #[inline]
    fn get_message(&self) -> ErrnoFormatter
    {
	ErrnoFormatter::new_default(self.code())
    }

    /// The system message for `self.code()` as an owned `String`.
    #[inline]
    fn message(&self) -> String
    {
	self.get_message().to_string()
    }
}

/// Extension trait for `SystemError` types.
///
/// # Specific buffered sizes
/// Provides user-configurable buffer sized methods for message formatting.
/// Each method mirrors the same-named method on `SystemError` (without the `_with_buffer` suffix),
/// but takes the message buffer size as the `BUFFER_SIZE` const generic.
pub trait SystemErrorExt: SystemError
{
    /// Write the message for this error's code to `f`, using a `BUFFER_SIZE`-byte message buffer.
    fn fmt_message_with_buffer<const BUFFER_SIZE: usize>(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result;
    /// Get a formatter for this error's code that uses a `BUFFER_SIZE`-byte message buffer.
    fn get_message_with_buffer<const BUFFER_SIZE: usize>(&self) -> ErrnoFormatter<BUFFER_SIZE>;
    /// Get the message for this error's code as a `String`, using a `BUFFER_SIZE`-byte message buffer.
    fn message_with_buffer<const BUFFER_SIZE: usize>(&self) -> String;
}

// Blanket implementation: every `SystemError` (sized or not) gets the buffer-size-aware helpers.
impl<T: ?Sized> SystemErrorExt for T
where T: SystemError
{
    /// Writes the message for `self.code()` to `f` through a `BUFFER_SIZE`-byte scratch buffer.
    #[inline]
    fn fmt_message_with_buffer<const BUFFER_SIZE: usize>(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
    {
	let code = self.code();
	errno_fmt_message::<BUFFER_SIZE>(code, f)
    }

    /// Wraps `self.code()` in a formatter bound to `BUFFER_SIZE`.
    #[inline]
    fn get_message_with_buffer<const BUFFER_SIZE: usize>(&self) -> ErrnoFormatter<BUFFER_SIZE>
    {
	let code = self.code();
	ErrnoFormatter::<BUFFER_SIZE>::new(code)
    }

    /// Renders the message for `self.code()` into a fresh `String`.
    #[inline]
    fn message_with_buffer<const BUFFER_SIZE: usize>(&self) -> String
    {
	let formatter = ErrnoFormatter::<BUFFER_SIZE>::new(self.code());
	formatter.to_string()
    }
}

/// String formatter for an `errno` value.
///
/// # `MESSAGE_BUFFER_SIZE`
/// This structure contains only the `errno` code. The `MESSAGE_BUFFER_SIZE` const generic is used as a max size for formatting the error messages.
/// It **must** be above 0.
///
/// When not specified, `MESSAGE_BUFFER_SIZE` defaults to `DEFAULT_ERRNO_MESSAGE_BUFFER_SIZE`.
///
/// # Layout
/// The type is `repr(transparent)` over `RawErrno`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Copy)]
#[repr(transparent)]
pub struct ErrnoFormatter<const MESSAGE_BUFFER_SIZE: usize = DEFAULT_ERRNO_MESSAGE_BUFFER_SIZE>(RawErrno);

//TODO: Document the following

impl ErrnoFormatter<DEFAULT_ASCII_ERRNO_MESSAGE_BUFFER_SIZE>
{
    /// Create a formatter for `code` whose message buffer is `DEFAULT_ASCII_ERRNO_MESSAGE_BUFFER_SIZE` bytes.
    #[inline(always)]
    pub const fn new_ascii(code: RawErrno) -> Self
    {
	Self::new(code)
    }
}

impl ErrnoFormatter<DEFAULT_ERRNO_MESSAGE_BUFFER_SIZE>
{
    /// Create a formatter for `code` whose message buffer is `DEFAULT_ERRNO_MESSAGE_BUFFER_SIZE` bytes.
    #[inline(always)]
    pub const fn new_default(code: RawErrno) -> Self
    {
	Self::new(code)
    }
}

impl<const BUFF_SIZE: usize> ErrnoFormatter<BUFF_SIZE>
{
    #[inline(always)]
    pub const fn new(code: RawErrno) -> Self
    {
	Self(code)
    }

    #[inline]
    pub const fn raw(self) -> RawErrno
    {
	self.0
    }

    #[inline]
    pub const fn buffer_size(self) -> usize
    {
	BUFF_SIZE
    }

    #[inline]
    pub const fn with_buffer_size<const NBUFF_SIZE: usize>(self) -> ErrnoFormatter<NBUFF_SIZE>
    {
	ErrnoFormatter::<NBUFF_SIZE>::new(self.0)
    }

    pub fn try_to_string(&self) -> Result<String, Result<UString, RawErrno>>
    {
	match self.try_to_raw_string() {
	    Ok(string) => string.into_string().map_err(|us| Ok(us)),
	    Err(en) => Err(Err(en)),
	}
    }

    #[inline]
    pub fn try_to_raw_string(&self) -> Result<UString, RawErrno>
    {
	errno_ustring_buffer::<BUFF_SIZE>(self.0)
    }

    #[inline]
    pub fn try_to_c_string(&self) -> Result<CString, RawErrno>
    {
	errno_cstring_buffer::<BUFF_SIZE>(self.0)
    }

    #[inline]
    pub fn to_raw_buffer(&self) -> Result<[u8; BUFF_SIZE], RawErrno>
    {
	errno_raw_buffer(self.0)
    }
}

impl<const BUFF_SIZE: usize> fmt::Display for ErrnoFormatter<BUFF_SIZE>
{
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
    {
	errno_fmt_message::<BUFF_SIZE>(self.0, f)
    }
}


/// Write the message for the `errno` value `code` to `f`, using a `BUFF_SIZE`-byte scratch buffer.
///
/// The message is taken to be the bytes of the buffer up to (not including) the first nul byte,
/// decoded lossily as UTF-8.
///
/// # Fallbacks
/// This function never fails because of the lookup itself. Instead a placeholder naming `code` is written when:
/// * the message could not be extracted (`errno_raw_buffer()` returned `Err`), or
/// * the extracted message is empty.
///
/// The only errors returned are those of the underlying formatter.
fn errno_fmt_message<const BUFF_SIZE: usize>(code: RawErrno, f: &mut fmt::Formatter<'_>) -> fmt::Result
{
    // Both fallbacks are kept out of line: they are not expected to be hit in normal operation.
    #[cold]
    #[inline(never)]
    fn empty_message(code: RawErrno, f: &mut fmt::Formatter<'_>) -> fmt::Result
    {
	write!(f, "<!unknown: empty message for errno {}>", code)
    }

    #[cold]
    #[inline(never)]
    fn extraction_failed(code: RawErrno, f: &mut fmt::Formatter<'_>) -> fmt::Result
    {
	write!(f, "<!unknown: message extraction failed for errno {}>", code)
    }

    match errno_raw_buffer::<BUFF_SIZE>(code) {
	Ok(buffer) => {
	    // Cut at the first nul. If there is none, the whole buffer is the message.
	    let end = buffer.iter().position(|&b| b == 0).unwrap_or(BUFF_SIZE);
	    let message = &buffer[..end];

	    // NOTE: Splitting on nul and taking the first piece always yields *something*
	    // (possibly an empty slice), so emptiness has to be checked explicitly.
	    if message.is_empty() {
		empty_message(code, f)
	    } else {
		f.write_str(&String::from_utf8_lossy(message))
	    }
	},
	Err(_) => extraction_failed(code, f),
    }
}

/// Look up the message for `errno` with `strerror_r()` and return the whole `BUFF_SIZE`-byte buffer it was written into.
///
/// On success the buffer holds the nul-terminated message; every byte after it is `0`.
///
/// # Errors
/// Returns the error code reported by `strerror_r()` (e.g. when `errno` is unknown or `BUFF_SIZE` is too small for the message).
fn errno_raw_buffer<const BUFF_SIZE: usize>(errno: RawErrno) -> Result<[u8; BUFF_SIZE], RawErrno>
{
    // The buffer is zero-initialised rather than left uninitialised: `strerror_r()` only writes the
    // message and its terminator, and returning the unwritten tail as `u8`s would be undefined behaviour.
    let mut buffer = [0u8; BUFF_SIZE];

    // SAFETY: `buffer` is a live, writable allocation of exactly `BUFF_SIZE` bytes.
    let rv = unsafe {
	libc::strerror_r(errno, buffer.as_mut_ptr() as *mut raw::c_char, BUFF_SIZE)
    };

    // The XSI `strerror_r()` reports failure by *returning* the error number.
    // The thread's `errno` is not necessarily touched, so it must not be consulted in this case.
    if rv > 0 {
	return Err(rv);
    }
    // Older C libraries return -1 and set `errno` instead; `errno_if_nz()` covers that convention.
    errno_if_nz(rv)?;

    Ok(buffer)
}

/// Look up the message for `errno` with `strerror_r()`, writing it into the caller-provided `buffer`.
///
/// # Returns
/// On success, an unmanaged string covering **all** of `buffer` (not just the message): the nul-terminated
/// message comes first, followed by whatever bytes `buffer` already contained past the terminator.
///
/// # Errors
/// Returns the error code reported by `strerror_r()` (e.g. when `errno` is unknown or `buffer` is too small for the message).
fn errno_raw_ustr(errno: RawErrno, buffer: &mut [u8]) -> Result<&UStr, RawErrno>
{
    // SAFETY: `buffer` is a live, writable allocation of exactly `buffer.len()` bytes.
    let rv = unsafe {
	libc::strerror_r(errno, buffer.as_mut_ptr() as *mut raw::c_char, buffer.len())
    };

    // The XSI `strerror_r()` reports failure by *returning* the error number.
    // The thread's `errno` is not necessarily touched, so it must not be consulted in this case.
    if rv > 0 {
	return Err(rv);
    }
    // Older C libraries return -1 and set `errno` instead; `errno_if_nz()` covers that convention.
    errno_if_nz(rv)?;

    Ok(UStr::new_mut(buffer))
}

/// Look up the message for `errno` into `buffer` and view it as a nul-terminated C string.
///
/// Fails with the code from `errno_raw_ustr()` if the lookup fails.
#[inline(always)]
fn errno_raw_cstr(errno: RawErrno, buffer: &mut [u8]) -> Result<&CStr, RawErrno>
{
    let message = errno_raw_ustr(errno, buffer)?;
    // SAFETY (as relied upon by the original code): a successful lookup is expected to have left a nul-terminated message at the start of `buffer`.
    Ok(unsafe { message.to_c_str() })
}

/// Look up the message for `errno` and return the whole `BUFF_SIZE`-byte message buffer as an owned unmanaged string.
///
/// The returned string is always `BUFF_SIZE` bytes long: the message, its nul-terminator, and whatever
/// `errno_raw_buffer()` left in the remainder of the buffer.
///
/// # Errors
/// Propagates the error code from `errno_raw_buffer()`.
fn errno_ustring_buffer<const BUFF_SIZE: usize>(errno: RawErrno) -> Result<UString, RawErrno>
{
    let buf = errno_raw_buffer::<BUFF_SIZE>(errno)?;
    // Copy the buffer directly from the array. Callers such as `errno_cstring_buffer()` rely on the
    // bytes (including the nul-terminator) actually being present in the returned string.
    Ok(UString::new_from_slice(&buf[..]))
}

/// Look up the message for `errno` and return it as an owned C string.
///
/// Fails with the code from `errno_ustring_buffer()` if the lookup fails.
fn errno_cstring_buffer<const BUFF_SIZE: usize>(errno: RawErrno) -> Result<CString, RawErrno>
{
    let message = errno_ustring_buffer::<BUFF_SIZE>(errno)?;
    // SAFETY (as relied upon by the original code): a successfully extracted message buffer is expected to contain a nul-terminator.
    Ok(unsafe { message.to_c_string() })
}

/// Look up the message buffer for `errno` and decode it lossily as UTF-8 into an owned `String`.
///
/// Fails with the code from `errno_ustring_buffer()` if the lookup fails.
fn errno_lossy_string_buffer<const BUFF_SIZE: usize>(errno: RawErrno) -> Result<String, RawErrno>
{
    let message = errno_ustring_buffer::<BUFF_SIZE>(errno)?;
    let decoded = message.to_string_lossy();
    Ok(decoded.into_owned())
}

/// An error when `dup()` or `dup2()`ing a stream.
///
/// Wraps the raw `errno` value captured when the call failed; it is exposed through `SystemError::code()`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct DupError(RawErrno);

impl SystemError for DupError
{
    /// The `errno` code for this error.
    #[inline]
    fn code(&self) -> RawErrno
    {
	let Self(code) = self;
	*code
    }
}

impl error::Error for DupError{}
impl fmt::Display for DupError
{
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
    {
	write!(f, "dup() failed with {}: ", self.0)?;
	self.fmt_message(f)
    }
}


/// Get the current thread's `errno` value
pub fn current_errno() -> RawErrno
{
    // SAFETY: `__errno_location()` is expected to return a valid pointer to the calling thread's `errno`.
    let location = unsafe { libc::__errno_location() };
    unsafe { *location }
}

/// Check the return value of an FFI function for errors, and fetch the current errno value if there was one.
///
/// `chk` is the *failure* predicate: it receives `rv` and returns `true` if `rv` signals an error.
///
/// # Returns
/// If `chk(rv)` returns `true`, `Err(<current errno>)` is returned. Otherwise, `Ok(rv)` is returned.
#[inline(always)]
pub fn errno_if<T: Copy, F>(rv: T, chk: F) -> Result<T, RawErrno>
where F: Fn(T) -> bool
{
    // Reading errno only happens on failure; keep that path out of line.
    #[cold]
    #[inline(never)]
    fn failure_code() -> RawErrno
    {
	current_errno()
    }

    if chk(rv) {
	return Err(failure_code());
    }
    Ok(rv)
}
/// Succeed only if `rv` is exactly `EXPECTED`.
///
/// # Returns
/// `Ok(rv)` if `rv == EXPECTED`. Otherwise, `Err(<current errno>)` is returned.
#[inline(always)]
fn errno_if_not<const EXPECTED: raw::c_int>(rv: raw::c_int) -> Result<raw::c_int, RawErrno>
{
    // Reading errno only happens on failure; keep that path out of line.
    #[cold]
    #[inline(never)]
    fn failure_code() -> RawErrno
    {
	current_errno()
    }

    if rv != EXPECTED {
	return Err(failure_code());
    }
    Ok(rv)
}
/// Succeed only if `rv` is zero.
///
/// # Returns
/// `Ok(())` if `rv == 0`. Otherwise, `Err(<current errno>)` is returned.
#[inline(always)]
fn errno_if_nz(rv: raw::c_int) -> Result<(), RawErrno>
{
    match errno_if_not::<0>(rv) {
	Ok(_) => Ok(()),
	Err(code) => Err(code),
    }
}

// Wrapper traits

//TODO: Move wrapper traits into a separate module in src/loli/ to unclutter this file

/// Extension methods for `dup()`ing file descriptors
///
/// Implemented below for every type that is both `AsRawFd` and `FromRawFd`.
pub trait DupExt: Sized
{
    /// Duplicate this object's file descriptor and wrap the new descriptor in a new `Self`.
    ///
    /// # Errors
    /// Returns a `DupError` carrying the `errno` value if duplication fails.
    ///
    /// # Safety
    /// NOTE(review): the exact contract is not stated anywhere in this file; presumably the caller must ensure
    /// that having a second `Self` built with `from_raw_fd()` on the duplicate is valid for the type — TODO confirm.
    unsafe fn dup(&self) -> Result<Self, DupError>;
}

/// Extension methods for `dup2()`ing file descriptors
///
/// Implemented below for every pair of `AsRawFd` types.
pub trait Dup2Ext<U: ?Sized>
{
    /// Make `other`'s file descriptor refer to the same open file as `self`'s (`dup2(self, other)`).
    ///
    /// # Errors
    /// Returns a `DupError` carrying the `errno` value if the call fails.
    ///
    /// # Safety
    /// NOTE(review): the exact contract is not stated anywhere in this file; presumably the caller must ensure
    /// that silently replacing what `other`'s descriptor refers to is acceptable to every user of it — TODO confirm.
    unsafe fn dup2(&self, other: &mut U) -> Result<(), DupError>;
}

impl<T: ?Sized, U: ?Sized> Dup2Ext<U> for T
where T: AsRawFd,
      U: AsRawFd
{
    unsafe fn dup2(&self, other: &mut U) -> Result<(), DupError>
    {
	errno_if(raw::dup2(self.as_raw_fd(), other.as_raw_fd()), |v| v < 0).map_err(DupError)?;
	Ok(())
    }
}

impl<T> DupExt for T
where T: FromRawFd + AsRawFd
{
    unsafe fn dup(&self) -> Result<Self, DupError>
    {
	errno_if(raw::dup(self.as_raw_fd()), |v| v < 0)
	    .map(|fd| Self::from_raw_fd(fd)) // WHY are `unsafe fn`s not first class in an `unsafe` context??
	    .map_err(DupError)
    }
}

//TODO: Document the following, and make sure it works like CStr/CString (minus the nul-termination bs)

//TODO: Move UStr/UString and related stuffs into a separate module in src/loli/ to unclutter this file


/// A reference to an unmanaged string that may contain non-utf8 characters.
///
/// This differs from `CStr` in that it is **not** nul-terminated.
///
/// This is an unsized, `repr(transparent)` wrapper around `[u8]`; it is only ever handled behind a
/// reference or a `Box`. See `UString` for the owned counterpart.
#[repr(transparent)]
pub struct UStr([u8]);

impl UStr {
    /// View `size` bytes starting at `ptr` as a mutable unmanaged string.
    ///
    /// # Safety
    /// `ptr` must be valid for reads and writes of `size` bytes for all of `'a`, and that memory must not be
    /// accessed through anything else while the returned reference is alive.
    #[inline(always)]
    pub unsafe fn from_raw_mut<'a>(ptr: *mut u8, size: usize) -> &'a mut Self
    {
	Self::new_mut(std::slice::from_raw_parts_mut(ptr, size))
    }
    /// View `size` bytes starting at `ptr` as an unmanaged string.
    ///
    /// # Safety
    /// `ptr` must be valid for reads of `size` bytes for all of `'a`, and that memory must not be
    /// mutated while the returned reference is alive.
    #[inline(always)]
    pub unsafe fn from_raw<'a>(ptr: *const u8, size: usize) -> &'a Self
    {
	Self::new(std::slice::from_raw_parts(ptr, size))
    }

    /// View a byte slice as an unmanaged string.
    #[inline]
    pub fn new<'a>(bytes: &'a [u8]) -> &'a Self
    {
	// SAFETY: `UStr` is a `repr(transparent)` wrapper around `[u8]`, so the two pointee types share a layout.
	unsafe {
	    &*(bytes as *const [u8] as *const Self)
	}
    }

    /// View a mutable byte slice as a mutable unmanaged string.
    #[inline]
    pub fn new_mut<'a>(bytes: &'a mut [u8]) -> &'a mut Self
    {
	// SAFETY: `UStr` is a `repr(transparent)` wrapper around `[u8]`, so the two pointee types share a layout.
	unsafe {
	    &mut *(bytes as *mut [u8] as *mut Self)
	}
    }

    /// Convert to a reference to a nul-terminated byte string
    ///
    /// The returned `CStr` ends at the first nul byte found from the start of the slice.
    ///
    /// # Safety
    /// The unmanaged string must contain a null byte in its slice.
    pub unsafe fn to_c_str(&self) -> &CStr
    {
	let start = self.0.as_ptr().cast::<raw::c_char>();
	CStr::from_ptr(start)
    }

    /// Create a `CStr` from this nul-terminated unmanaged byte string
    ///
    /// # Returns
    /// The final byte in the slice must be `\0`, or `Err` will be returned.
    pub fn as_c_str(&self) -> Result<&CStr, std::ffi::FromBytesWithNulError>
    {
	CStr::from_bytes_with_nul(&self.0)
    }

    /// Create an unmanaged string from a nul-terminated string
    ///
    /// # Nul-terminator
    /// The returned unmanaged string reference will contain the nul-terminator as the final byte in the slice.
    #[inline]
    pub fn from_c_str(raw: &CStr) -> &Self
    {
	let bytes = raw.to_bytes_with_nul();
	Self::new(bytes)
    }

    /// Returns `true` if the underlying slice is nul-terminated, and can be safely converted back/forth with `CStr`.
    ///
    /// An empty slice is not nul-terminated.
    #[inline]
    pub fn is_nul_terminated(&self) -> bool
    {
	self.0.ends_with(&[0])
    }

    /// Borrow the bytes as a `str`, failing if they are not valid UTF-8.
    #[inline]
    pub fn to_str(&self) -> Result<&str, std::str::Utf8Error>
    {
	std::str::from_utf8(&self.0)
    }

    /// Mutably borrow the bytes as a `str`, failing if they are not valid UTF-8.
    #[inline]
    pub fn to_mut_str(&mut self) -> Result<&mut str, std::str::Utf8Error>
    {
	std::str::from_utf8_mut(&mut self.0)
    }

    /// Decode the bytes as UTF-8, replacing invalid sequences. Only allocates if a replacement was needed.
    pub fn to_string_lossy(&self) -> std::borrow::Cow<'_, str>
    {
	String::from_utf8_lossy(&self.0)
    }

    /// Convert a boxed unmanaged string into its owned counterpart.
    #[inline]
    pub fn into_ustring(self: Box<Self>) -> UString
    {
	UString::from_boxed_ustr(self)
    }

    /// Convert an owned unmanaged string into a boxed `UStr`.
    #[inline]
    pub fn from_ustring(owned: UString) -> Box<Self>
    {
	UString::into_boxed_ustr(owned)
    }
}

/// An owned unmanaged string that may contain non-utf8 characters.
///
/// This differs from `CString` in that it is **not** nul-terminated.
///
/// The bytes are stored in a `Box<[u8]>`. Dereferences to `UStr`, the borrowed counterpart.
pub struct UString(Box<[u8]>);

impl UString
{
    /// Create a new unmanaged string from `len` bytes from a buffer
    ///
    /// Exactly `len` bytes are consumed from `buffer` and copied into the new string.
    ///
    /// # Panics
    /// If `buffer` has fewer than `len` bytes remaining.
    pub fn new_from_buffer<B: Buf + ?Sized>(buffer: &mut B, len: usize) -> Self
    {
	// The destination must actually be `len` bytes *long*: `copy_to_slice()` fills exactly the slice
	// it is given, and a vector that merely has `len` capacity is still an empty slice.
	let mut vec = vec![0u8; len];
	buffer.copy_to_slice(&mut vec[..]);
	Self(vec.into())
    }

    /// Create a new unmanaged string by copying the contents of anything convertible to `Bytes`
    pub fn new_from_bytes(bytes: impl Into<bytes::Bytes>) -> Self
    {
	let bytes: bytes::Bytes = bytes.into();
	Self::new_from_slice(bytes)
    }

    /// Create a new unmanaged string by copying it from a slice
    #[inline]
    pub fn new_from_slice(slice: impl AsRef<[u8]>) -> Self
    {
	Self::new(slice.as_ref().to_owned())
    }
    /// Create a new unmanaged string from a container containing arbitrary bytes
    #[inline]
    pub fn new(bytes: impl Into<Box<[u8]>>) -> Self
    {
	Self(bytes.into())
    }

    /// Create an owned (allocated) unmanaged string from a pointer and size
    ///
    /// # Safety
    /// `ptr` *must* have been originally created from a `Box::into_raw()` call on a boxed slice of
    /// exactly `size` bytes, and ownership of that allocation is transferred to the returned value.
    #[inline]
    pub unsafe fn from_raw(ptr: *mut u8, size: usize) -> Self
    {
	Self(Box::from_raw(std::ptr::slice_from_raw_parts_mut(ptr, size)))
    }

    /// Consume the string and return its boxed bytes.
    #[inline]
    pub fn into_raw(self) -> Box<[u8]>
    {
	self.0
    }

    /// Copy the bytes into a new `BytesMut`.
    pub fn to_mut_bytes(&self) -> bytes::BytesMut
    {
	bytes::BytesMut::from(&self.0[..])
    }

    /// Consume the string, converting it into `Bytes`.
    pub fn into_bytes(self) -> bytes::Bytes
    {
	bytes::Bytes::from(self.0)
    }

    /// Copy the bytes into a new `Bytes`.
    pub fn to_bytes(&self) -> bytes::Bytes
    {
	bytes::Bytes::copy_from_slice(&self.0[..])
    }

    /// Convert into a boxed `UStr` without copying.
    pub fn into_boxed_ustr(self) -> Box<UStr>
    {
	// SAFETY: `UStr` is a `repr(transparent)` wrapper around `[u8]`, so the allocation can be re-typed in place.
	unsafe {
	    Box::<UStr>::from_raw(Box::<[u8]>::into_raw(self.0) as *mut UStr)
	}
    }

    /// Convert from a boxed `UStr` without copying.
    pub fn from_boxed_ustr(us: Box<UStr>) -> Self
    {
	// SAFETY: `UStr` is a `repr(transparent)` wrapper around `[u8]`, so the allocation can be re-typed in place.
	Self(unsafe {
	    Box::<[u8]>::from_raw(Box::<UStr>::into_raw(us) as *mut [u8])
	})
    }

    /// Convert into a `String` if the bytes are valid UTF-8.
    ///
    /// # Returns
    /// On failure the original string is handed back unchanged as `Err`.
    pub fn into_string(self) -> Result<String, Self>
    {
	let vec = Vec::from(self.0);
	String::from_utf8(vec).map_err(|e| {
	    Self(e.into_bytes().into())
	})
    }

    /// Convert into a `String` if the bytes are valid UTF-8.
    ///
    /// Same as `into_string()`, but failure is reported as a `FromUtf8Error` (which still owns the bytes).
    pub fn try_into_string(self) -> Result<String, std::string::FromUtf8Error>
    {
	let vec = Vec::from(self.0);
	String::from_utf8(vec)
    }

    /// Allocate a new owned `CString` from this nul-terminated unmanaged string.
    ///
    /// The copy ends at the first nul byte found from the start of the string.
    ///
    /// # Safety
    /// The underlying slice *must* contain a null byte.
    #[inline]
    pub unsafe fn to_c_string(&self) -> CString
    {
	self.deref().to_c_str().into()
    }

    /// Convert this nul-terminated `UString` into a `CString`.
    ///
    /// # Returns
    /// If the last bytes of the sequence is not `\0`, `Err` is returned.
    #[inline]
    pub fn into_c_string(self) -> Result<CString, std::ffi::FromVecWithNulError>
    {
	CString::from_vec_with_nul(self.0.into())
    }

    /// Convert this owned `CString` into a nul-terminated `UString`.
    #[inline]
    pub fn from_c_string(cstring: CString) -> Self
    {
	Self(cstring.into_bytes_with_nul().into())
    }
}

impl From<UString> for Box<UStr>
{
    #[inline]
    fn from(from: UString) -> Self
    {
	from.into_boxed_ustr()
    }
}

impl From<Box<UStr>> for UString
{
    #[inline]
    fn from(from: Box<UStr>) -> Self
    {
	from.into_ustring()
    }
}

impl AsRef<[u8]> for UStr
{
    #[inline]
    fn as_ref(&self) -> &[u8]
    {
	&self.0[..]
    }
}

/* XXX: Is this needed?
impl Deref for ustr
{
type Target = [u8];
#[inline]
fn deref(&self) -> &Self::Target {
&self.0[..]
    }
}

XXX: Is DerefMut needed, too?
XXX: What about AsMut? I think those two are unsafe, especially if the &mut UStr comes from an &mut str. There are unsafe methods for this anyway.
 */

impl AsRef<UStr> for UString
{
    #[inline(always)]
    fn as_ref(&self) -> &UStr
    {
	UStr::new(&self.0[..])
    }
}


impl Deref for UString
{
    type Target = UStr;

    #[inline]
    fn deref(&self) -> &Self::Target {
	self.as_ref()
    }
}

//Should we allow mutation of the `ustr` here? I think so, because we don't allow mutation of the underlying [u8]
impl AsMut<UStr> for UString
{
    #[inline(always)]
    fn as_mut(&mut self) -> &mut UStr
    {
	UStr::new_mut(&mut self.0[..])
    }
}
impl DerefMut for UString
{
    #[inline]
    fn deref_mut(&mut self) -> &mut Self::Target {
	self.as_mut()
    }
}

// UStr/ing <--?> CStr/ing

impl From<CString> for UString
{
    #[inline]
    fn from(from: CString) -> Self
    {
	Self::from_c_string(from)
    }
}

impl<'a> From<&'a CStr> for &'a UStr
{
    #[inline]
    fn from(from: &'a CStr) -> Self
    {
	UStr::from_c_str(from)
    }
}

impl TryFrom<UString> for CString
{
    type Error = std::ffi::FromVecWithNulError;

    #[inline]
    fn try_from(from: UString) -> Result<Self, Self::Error>
    {
	from.into_c_string()
    }
}

impl<'a> TryFrom<&'a UStr> for &'a CStr
{
    type Error = std::ffi::FromBytesWithNulError;

    #[inline]
    fn try_from(from: &'a UStr) -> Result<Self, Self::Error>
    {
	from.as_c_str()
    }
}

// UStr/ing <--?> str/ing

impl From<String> for UString
{
    #[inline]
    fn from(from: String) -> Self
    {
	Self::new(from.into_bytes())
    }
}

impl<'a> From<&'a str> for &'a UStr
{
    #[inline]
    fn from(from: &'a str) -> Self
    {
	UStr::new(from.as_bytes())
    }
}
// No From<&'_ mut str> for &'_ mut UStr: The bytes belonging to the &'_ str may be modified in safe code if this were allowed.

impl TryFrom<UString> for String
{
    type Error = std::string::FromUtf8Error;

    #[inline]
    fn try_from(from: UString) -> Result<Self, Self::Error>
    {
	from.try_into_string()
    }
}

impl<'a> TryFrom<&'a UStr> for &'a str
{
    type Error = std::str::Utf8Error;

    #[inline]
    fn try_from(from: &'a UStr) -> Result<Self, Self::Error>
    {
	from.to_str()
    }
}

#[cfg(test)]
mod tests
{
    use super::*;

    /// Exercises `Dup2Ext::dup2()` on the process's standard streams.
    ///
    /// NOTE: `x.dup2(&mut y)` calls `dup2(x, y)`, i.e. `y`'s descriptor is replaced by a copy of `x`'s.
    /// This test rewires the standard descriptors of the test process itself and does not restore them.
    #[test]
    fn dup2()
    {
	use std::fs;
	// stdout becomes a copy of stderr.
	unsafe { std::io::stderr().dup2(&mut std::io::stdout()) }
	.expect("Failed to route stdout to stderr");

	let null = fs::OpenOptions::new()
	    .read(true)
	    .write(true)
	    .open("/dev/null").unwrap();
	// stdin becomes a copy of the /dev/null handle.
	unsafe {
	    null.dup2(&mut std::io::stdin())
	}.expect("Failed to shunt stdin to /dev/null");

	// stderr becomes a copy of the /dev/null handle.
	unsafe {
	    null.dup2(&mut std::io::stderr())
	}.expect("Failed to shunt stderr to /dev/null");
    }
}