You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
dirstat/src/serial.rs

203 lines
7.4 KiB

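//! Serialisation helpers: CBOR encoding written to async streams (optionally through a compressor) and, with the `prealloc` feature, to memory-mapped files.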
use super::*;
use tokio::prelude::*;
use async_compression::tokio_02::write::{
BzEncoder,
BzDecoder,
};
/// Compressing adaptor used for output: the bzip2 encoder imported above from `async_compression`'s tokio 0.2 `write` module.
type Compressor<T> = BzEncoder<T>;
/// Decompressing counterpart: the bzip2 decoder imported from the same `write` module.
// NOTE(review): this is the write-side decoder, not a reader adaptor, and no use of it is visible in this part of the file — confirm it is the intended type.
type Decompressor<T> = BzDecoder<T>;
/// Size floor in bytes (1024 * 1024).
// NOTE(review): not referenced directly in the visible code; presumably consumed by the `drop!` macro to decide when a drop is deferred — confirm.
const DEFER_DROP_SIZE_FLOOR: usize = 1024 * 1024; // 1 MiB
/// A compression scheme that can wrap raw async streams.
///
/// Both constructors consume the backing stream. The `Err` variant of their
/// return value carries that same stream back to the caller, which lets an
/// implementation decline to wrap it (see `compress::No`).
pub trait Compression
{
    /// Stream type produced by `create_input()`.
    type InputStream: AsyncRead + Unpin;
    /// Stream type produced by `create_output()`.
    type OutputStream: AsyncWrite + Unpin;

    /// Wrap the writable stream `from`, or return it in `Err` if no wrapper is created.
    fn create_output<Sink>(from: Sink) -> Result<Self::OutputStream, Sink>
    where
        Sink: AsyncWrite + Unpin;

    /// Wrap the readable stream `from`, or return it in `Err` if no wrapper is created.
    fn create_input<Source>(from: Source) -> Result<Self::InputStream, Source>
    where
        Source: AsyncRead + Unpin;
}
pub mod compress
{
use super::*;
/// No compression.
#[derive(Debug)]
pub struct No;
impl Compression for No
{
type OutputStream = DeadSink;
type InputStream = DeadSink;
fn create_input<W: AsyncRead+ Unpin>(from: W) -> Result<Self::InputStream, W> {
Err(from)
}
fn create_output<W: AsyncWrite+ Unpin>(from: W) -> Result<Self::OutputStream, W> {
Err(from)
}
}
#[derive(Debug)]
pub struct Bz;
impl Compression for Bz
{
type OutputStream = Box<dyn AsyncWrite + Unpin>;
type InputStream = Box<dyn AsyncRead + Unpin>;
fn create_input<W: AsyncRead+ Unpin>(from: W) -> Result<Self::InputStream, W> {
panic!()
}
fn create_output<W: AsyncWrite+ Unpin>(from: W) -> Result<Self::OutputStream, W> {
Ok(Box::new(super::Compressor::new(from)))
}
}
}
/// Name of the static type `T` behind the reference `_val`.
///
/// The value itself is never inspected; it only drives type inference so that
/// callers do not have to spell `T` out.
#[inline]
fn _type_name<T: ?Sized>(_val: &T) -> &'static str {
    use std::any::type_name;

    type_name::<T>()
}
/// Serialise this object asynchronously
///
/// `item` is encoded to CBOR in memory first; the resulting buffer is then written to `to`
/// through the stream obtained from `Compress::create_output()`.
///
/// # Note
/// Whether the output is compressed is decided by the `Compress` type parameter (`_comp` is only used to select it).
/// Output that was compressed cannot be used by the `prealloc` read/write functions, as they do not compress.
///
/// # Errors
/// Fails if the item cannot be serialised, or if writing to, flushing, or shutting down either the
/// (possibly compressing) stream or the backing stream `to` fails.
pub async fn write_async<Compress: Compression>(mut to: impl AsyncWrite + Unpin, item: &impl Serialize, _comp: Compress) -> eyre::Result<()>
{
    let name_of_item = _type_name(item);
    let name_of_stream = _type_name(&to);
    let sect_type_name = || name_of_item.header("Type trying to serialise was");
    let sect_stream_type_name = || name_of_stream.header("Stream type was");

    // Plain string messages are used for `wrap_err()`: wrapping with `eyre!()` would build a full
    // report on every call, including the ones that succeed.
    let vec = tokio::task::block_in_place(|| serde_cbor::to_vec(item))
        .wrap_err("Failed to serialise item")
        .with_section(sect_stream_type_name.clone())
        .with_section(sect_type_name.clone())?;

    {
        // NOTE(review): `EitherWrite` is built from the `Result` of `create_output()`; presumably it writes through
        // the wrapping stream on `Ok` and through the returned raw stream on `Err` — confirm against its definition.
        let mut stream: EitherWrite<_, _> = Compress::create_output(&mut to).into();

        cfg_eprintln!(Verbose; config::get_global(), "Writing {} bytes of type {:?} to stream of type {:?}", vec.len(), name_of_item, name_of_stream);

        stream.write_all(&vec[..])
            .await
            .wrap_err("Failed to write serialised memory to stream")
            .with_section(|| vec.len().to_string().header("Size of the serialised object was"))
            .with_section(sect_stream_type_name.clone())
            .with_section(sect_type_name.clone())?;
        stream.flush().await.wrap_err("Failed to flush output compression stream")?;
        stream.shutdown().await.wrap_err("Failed to shutdown output compression stream")?;
    }

    // Extremely overcomplicated concurrent flush+drop.
    // The flush/shutdown of the backing stream is raced against releasing the serialised buffer:
    // if the flush finishes first its result is returned right away, otherwise the flush is
    // awaited to completion once the buffer has been handed to `drop!`.
    use futures::FutureExt;
    let flush_fut = async {
        to.flush().await.wrap_err("Failed to flush output backing stream")?;
        to.shutdown().await.wrap_err("Failed to shutdown output backing stream")?;
        Ok::<(), eyre::Report>(())
    }.fuse();
    tokio::pin!(flush_fut);
    tokio::select!{
        res = &mut flush_fut => {
            return res;
        }
        _ = async move { drop!(async vec vec); } => {}
    }
    flush_fut.await
}
#[cfg(feature="prealloc")]
mod prealloc {
    use super::*;
    use std::os::unix::prelude::*;
    use std::fs::File;
    use memmap::{MmapMut, Mmap};

    /// Write this object as-is to this file descriptor.
    ///
    /// The item is serialised to CBOR in memory, `fallocate()` reserves that many bytes in `file`,
    /// and the buffer is then copied into a writable memory map of the file, which is flushed before returning.
    ///
    /// # Note
    /// This does not compress like `write_async()` can. It is just a 1-1 dump of the serialisation.
    /// Therefore, data written with `write_prealloc()` cannot then be read with `read_async()`.
    ///
    /// This is a completely synchronous operation. You should use it with `spawn_blocking` et al. to prevent task hangups.
    ///
    /// # Errors
    /// Fails if serialisation fails, if the buffer size does not fit in `off_t`, if `fallocate()` or the
    /// mapping fails, if the copied length differs from the buffer length, or if the map cannot be flushed.
    pub fn write_prealloc<T: Serialize>(file: &mut File, item: &T) -> eyre::Result<()>
    {
        let sect_type_name = || std::any::type_name::<T>().header("Type trying to serialise was");
        let vec = tokio::task::block_in_place(|| serde_cbor::to_vec(item))
            .wrap_err("Failed to serialise item")
            .with_section(sect_type_name.clone())?;

        let fd = file.as_raw_fd();

        // The fallible size conversion is done up front so the `unsafe` block only holds the FFI call.
        let len: libc::off_t = vec.len().try_into()
            .wrap_err("Failed to cast buffer size to `off_t`")
            .with_section(|| vec.len().header("Buffer size was"))
            .with_section(|| libc::off_t::MAX.to_string().header("Max value of `off_t` is"))
            .with_warning(|| "Usually `off_t` is a signed 64 bit integer. Whereas the buffer's size is unsigned. On systems where `off_t` is 64 bits or higher, this should realistically never happen and probably indicates a bug.")?;

        // SAFETY: `fd` was just taken from `file`, which stays borrowed (and so open) for the whole call.
        let rc = unsafe { libc::fallocate(fd, 0, 0, len) };
        let allocated = if rc < 0 {
            // Error
            Err(std::io::Error::last_os_error())
        } else {
            Ok(())
        };
        allocated.wrap_err("fallocate() failed")
            .with_section(|| vec.len().header("Bytes to allocate was"))
            .with_suggestion(|| "Make sure there is enough space for the fallocate() call")
            .with_suggestion(|| "Make sure we are able to write to the file")?;

        // fallocate() succeeded in allocating `vec.len()` bytes to map.
        // NOTE(review): the whole file is mapped; if it was already longer than the buffer, the bytes past
        // `vec.len()` are left as they were — confirm callers hand in an empty or truncated file.
        // SAFETY: NOTE(review): mapping is only sound while nothing else truncates or rewrites the file — cannot be checked from here.
        let mut map = unsafe { MmapMut::map_mut(file) }
            .wrap_err("Failed to map file for read + write")
            .with_section(|| fd.header("fd was"))
            .with_suggestion(|| "Do we have the premissions for both reading and writing of this file and fd?")?;

        // NOTE(review): `bytes::copy_nonoverlapping_unchecked` is a helper defined elsewhere; it is assumed to
        // return the number of bytes copied, which is what the assertion compares against — confirm.
        eyre_assert!(tokio::task::block_in_place(|| unsafe {
            bytes::copy_nonoverlapping_unchecked(&vec[..], &mut map[..])
        }) == vec.len(); "Length mismatch")
            .with_section(|| vec.len().header("Expected"))
            .with_section(|| map.len().header("Got"))
            .with_warning(|| "This should never happen, it indicates a bug")?;

        tokio::task::block_in_place(move || map.flush())
            .wrap_err("Failed to flush map in place")?; //map is dropped here
        drop!(vec vec);
        Ok(())
    }

    /// Read this object as-is from this file descriptor.
    ///
    /// The file is mapped read-only and the mapped bytes are deserialised as CBOR.
    ///
    /// # Note
    /// This does not decompress like `read_async()` does. It is just a 1-1 read of the serialisation.
    /// Therefore, `read_prealloc()` cannot be used with data written by `write_async()`.
    ///
    /// This is a completely synchronous operation. You should use it with `spawn_blocking` et al. to prevent task hangups.
    ///
    /// # Errors
    /// Fails if the file cannot be mapped for reading or if its contents do not deserialise to `T`.
    // This must be `DeserializeOwned` because the lifetime it is bound to is that of the memory map created and destroyed in the function, not of the fd `file` itself.
    pub fn read_prealloc<T: serde::de::DeserializeOwned>(file: &File) -> eyre::Result<T>
    {
        // The map is read-only, so the diagnostics only talk about read access.
        // SAFETY: NOTE(review): mapping is only sound while nothing else truncates or rewrites the file — cannot be checked from here.
        let map = unsafe { Mmap::map(file) }
            .wrap_err("Failed to map file for read")
            .with_section(|| file.as_raw_fd().header("fd was"))
            .with_suggestion(|| "Do we have the permissions for reading of this file and fd?")?;

        tokio::task::block_in_place(move || serde_cbor::from_slice(&map[..]))
            .wrap_err("Failed to deserialise from map")
            .with_note(|| "The prealloc read and write functions handle only *uncompressed* data. Make sure you're not feeding it compressed data (written with the non-prealloc read and write functions)")
    }
}
#[cfg(feature="prealloc")] pub use prealloc::{
write_prealloc as write_sync_map,
read_prealloc as read_sync_map,
};