master
Avril 3 years ago
parent 48277f62ac
commit ff1d82893a
Signed by: flanchan
GPG Key ID: 284488987C31F630

213
Cargo.lock generated

@ -117,6 +117,12 @@ dependencies = [
"generic-array",
]
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "foreign-types"
version = "0.3.2"
@ -138,6 +144,22 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394"
[[package]]
name = "fuchsia-zircon"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82"
dependencies = [
"bitflags",
"fuchsia-zircon-sys",
]
[[package]]
name = "fuchsia-zircon-sys"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"
[[package]]
name = "futures"
version = "0.3.8"
@ -263,6 +285,15 @@ dependencies = [
"wasi",
]
[[package]]
name = "hermit-abi"
version = "0.1.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5aca5565f760fb5b220e499d72710ed156fdb74e631659e99377d9ebfbd13ae8"
dependencies = [
"libc",
]
[[package]]
name = "hex-literal"
version = "0.3.1"
@ -279,6 +310,15 @@ dependencies = [
"digest",
]
[[package]]
name = "iovec"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2b3ea6ff95e175473f8ffe6a7eb7c00d054240321b84c57051175fe3c1e075e"
dependencies = [
"libc",
]
[[package]]
name = "jemalloc-sys"
version = "0.3.2"
@ -300,6 +340,16 @@ dependencies = [
"libc",
]
[[package]]
name = "kernel32-sys"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
dependencies = [
"winapi 0.2.8",
"winapi-build",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
@ -312,6 +362,15 @@ version = "0.2.81"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1482821306169ec4d07f6aca392a4681f66c75c9918aa49641a2595db64053cb"
[[package]]
name = "log"
version = "0.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fabed175da42fed1fa0746b0ea71f412aa9d35e76e95e59b192c64b9dc2bf8b"
dependencies = [
"cfg-if 0.1.10",
]
[[package]]
name = "memchr"
version = "2.3.4"
@ -325,7 +384,71 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b"
dependencies = [
"libc",
"winapi",
"winapi 0.3.9",
]
[[package]]
name = "mio"
version = "0.6.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4afd66f5b91bf2a3bc13fad0e21caedac168ca4c707504e75585648ae80e4cc4"
dependencies = [
"cfg-if 0.1.10",
"fuchsia-zircon",
"fuchsia-zircon-sys",
"iovec",
"kernel32-sys",
"libc",
"log",
"miow 0.2.2",
"net2",
"slab",
"winapi 0.2.8",
]
[[package]]
name = "mio-named-pipes"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0840c1c50fd55e521b247f949c241c9997709f23bd7f023b9762cd561e935656"
dependencies = [
"log",
"mio",
"miow 0.3.6",
"winapi 0.3.9",
]
[[package]]
name = "mio-uds"
version = "0.6.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "afcb699eb26d4332647cc848492bbc15eafb26f08d0304550d5aa1f612e066f0"
dependencies = [
"iovec",
"libc",
"mio",
]
[[package]]
name = "miow"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebd808424166322d4a38da87083bfddd3ac4c131334ed55856112eb06d46944d"
dependencies = [
"kernel32-sys",
"net2",
"winapi 0.2.8",
"ws2_32-sys",
]
[[package]]
name = "miow"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a33c1b55807fbed163481b5ba66db4b2fa6cde694a5027be10fb724206c5897"
dependencies = [
"socket2",
"winapi 0.3.9",
]
[[package]]
@ -338,6 +461,28 @@ dependencies = [
"jemallocator",
"memmap",
"serde",
"tokio",
]
[[package]]
name = "net2"
version = "0.2.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "391630d12b68002ae1e25e8f974306474966550ad82dac6886fb8910c19568ae"
dependencies = [
"cfg-if 0.1.10",
"libc",
"winapi 0.3.9",
]
[[package]]
name = "num_cpus"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3"
dependencies = [
"hermit-abi",
"libc",
]
[[package]]
@ -540,12 +685,32 @@ dependencies = [
"opaque-debug",
]
[[package]]
name = "signal-hook-registry"
version = "1.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce32ea0c6c56d5eacaeb814fbed9960547021d3edd010ded1425f180536b20ab"
dependencies = [
"libc",
]
[[package]]
name = "slab"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8"
[[package]]
name = "socket2"
version = "0.3.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97e0e9fd577458a4f61fb91fcb559ea2afecc54c934119421f9f5d3d5b1a1057"
dependencies = [
"cfg-if 1.0.0",
"libc",
"winapi 0.3.9",
]
[[package]]
name = "subtle"
version = "2.4.0"
@ -570,8 +735,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "099837d3464c16a808060bb3f02263b412f6fafcb5d01c533d309985fbeebe48"
dependencies = [
"bytes 0.5.6",
"fnv",
"futures-core",
"iovec",
"lazy_static",
"libc",
"memchr",
"mio",
"mio-named-pipes",
"mio-uds",
"num_cpus",
"pin-project-lite",
"signal-hook-registry",
"slab",
"tokio-macros",
"winapi 0.3.9",
]
[[package]]
name = "tokio-macros"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e44da00bfc73a25f814cd8d7e57a68a5c31b74b3152a0a1d1f590c97ed06265a"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
@ -604,6 +793,12 @@ version = "0.9.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519"
[[package]]
name = "winapi"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
[[package]]
name = "winapi"
version = "0.3.9"
@ -614,6 +809,12 @@ dependencies = [
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-build"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
@ -625,3 +826,13 @@ name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "ws2_32-sys"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d59cefebd0c892fa2dd6de581e937301d8552cb44489cdff035c6187cb63fa5e"
dependencies = [
"winapi 0.2.8",
"winapi-build",
]

@ -14,3 +14,4 @@ generational-arena = "0.2.8"
jemallocator = "0.3.2"
memmap = "0.7.0"
serde = {version = "1.0.118", features= ["derive"]}
tokio = {version = "0.2", features = ["full"]}

@ -1,5 +1,12 @@
use super::*;
use std::marker::Unpin;
use std::{io::{self,Read,},
fs};
use bytes::{
BytesMut,
BufMut,
};
use tokio::io::AsyncRead;
/// An open fd that has been memory mapped.
#[derive(Debug)]
@ -9,18 +16,98 @@ pub struct OpenMMap
map: Mmap,
}
impl AsRef<[u8]> for OpenMMap
{
#[inline(always)] fn as_ref(&self) -> &[u8]
{
&self.map[..]
}
}
impl OpenMMap
{
fn new_sync(file: impl AsRef<Path>) -> io::Result<Self>
{
let file = fs::OpenOptions::new().read(true).open(file)?;
let map = unsafe { Mmap::map(&file)? };
Ok(Self {
file,
map
})
}
async fn new(file: impl AsRef<Path>) -> io::Result<Self>
{
let file = tokio::fs::OpenOptions::new().read(true).open(file).await?.into_std().await;
let map = unsafe { Mmap::map(&file)? };
Ok(Self {
file,
map
})
}
}
/// How aggressively should we cache a specific item.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Copy)]
#[repr(u32)]
pub enum Level
{
/// No cacheing
///
/// # Usage
/// Best used for cold files, or files that are not accessed much, or when running low on memory and/or fds.
///
/// Corresponds to `DataCacheState::None`
None,
/// Open the file and cache the FD as std `File`
/// This can avoid the syscall overhead of needing to open the file next time it is accessed.
///
/// # Usage
/// Best used for frequently acessed files.
///
/// Corresponds to `DataCacheState::Open`
Low,
/// Open the file, cache the FD *and* map the file in memory.
/// This can provide efficient random-access of the file without the need to preload it.
///
/// # Usage
/// Best used for frequently read large files.
///
/// Corresponds to `DataCacheState::Mapped`
High,
/// Load the whole contents of the file into memory, without keeping it open.
/// This provides the most efficient acesss of the file, as well as not contributing to the process' fd limit, but at the cost of:
/// * Loading the whole file into a memory buffer, which may be a slow operation and/or take up massive memory space
/// * Allowing a potential desync if the file is changed on disk while the cache buffer is loaded.
///
/// ## Desync
/// While `mtfse` already assumes it has exclusive access to all files in its db root, generally a file being modified by an external program will cause an error to be returned within a `mtfse` operation eventually as file hashes are updated and/or files are encrypted/decrypted or even read (in the case of a file being deleted.)
///
/// When an item is cached at this level however, any of these kinds of changes made will not be visible. The store can then be put into an invalid state without realising it.
///
/// ## Tradeoffs
/// The caching operation itself is expensive, the memory requirements is expensive, and in some cases this can even *slow* reads compared to the other levels when used on large files, as we cannot take advantage of the kernel's internal file data caching for mapped random-acesss reads and we are bound to causing CPU cache misses.
///
/// # Usage
/// Best used for frequently read small files.
///
/// Corresponds to `DataCacheState::Memory`
Extreme,
}
/// Provides immutable caching of a file in a data entry.
#[derive(Debug)]
pub enum DataCacheState
pub(super) enum DataCacheState
{
/// There is no file cache for this item.
None,
/// The file is open, we have an fd.
Open(File),
/// The file is not open, but its whole contents have been loaded into memory.
Memory(Bytes), // load from file via `BytesMut` buffer, then call `.freeze()`
/// The file is open and memory mapped.
Mapped(OpenMMap),
/// The file is not open, but its whole contents have been loaded into memory.
Memory(Bytes), // load from file via `BytesMut` buffer, then call `.freeze()`.
}
impl Default for DataCacheState
@ -34,9 +121,159 @@ impl Default for DataCacheState
impl DataCacheState
{
/// Read from the cache at `offset` into the provided buffer, and return the number of bytes read.
///
/// # Performance
///
/// When the cache has random access, this method completes without yielding. If not, it performs async seek & read operations to fill the buffer as much as possible from the offset.
///
/// # Returns
/// If `EOF` is encountered within the read, then it is terminated early and the number of bytes successfully read is returned (and will be less than the length of the buffer), otherwise, the full buffer was filled, and the full buffer's length will be returned.
///
/// ## Errors
///
/// If this cache is not active, it will return an `io::Error` with `io::ErrorKind::NotConnected`.
/// Any other error in I/O operations is propagated.
pub async fn read_at(&mut self, offset: usize, into: &mut [u8]) -> io::Result<usize> // this takes `&mut self` only to ensure it cannot be called on different threads at the same time, as any file operations need to be atomic.
{
if let Some(ar) = self.random_access()
{
return Ok(slice::copy_bytes(&ar[offset..], into));
}
if let Some(file) = self.file()
{
use tokio::{
fs, prelude::*,
};
let mut file = fs::File::from_std(file.try_clone()?); // this is what requires we take `&mut(ex) self`.
file.seek(io::SeekFrom::Start(u64::try_from(offset).expect("Arch size integer was out of bounds of u64 (this should never happen)"))).await?;
let mut read =0;
let mut cur;
while {cur =file.read(&mut into[read..]).await?; cur != 0 && read<into.len()} {
read+=cur;
}
return Ok(read);
}
Err(io_err!(NotConnected, "Operation not supported (no cache is available)"))
}
/// Attempt to get a reference to an fd
pub fn file(&self) -> Option<&File>
{
match self
{
Self::Mapped(map) => Some(&map.file),
Self::Open(file) => Some(file),
_ => None,
}
}
/// Attempt to get a random access buffer of this cache, if one exists.
pub fn random_access(&self) -> Option<& [u8]>
{
match self {
Self::Mapped(map) => Some(map.as_ref()),
Self::Memory(mem) => Some(&mem[..]),
_ => None,
}
}
/// Drop the whole cache (if there is one).
#[inline] pub fn clear(&mut self)
{
*self = Self::None;
}
/// Attempt to asynchronously create a cache state for file provided by `file` at this level.
pub async fn new(file: impl AsRef<Path>, level: Level) -> io::Result<Self>
{
Ok(match level {
Level::None => Self::None,
Level::Low => Self::Open(tokio::fs::OpenOptions::new().read(true).open(file).await?.into_std().await),
Level::High => Self::Mapped(OpenMMap::new(file).await?),
Level::Extreme => {
let file = tokio::fs::OpenOptions::new().read(true).open(file).await?;
let (mut bytes,expect) = {
if let Some(len) = file.metadata().await.ok().map(|m| usize::try_from(m.len()).ok()).flatten() {
(BytesMut::with_capacity(len), Some(len))
} else {
(BytesMut::new(), None)
}
};
match (expect, read_whole_into_buffer(file, &mut bytes).await?) {
(Some(expect), len) if len != expect => return Err(io_err!(UnexpectedEof, "Size mismatch")),
_ => Self::Memory(bytes.freeze()),
}
},
})
}
/// Attempt to synchronously create a cache state for file provided by `file` at this level.
///
/// # Note
/// This will block until all the I/O operations and syscalls have completed. In an async context avoid using it.
pub fn new_blocking(file: impl AsRef<Path>, level: Level) -> io::Result<Self>
{
Ok(match level {
Level::None => Self::None,
Level::Low => Self::Open(fs::OpenOptions::new().read(true).open(file)?),
Level::High => Self::Mapped(OpenMMap::new_sync(file)?),
Level::Extreme => {
let file = fs::OpenOptions::new().read(true).open(file)?;
let (mut bytes,expect) = {
if let Some(len) = file.metadata().ok().map(|m| usize::try_from(m.len()).ok()).flatten() {
(BytesMut::with_capacity(len), Some(len))
} else {
(BytesMut::new(), None)
}
};
match (expect, read_whole_into_buffer_sync(file, &mut bytes)?) {
(Some(expect), len) if len != expect => return Err(io_err!(UnexpectedEof, "Size mismatch")),
_ => Self::Memory(bytes.freeze()),
}
},
})
}
}
const BUFSIZE: usize = 4096;
/// Read as many bytes from `input` into the `BufMut` output as possible and then return the number of bytes read.
/// Does not block the current task.
async fn read_whole_into_buffer<R,W>(mut input: R, mut output: &mut W) -> io::Result<usize>
where R: AsyncRead + Unpin,
W: BufMut + ?Sized,
{
use tokio::prelude::*;
let mut buf = [0u8; BUFSIZE];
let mut whole=0;
Ok(loop {
let read = match input.read(&mut buf[..]).await? {
0 => break whole,
x => (whole += x, x).1,
};
(&mut output).put(&buf[..read]);
})
}
/// Read as many bytes from `input` into the `BufMut` output as possible and then return the number of bytes read.
/// Blocks the current thread.
fn read_whole_into_buffer_sync<R,W>(mut input: R, mut output: &mut W) -> io::Result<usize>
where R: Read,
W: BufMut + ?Sized,
{
let mut buf = [0u8; BUFSIZE];
let mut whole=0;
Ok(loop {
let read = match input.read(&mut buf[..])? {
0 => break whole,
x => (whole += x, x).1,
};
(&mut output).put(&buf[..read]);
})
}

@ -2,6 +2,9 @@ use super::*;
use generational_arena::{
Arena, Index as ArenaIndex,
};
use std::convert::{TryFrom, TryInfo};
use std::collections::BTreeMap;
use std::collections::HashSet;
@ -77,7 +80,7 @@ impl Store
assert!(metadata.root.exists() && metadata.root.is_dir(), "Metadata root {:?} passed to `with_capacity` not existant or not a directory", metadata.root);
Self {
metadata,
+
data: HashSet::with_capacity(cap),
data_hashes: Arena::with_capacity(cap),

@ -8,6 +8,14 @@ use jemallocator::Jemalloc;
#[global_allocator]
static GLOBAL: Jemalloc = Jemalloc;
macro_rules! io_err {
($kind:ident, $msg:expr) => {
::std::io::Error::new(::std::io::ErrorKind::$kind, $msg)
};
($msg:expr) => (io_err!(Other, $msg));
}
mod slice;
mod data;
fn main() {

@ -0,0 +1,30 @@
use super::*;
use std::cmp;
use std::ptr;
/// Copy bytes from one slice to another.
///
/// # Notes
/// The slices can overlap.
#[inline] pub fn move_bytes(from: &[u8], to: &mut [u8]) -> usize
{
let len = cmp::min(from.len(), to.len());
unsafe {
ptr::copy(from.as_ptr(), to.as_mut_ptr(), len);
}
len
}
/// Copy bytes from one slice to another.
///
/// # Notes
/// The slices must *not* overlap.
#[inline] pub fn copy_bytes(from: &[u8], to: &mut [u8]) -> usize
{
let len = cmp::min(from.len(), to.len());
unsafe {
ptr::copy_nonoverlapping(from.as_ptr(), to.as_mut_ptr(), len);
}
len
}
Loading…
Cancel
Save