Compare commits

..

37 Commits

Author SHA1 Message Date
Avril c9d71b0bec
treemap
4 years ago
Avril 784f3f2c10
repl i guess
4 years ago
Avril 5169227499
begin working on command implementation for REPL
4 years ago
Avril b7d6bb0095
begin working on command implementation for REPL
4 years ago
Avril e8d215a65f
begin repl inspection
4 years ago
Avril 1698eaa37d
fix building without 'inspect' feature
4 years ago
Avril c253b593ca
verbose message when prealloc write
4 years ago
Avril b78475b91a
remove deprecated Argument function
4 years ago
Avril 06208ee535
arg parsing works
4 years ago
Avril a264a7b05d
arg parsing works
4 years ago
Avril deddddcdce
mode selection process outlined in 'arg::parsing::into_mode'
4 years ago
Avril 172462b48e
rework mode selection begin
4 years ago
Avril 994a394631
Argument::insert_into_cfg
4 years ago
Avril f6e674d190
ported arg parsers to parse_next
4 years ago
Avril 6a8d4ccd1e
add Continue::Abort to arg parsing
4 years ago
Avril 1a88bc149d
added async read
4 years ago
Avril df59cb16a8
changed writer to no-op when feature 'inspect' is disabled
4 years ago
Avril 58ea131c3f
fix broken finish match
4 years ago
Avril f69ecbfcf1
removed useless Arc clone and spawn when not serialising
4 years ago
Avril c7b40396a2
fix written message appearing when not writing
4 years ago
Avril 0ca0146739
move write_graph to Arced background task
4 years ago
Avril 0c58a9d44b
fix arg whitespace
4 years ago
Avril 5d7f5b2234
remove TODO
4 years ago
Avril e1083f4844
re-enable prealloc by default
4 years ago
Avril 9d339d9628
raw savings
4 years ago
Avril 18c7c12025
added raw / no-compressing mode to write_async
4 years ago
Avril 00798609b2
prealloc writing done: --save-raw
4 years ago
Avril 2583ae674c
prealloc: write and read
4 years ago
Avril a724270841
enabled jemalloc as default allocator for now
4 years ago
Avril 06c07371ee
start new optional serialisation mode: prealloc
4 years ago
Avril 4ee524d859
fix feature-dependant argument options
4 years ago
Avril c0a608e5d5
better output noise level control
4 years ago
Avril 58fe129bdd
added quiet mode
4 years ago
Avril ec35671398
added config speicifc printing macros
4 years ago
Avril 9f4a0faa68
added dumping of collected data
4 years ago
Avril 9c8707fc44
added more graceful interrupt handler
4 years ago
Avril 4124aa7d07
switch default conc limit to infinity
4 years ago

3
.gitignore vendored

@ -1,2 +1,5 @@
/target
*~
*.dump
*.dump.raw
profiling/

279
Cargo.lock generated

@ -1,5 +1,13 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "ad-hoc-iter"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90a8dd76beceb5313687262230fcbaaf8d4e25c37541350cf0932e9adb8309c8"
[[package]]
name = "addr2line"
version = "0.14.1"
@ -15,6 +23,19 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e"
[[package]]
name = "async-compression"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b72c1f1154e234325b50864a349b9c8e56939e266a4c307c0f159812df2f9537"
dependencies = [
"bzip2",
"futures-core",
"memchr",
"pin-project-lite 0.2.4",
"tokio",
]
[[package]]
name = "autocfg"
version = "1.0.1"
@ -47,6 +68,33 @@ version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e4cec68f03f32e44924783795810fa50a7035d8c8ebe78580ad7e6c703fba38"
[[package]]
name = "bzip2"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "275d84fe348b838dc49477d39770682839b3e73e21a3eadc07b12924f1a9fcbe"
dependencies = [
"bzip2-sys",
"libc",
]
[[package]]
name = "bzip2-sys"
version = "0.1.10+1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17fa3d1ac1ca21c5c4e36a97f3c3eb25084576f6fc47bf0139c1123434216c6c"
dependencies = [
"cc",
"libc",
"pkg-config",
]
[[package]]
name = "cc"
version = "1.0.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c0496836a84f8d0495758516b8621a622beb77c0fed418570e50764093ced48"
[[package]]
name = "cfg-if"
version = "0.1.10"
@ -72,16 +120,49 @@ dependencies = [
"owo-colors",
]
[[package]]
name = "dirs-next"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1"
dependencies = [
"cfg-if 1.0.0",
"dirs-sys-next",
]
[[package]]
name = "dirs-sys-next"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d"
dependencies = [
"libc",
"redox_users",
"winapi 0.3.9",
]
[[package]]
name = "dirstat"
version = "0.1.0"
dependencies = [
"ad-hoc-iter",
"async-compression",
"cfg-if 1.0.0",
"color-eyre",
"futures",
"jemallocator",
"lazy_static",
"libc",
"memmap",
"num_cpus",
"once_cell",
"pin-project",
"rustyline",
"serde",
"serde_cbor",
"smallvec",
"tokio",
"treemap",
]
[[package]]
@ -100,6 +181,22 @@ version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "fs2"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213"
dependencies = [
"libc",
"winapi 0.3.9",
]
[[package]]
name = "fs_extra"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394"
[[package]]
name = "fuchsia-zircon"
version = "0.3.3"
@ -211,12 +308,29 @@ dependencies = [
"slab",
]
[[package]]
name = "getrandom"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c9495705279e7140bf035dde1f6e750c162df8b625267cd52cc44e0b156732c8"
dependencies = [
"cfg-if 1.0.0",
"libc",
"wasi",
]
[[package]]
name = "gimli"
version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6503fe142514ca4799d4c26297c4248239fe8838d827db6bd6065c6ed29a6ce"
[[package]]
name = "half"
version = "1.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62aca2aba2d62b4a7f5b33f3712cb1b0692779a56fb510499d5c0aa594daeaf3"
[[package]]
name = "hermit-abi"
version = "0.1.18"
@ -241,6 +355,27 @@ dependencies = [
"libc",
]
[[package]]
name = "jemalloc-sys"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d3b9f3f5c9b31aa0f5ed3260385ac205db665baa41d49bb8338008ae94ede45"
dependencies = [
"cc",
"fs_extra",
"libc",
]
[[package]]
name = "jemallocator"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43ae63fcfc45e99ab3d1b29a46782ad679e98436c3169d15a167a1108a724b69"
dependencies = [
"jemalloc-sys",
"libc",
]
[[package]]
name = "kernel32-sys"
version = "0.2.2"
@ -259,9 +394,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.85"
version = "0.2.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ccac4b00700875e6a07c6cde370d44d32fa01c5a65cdd2fca6858c479d28bb3"
checksum = "b7282d924be3275cec7f6756ff4121987bc6481325397dde6ba3e7802b1a8b1c"
[[package]]
name = "log"
@ -278,6 +413,16 @@ version = "2.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525"
[[package]]
name = "memmap"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b"
dependencies = [
"libc",
"winapi 0.3.9",
]
[[package]]
name = "miniz_oxide"
version = "0.4.3"
@ -363,6 +508,18 @@ dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "nix"
version = "0.19.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2ccba0cfe4fdf15982d1674c69b1fd80bad427d293849982668dfe454bd61f2"
dependencies = [
"bitflags",
"cc",
"cfg-if 1.0.0",
"libc",
]
[[package]]
name = "num_cpus"
version = "1.13.0"
@ -429,6 +586,12 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "pkg-config"
version = "0.3.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c"
[[package]]
name = "proc-macro-hack"
version = "0.5.19"
@ -459,12 +622,88 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "redox_syscall"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94341e4e44e24f6b591b59e47a8a027df12e008d73fd5672dbea9cc22f4507d9"
dependencies = [
"bitflags",
]
[[package]]
name = "redox_users"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "528532f3d801c87aec9def2add9ca802fe569e44a544afe633765267840abe64"
dependencies = [
"getrandom",
"redox_syscall",
]
[[package]]
name = "rustc-demangle"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e3bad0ee36814ca07d7968269dd4b7ec89ec2da10c4bb613928d3077083c232"
[[package]]
name = "rustyline"
version = "7.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8227301bfc717136f0ecbd3d064ba8199e44497a0bdd46bb01ede4387cfd2cec"
dependencies = [
"bitflags",
"cfg-if 1.0.0",
"dirs-next",
"fs2",
"libc",
"log",
"memchr",
"nix",
"scopeguard",
"unicode-segmentation",
"unicode-width",
"utf8parse",
"winapi 0.3.9",
]
[[package]]
name = "scopeguard"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "serde"
version = "1.0.123"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92d5161132722baa40d802cc70b15262b98258453e85e5d1d365c757c73869ae"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_cbor"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e18acfa2f90e8b735b2836ab8d538de304cbb6729a7360729ea5a895d15a622"
dependencies = [
"half",
"serde",
]
[[package]]
name = "serde_derive"
version = "1.0.123"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9391c295d64fc0abb2c556bad848f33cb8296276b1ad2677d1ae1ace4f258f31"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "signal-hook-registry"
version = "1.3.0"
@ -480,6 +719,12 @@ version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8"
[[package]]
name = "smallvec"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e"
[[package]]
name = "socket2"
version = "0.3.19"
@ -537,12 +782,42 @@ dependencies = [
"syn",
]
[[package]]
name = "treemap"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1571f89da27a5e1aa83304ee1ab9519ea8c6432b4c8903aaaa6c9a9eecb6f36"
[[package]]
name = "unicode-segmentation"
version = "1.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb0d2e7be6ae3a5fa87eed5fb451aff96f2573d2694942e40543ae0bbe19c796"
[[package]]
name = "unicode-width"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3"
[[package]]
name = "unicode-xid"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564"
[[package]]
name = "utf8parse"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "936e4b492acfd135421d8dca4b1aa80a7bfc26e702ef3af710e0752684df5372"
[[package]]
name = "wasi"
version = "0.10.2+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
[[package]]
name = "winapi"
version = "0.2.8"

@ -6,16 +6,50 @@ edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[profile.release]
opt-level = 3
lto = "fat"
codegen-units = 1
panic = "unwind"
[features]
default = ["splash"]
default = ["splash", "inspect", "defer-drop", "jemalloc", "prealloc", "treemap"]
# When using the REPL to inspect graphs, save command history.
save-history = []
# Use jemalloc as global allocator instead of system allocator.
# May potentially cause some speedups and better memory profile on large runs.
jemalloc = ["jemallocator"]
# Use `fallocate()` and memory mapping to save output data instead of normal `write()` syscalls
prealloc = ["inspect", "libc", "memmap"]
# Allow saving and loading of gathered data for later inspection
inspect = ["serde", "serde_cbor", "async-compression"]
# Enable dropping of certain large objects on background threads.
defer-drop = []
# Show splash screen
splash = []
[dependencies]
ad-hoc-iter = "0.2.3"
async-compression = {version = "0.3", features=["tokio-02", "bzip2"], optional=true}
cfg-if = "1.0.0"
color-eyre = {version = "0.5.10", default-features=false}
futures = "0.3.12"
jemallocator = {version = "0.3.2", optional = true}
lazy_static = "1.4.0"
libc = {version = "0.2.86", optional = true}
memmap = {version = "0.7.0", optional = true}
num_cpus = "1.13.0"
once_cell = "1.5.2"
pin-project = "1.0.5"
rustyline = "7.1.0"
serde = {version = "1.0.123", features=["derive"], optional=true}
serde_cbor = {version = "0.11.1", optional=true}
smallvec = "1.6.1"
tokio = {version = "0.2", features=["full"]}
treemap = {version = "0.3.2", optional=true}

@ -1,124 +0,0 @@
//! Argument parsing and handling
use super::*;
use std::num::NonZeroUsize;
use config::Config;
/// Executable name
pub fn program_name() -> &'static str
{
lazy_static! {
static ref NAME: String = std::env::args().next().unwrap();
}
&NAME[..]
}
#[cfg(feature="splash")]
/// Print splash screen
#[inline] pub fn splash()
{
eprintln!("dirstat version {}", env!("CARGO_PKG_VERSION"));
eprintln!("Made by {} with <3.\n Licensed with GPL v3.0 +", env!("CARGO_PKG_AUTHORS"));
}
/// Print usage message
pub fn usage()
{
#[cfg(feature="splash")]
{
splash(); println!();
}
println!("{} [OPTIONS] [-] <paths...>", program_name());
println!("{} --help", program_name());
println!(r#"
OPTIONS:
--recursive <number> Set max directory recursion depth limit (1 = No recursion (default), 0 = Unlimited recursion).
-r Set unlimited directory recursion depth. (same as `--recursive 0`).
--threads <number> Limit the maximum number of tasks allowed to process concurrently (Set to 0 for unlimited.)
-M Set number of parallel running tasks to unlimited. (Same as `--threads 0`).
-m Limit number of parallel tasks to the number of active CPU processors. (default).
- Stop parsing arguments, treat all the rest as paths.
--help Print this message and exit.
NOTES:
The first time a non-option argument is encountered, the program stops parsing arguments and assumes the rest of the arguments are paths.
If parallelism is set to unlimited, there can be a huge syscall overhead. It is recommended to use `-m` (which is default anyway).
"#);
}
/// Print usage message then exit with code 1.
pub fn help() -> !
{
usage();
std::process::exit(1)
}
/// Which mode to run in
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Mode
{
Normal(Config),
Help,
}
/// Parse command-line arguments
#[inline] pub fn parse_args() -> eyre::Result<Mode>
{
parse(std::env::args().skip(1))
}
fn parse<I: IntoIterator<Item=String>>(args: I) -> eyre::Result<Mode>
{
let suggestion_intended_arg = || "If this was intended as a path instead of an option, use option `-` before it.";
let mut args = args.into_iter();
let mut cfg = Config::default();
let mut reading = true;
while let Some(opt) = args.next()
{
if reading {
match opt.trim()
{
"--help" => return Ok(Mode::Help),
"-" => reading = false,
"--threads" => {
let max = args.next().ok_or(eyre!("`--threads` expects a parameter"))
.with_suggestion(suggestion_intended_arg.clone())?;
cfg.max_tasks = NonZeroUsize::new(max.parse::<usize>()
.wrap_err(eyre!("`--threads` expects a non-negative number"))
.with_suggestion(suggestion_intended_arg.clone())
.with_section(move || max.header("Parameter given was"))?);
},
"-M" => cfg.max_tasks = None,
"-m" => {
cfg.max_tasks = config::max_tasks_cpus(); // this is the default, but it is possible an earlier command mutated it, so doing nothing here would be a bug for that corner case
},
"--recursive" => {
let max = args.next().ok_or(eyre!("`--recursive` expects a parameter"))
.with_suggestion(suggestion_intended_arg.clone())?;
cfg.recursive = max.parse::<usize>()
.wrap_err(eyre!("`--recursive` expects a non-negative number"))
.with_suggestion(suggestion_intended_arg.clone())
.with_section(move || max.header("Parameter given was"))?.into();
},
"-r" => cfg.recursive = config::Recursion::Unlimited,
_ => {
cfg.paths.push(opt.into());
reading = false;
}
}
continue;
} else {
cfg.paths.push(opt.into());
}
}
Ok(Mode::Normal(cfg))
}

@ -0,0 +1,137 @@
//! Argument parsing and handling
use super::*;
use std::num::NonZeroUsize;
use std::fmt;
use config::Config;
mod parsing;
/// Executable name
pub fn program_name() -> &'static str
{
lazy_static! {
static ref NAME: String = std::env::args().next().unwrap();
}
&NAME[..]
}
#[cfg(feature="splash")]
/// Print splash screen
#[inline] pub fn splash()
{
eprintln!("dirstat version {}", env!("CARGO_PKG_VERSION"));
eprintln!("Made by {} with <3.\n Licensed with GPL v3.0 +", env!("CARGO_PKG_AUTHORS"));
}
const OPTIONS_NORMAL: &'static [&'static str] = &[
"--recursive <number> Set max directory recursion depth limit (1 = No recursion (default), 0 = Unlimited recursion).",
"-r Set unlimited directory recursion depth. (same as `--recursive 0`).",
"--threads <number> Limit the maximum number of tasks allowed to process concurrently (Set to 0 for unlimited.)",
"-M Set number of parallel running tasks to unlimited. (Same as `--threads 0`). (default).",
"-m Limit number of parallel tasks to the number of active CPU processors.",
"-q Quiet mode. Don't output info messages about successful `stat`ing.",
"-Q Silent mode. Don't output any messages.",
"-v Verbose mode. Output extra information.",
#[cfg(feature="inspect")] "--save <file> Dump the collected data to this file for further inspection.",
#[cfg(feature="inspect")] "-D Dump the collected data to `stdout` (see `--save`.)",
#[cfg(feature="inspect")] "--save-raw <file> Dump the collected data to this file uncompressed. (see `--save`.)",
#[cfg(feature="inspect")] "-R Dump the collected data to standard output uncompressed. (see `--save-raw`.)",
"- Stop parsing arguments, treat all the rest as paths.",
];
const NOTES: &'static [&'static str] = &[
"The first time a non-option argument is encountered, the program stops parsing arguments and assumes the rest of the arguments are paths.",
"If parallelism is set to unlimited, there can be a huge syscall overhead. It is recommended to use `-m` in large runs.",
"",
"Symlinks are ignored while collecting stat data. They will fail with message 'Unknown file type'. Symlinks are generally very small in the actual data they contain themselves, so this is *usually* unimportant.",
#[cfg(feature="inspect")] "\nThe save formats of `--save` (`-D`) and `--save-raw` (`-Dr`) are incompatible. The former is bzip2 compressed the latter is uncompressed.",
];
fn get_opt_normal() -> impl fmt::Display
{
#[derive(Debug)]
struct Opt;
impl fmt::Display for Opt
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
{
for line in OPTIONS_NORMAL.iter()
{
writeln!(f, " {}", line)?;
}
Ok(())
}
}
Opt
}
/// Print usage message
pub fn usage()
{
#[cfg(feature="splash")]
{
splash(); println!();
}
println!("{} [OPTIONS] [-] <paths...>", program_name());
println!("{} --help", program_name());
println!(r#"
OPTIONS:
{}
--help Print this message and exit.
NOTES:
The first time a non-option argument is encountered, the program stops parsing arguments and assumes the rest of the arguments are paths.
If parallelism is set to unlimited, there can be a huge syscall overhead. It is recommended to use `-m`.
Symlinks are ignored while collection stat data. They will fail with message 'Unknown file type'. Symlinks are generally very small in the actual data they contain themselves, so this is *usually* unimportant.
"#, get_opt_normal());
}
/// Print usage message then exit with code 1.
pub fn help() -> !
{
usage();
std::process::exit(1)
}
/// Which mode to run in
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Mode
{
Normal(Config),
Help,
}
/// Parse command-line arguments
#[inline] pub fn parse_args() -> eyre::Result<Mode>
{
parse(std::env::args().skip(1))
}
fn parse<I: IntoIterator<Item=String>>(args: I) -> eyre::Result<Mode>
{
//let mut cfg = config::Config::default();
let mut buffer = parsing::Output::new();
let mut args = args.into_iter();
while let Some(arg) = args.next()
{
match parsing::parse_next(&mut args, &mut buffer, arg)? {
parsing::Continue::No => {
parsing::consume(args, &mut buffer);
break;
},
parsing::Continue::Abort(Some(change_to)) => return Ok(*change_to),
parsing::Continue::Abort(_) => break,
_ => (),
}
}
parsing::into_mode(buffer)
}

@ -0,0 +1,515 @@
//! For parsing arguments
use super::*;
use std::collections::{HashMap, HashSet};
use std::mem::Discriminant;
use std::fmt;
#[cfg(feature="inspect")] use config::OutputSerialisationMode;
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum InspectKind
{
Treemap(Option<(u64, u64)>),
}
impl fmt::Display for InspectKind
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
{
match self {
Self::Treemap(None) => write!(f, "treemap"),
Self::Treemap(Some((x,y))) => write!(f, "treemap:{}:{}", x, y), // Width and height.
}
}
}
impl std::str::FromStr for InspectKind
{
type Err = eyre::Report;
fn from_str(s: &str) -> Result<Self, Self::Err>
{
todo!()
}
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Argument
{
ModeChangeHelp,
LimitConcMaxProc,
LimitConc(NonZeroUsize),
UnlimitConc,
Save(String),
SaveStdout,
SaveRaw(String),
SaveRawStdout,
LimitRecurse(NonZeroUsize),
UnlimitRecurse,
LogVerbose,
LogQuiet,
LogSilent,
StopReading,
Inspect(InspectKind),
Input(String),
}
/// Kinds of modes of operation for the program.
///
/// These map to `super::Mode`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd, Copy)]
#[non_exhaustive]
enum ModeKind
{
Normal,
Help
}
impl Default for ModeKind
{
#[inline]
fn default() -> Self
{
Self::Normal
}
}
impl Argument
{
/// What mode does this argument change to, if any?
fn mode_change_kind(&self) -> Option<ModeKind>
{
Some(match self
{
Self::ModeChangeHelp => ModeKind::Help,
_ => return None,
})
}
/// Insert this `Argument` into config
pub fn insert_into_cfg(self, cfg: &mut Config)
{
use Argument::*;
match self {
Inspect(InspectKind::Treemap(None)) => cfg.inspection.treemap = Some((640, 480)),
Inspect(InspectKind::Treemap(x)) => cfg.inspection.treemap = x,
LimitConcMaxProc => cfg.max_tasks = config::max_tasks_cpus(),
LimitConc(max) => cfg.max_tasks = Some(max),
UnlimitConc => cfg.max_tasks = None,
#[cfg(feature="inspect")] Save(output) => cfg.serialise_output = Some(OutputSerialisationMode::File(output.into())),
#[cfg(feature="inspect")] SaveStdout => cfg.serialise_output = Some(OutputSerialisationMode::Stdout),
#[cfg(feature="inspect")] SaveRaw(output) => {
cfg_if! {
if #[cfg(feature="prealloc")] {
cfg.serialise_output = Some(OutputSerialisationMode::PreallocFile(output.into()));
} else {
cfg.serialise_output = Some(OutputSerialisationMode::RawFile(output.into()));
}
}
},
#[cfg(feature="inspect")] SaveRawStdout => cfg.serialise_output = Some(OutputSerialisationMode::RawStdout),
LimitRecurse(limit) => cfg.recursive = if limit.get() == 1 { config::Recursion::None } else { config::Recursion::Limited(limit) },
UnlimitRecurse => cfg.recursive = config::Recursion::Unlimited,
LogVerbose => cfg.output_level = config::OutputLevel::Verbose,
LogQuiet => cfg.output_level = config::OutputLevel::Quiet,
LogSilent => cfg.output_level = config::OutputLevel::Silent,
Input(path) => cfg.paths.push(path.into()),
_ => (), //unreachable()! // Do nothing instead of panic.
}
}
}
impl fmt::Display for Argument
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
{
use Argument::*;
match self
{
Inspect(ins) => write!(f, "--inspect {}", ins),
ModeChangeHelp => write!(f, "--help"),
LimitConcMaxProc => write!(f, "-m"),
LimitConc(limit) => write!(f, "--threads {}", limit),
UnlimitConc => write!(f, "-M (--threads 0)"),
Save(s) => write!(f, "--save {:?}", s),
SaveStdout => write!(f, "-D"),
SaveRaw(s) => write!(f, "--save-raw {:?}", s),
SaveRawStdout => write!(f, "-R"),
LimitRecurse(rec) => write!(f, "--recursive {}", rec),
UnlimitRecurse => write!(f, "-r (--recursive 0)"),
LogVerbose => write!(f, "-v"),
LogQuiet => write!(f, "-q"),
LogSilent => write!(f, "-Q"),
StopReading => write!(f, "-"),
Input(input) => write!(f, "<{}>", input),
}
}
}
#[derive(Debug, Clone, PartialEq, Eq, Hash, Copy)]
enum MX
{
None,
Itself,
All,
Only(Discriminant<Argument>),
Many(&'static [Discriminant<Argument>]),
}
impl Default for MX
{
#[inline]
fn default() -> Self
{
Self::Itself
}
}
impl MX
{
/// Is this argument discriminant mutually exclusive with this other argument?
pub fn is_mx(&self, this: Discriminant<Argument>, other: &Argument) -> bool
{
use std::mem::discriminant;
let other = discriminant(other);
match self
{
Self::Itself if other == this => true,
Self::All => true,
Self::Only(disc) if other == *disc => true,
Self::Many(discs) if discs.contains(&other) => true,
_ => false,
}
}
}
impl Argument
{
/// Is this `Argument` mutually exclusive with another?
pub fn is_mx_with(&self, other: &Self) -> bool
{
use std::mem::discriminant;
lazy_static! {
static ref MX_REF: HashMap<Discriminant<Argument>, MaybeVec<MX>> = {
let mut out = HashMap::new();
macro_rules! mx {
(@) => {
std::iter::empty()
};
(@ self $($tt:tt)*) => {
iter![MX::Itself].chain(mx!(@ $($tt)*))
};
(@ [$inner:expr] $($tt:tt)*) => {
iter![MX::Only(discriminant(&$inner))].chain(mx!(@ $($tt)*))
};
(@ [$($inner:expr),*] $($tt:tt)*) => {
iter![MX::Many(vec![$(discriminant(&$inner)),*].leak())].chain(mx!(@ $($tt)*))
};
(@ $ident:ident $($tt:tt)*) => {
iter![MX::$ident].chain(mx!(@ $($tt)*))
};
($disc:expr => $($tt:tt)*) => {
out.insert(discriminant(&$disc), mx!(@ $($tt)*).collect());
};
}
mx!(Argument::ModeChangeHelp => All);
mx!(Argument::LimitConcMaxProc => self [Argument::UnlimitConc,
Argument::LimitConc(unsafe{NonZeroUsize::new_unchecked(1)})]);
mx!(Argument::UnlimitConc => self [Argument::LimitConcMaxProc, Argument::LimitConc(unsafe{NonZeroUsize::new_unchecked(1)})]);
mx!(Argument::LimitConc(unsafe{NonZeroUsize::new_unchecked(1)}) => self [Argument::LimitConcMaxProc, Argument::UnlimitConc]);
mx!(Argument::Save(String::default()) => self [Argument::SaveStdout,
Argument::SaveRaw(Default::default()),
Argument::SaveRawStdout]);
mx!(Argument::SaveStdout => self [Argument::Save(String::default()),
Argument::SaveRaw(Default::default()),
Argument::SaveRawStdout]);
mx!(Argument::SaveRaw(Default::default()) => self [Argument::Save(String::default()),
Argument::SaveStdout,
Argument::SaveRawStdout]);
mx!(Argument::SaveRawStdout => self [Argument::Save(String::default()),
Argument::SaveRaw(String::default()),
Argument::SaveStdout]);
mx!(Argument::LimitRecurse(unsafe{NonZeroUsize::new_unchecked(1)}) => self [Argument::UnlimitRecurse]);
mx!(Argument::UnlimitRecurse => self [Argument::LimitRecurse(unsafe{NonZeroUsize::new_unchecked(1)})]);
mx!(Argument::LogVerbose => self [Argument::LogQuiet, Argument::LogSilent]);
mx!(Argument::LogQuiet => self [Argument::LogVerbose, Argument::LogSilent]);
mx!(Argument::LogSilent => self [Argument::LogQuiet, Argument::LogVerbose]);
mx!(Argument::StopReading => All);
mx!(Argument::Input(String::default()) => None);
out
};
}
let this = discriminant(self);
match MX_REF.get(&this) {
Some(mx) if mx.iter().filter(|mx| mx.is_mx(this, other)).next().is_some() => true,
_ => false,
}
}
}
/// Should we continue parsing and/or reading arguments?
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Continue
{
/// Keep parsing the arguments
Yes,
/// Stop parsing arguments, add the rest of args as `Input`s
No,
/// On mode change, we don't need to parse the rest of the argument. Stop reading entirely, and optionally return the last one here, which must be a mode change argument.
///
/// Returning this when the contained value is `Some` immediately terminates parsing and precedes to mode-switch. However, if it is `None`, parsing of chained short args is allowed to continue, although `Abort(None)` will be returned at the end regardless of subsequent `Continue` results from that change (unless one is an `Abort(Some(_))`, which immediately returns itself.)
// Box `Argument` to reduce the size of `Continue`, as it is returned from functions often and when its value is set to `Some` it will always be the last `Argument` processed anyway and the only one to be boxed here at all.
//TODO: Deprecate the early return of an `Argument` here. Either change it to `Mode`, or have no early return. Mode change happens at the bottom in `into_mode` now.
Abort(Option<Box<Mode>>),
}
impl Continue
{
/// Should we keep *parsing* args?
#[inline] pub fn keep_reading(&self) -> bool
{
if let Self::Yes = self {
true
} else {
false
}
}
/// Is this an abort?
#[inline] pub fn is_abort(&self) -> bool
{
if let Self::Abort(_) = self {
true
} else {
false
}
}
}
impl Default for Continue
{
#[inline]
fn default() -> Self
{
Self::Yes
}
}
impl From<bool> for Continue
{
fn from(from: bool) -> Self
{
if from {
Self::Yes
} else {
Self::No
}
}
}
pub type Output = HashSet<Argument>;
#[inline] const fn suggestion_intended_arg() -> &'static str {
"If this was intended as a path instead of an option, use option `-` before it."
}
fn save_output(output: &mut Output, item: Argument) -> eyre::Result<()>
{
if let Some(mx) = output.iter().filter(|arg| item.is_mx_with(arg)).next() {
return Err(eyre!("Arguments are mutually exclusive"))
.with_section(|| item.header("Trying to addargument "))
.with_section(|| mx.to_string().header("Which is mutually exclusive with previously added"));
}
output.insert(item); //TODO: Warn when adding duplicate?
Ok(())
}
fn parse_single<I>(_args: &mut I, output: &mut Output, this: char) -> eyre::Result<Continue>
where I: Iterator<Item=String>
{
let item = match this
{
'r' => Argument::UnlimitRecurse,
#[cfg(feature="inspect")] 'D' => Argument::SaveStdout,
#[cfg(feature="inspect")] 'R' => Argument::SaveRawStdout,
'v' => Argument::LogVerbose,
'q' => Argument::LogQuiet,
'Q' => Argument::LogSilent,
'm' => Argument::LimitConcMaxProc,
'M' => Argument::UnlimitConc,
unknown => {
return Err(eyre!("Unknown short argument {:?}", unknown))
.with_suggestion(suggestion_intended_arg.clone());
},
};
save_output(output, item)
.with_section(|| this.header("Short argument was"))?;
Ok(Continue::Yes)
}
/// Consume this iterator into `Input`s
pub fn consume<I>(args: I, output: &mut Output)
where I: IntoIterator<Item=String>
{
output.extend(args.into_iter().map(Argument::Input));
}
pub fn parse_next<I>(args: &mut I, output: &mut Output, this: String) -> eyre::Result<Continue>
where I: Iterator<Item=String>
{
let mut keep_reading = Continue::Yes;
let item = match this.trim()
{
"--inspect" => {
let ins = args.next().ok_or(eyre!("`--inspect` expects a parameter"))
.with_suggestion(suggestion_intended_arg.clone())?;
Argument::Inspect(ins.parse().wrap_err(eyre!("Failed to parse parameter for `--inspect`"))?)
},
" --threads" => {
let max = args.next().ok_or(eyre!("`--threads` expects a parameter"))
.with_suggestion(suggestion_intended_arg.clone())?;
match NonZeroUsize::new(max.parse::<usize>()
.wrap_err(eyre!("`--threads` expects a non-negative number"))
.with_suggestion(suggestion_intended_arg.clone())
.with_section(move || max.header("Parameter given was"))?)
{
Some(max) => Argument::LimitConc(max),
None => Argument::UnlimitConc,
}
},
"--recursive" => {
let max = args.next().ok_or(eyre!("`--recursive` expects a parameter"))
.with_suggestion(suggestion_intended_arg.clone())?;
match NonZeroUsize::new(max.parse::<usize>().wrap_err(eyre!("`--recursive` expects a non-negative number"))
.with_suggestion(suggestion_intended_arg.clone())
.with_section(move || max.header("Parameter given was"))?)
{
Some(x) => Argument::LimitRecurse(x),
None => Argument::UnlimitRecurse,
}
},
"--help" => {
return Ok(Continue::Abort(Some(Box::new(Mode::Help))));
},
"-" => {
return Ok(Continue::No);
},
#[cfg(feature="inspect")] "--save" => {
let file = args.next().ok_or(eyre!("`--save` expects a parameter"))
.with_suggestion(suggestion_intended_arg.clone())?;
Argument::Save(file)
},
#[cfg(feature="inspect")] "--save-raw" => {
let file = args.next().ok_or(eyre!("`--save` expects a parameter"))
.with_suggestion(suggestion_intended_arg.clone())?;
Argument::SaveRaw(file)
},
single if single.starts_with("-") => {
for ch in single.chars().skip(1) {
match parse_single(args, output, ch)
.wrap_err(eyre!("Error parsing short argument"))
.with_section(|| this.clone().header("Full short argument chain was"))? {
abort @ Continue::Abort(Some(_)) => return Ok(abort),
x @ Continue::No |
x @ Continue::Abort(_) if !x.is_abort() => keep_reading = x,
_ => (),
}
}
return Ok(keep_reading);
},
_ => {
keep_reading = Continue::No;
Argument::Input(this)
}
};
save_output(output, item)?;
Ok(keep_reading)
}
/// Converts parsed argument lists into a respective mode.
///
/// # Notes
/// These functions assume the mode has already been correctly calculated to be the mode pertaining to that function.
mod modes {
use super::*;
use config::Config;
/// Consume a parsed list of arguments in `Normal` mode into a `Normal` mode `Config` object.
pub fn normal(args: Output) -> eyre::Result<config::Config>
{
let mut cfg = Config::default();
for arg in args.into_iter()
{
arg.insert_into_cfg(&mut cfg);
}
Ok(cfg)
}
}
/// Consume this parsed list of arguments into a `Mode` and return it
pub fn into_mode(args: Output) -> eyre::Result<Mode>
{
let mut mode_kind = ModeKind::default(); //Normal.
for arg in args.iter() {
//find any mode change Argument (with `Argument::mode_change_kind()`) in `args`, changing `mode_kind` in turn. There should be at most 1.
if let Some(mode) = arg.mode_change_kind()
{
mode_kind = mode;
break;
}
}
//pass `args` to the respective mode generation function in mode `modes`, and wrap that mode around its return value.
match mode_kind
{
ModeKind::Normal => modes::normal(args).map(Mode::Normal),
ModeKind::Help => Ok(Mode::Help),
}
}

@ -0,0 +1,12 @@
//! Dealing with bytes and stuff
/// Copy from `src` into `dst` and return the number of bytes copied.
///
/// # Notes
/// The regions *must not* overlap. This is UB if they do.
#[inline] pub unsafe fn copy_nonoverlapping_unchecked(src: &[u8], dst: &mut [u8]) -> usize
{
let len = std::cmp::min(dst.len(), src.len());
std::ptr::copy_nonoverlapping(src.as_ptr(), dst.as_mut_ptr(), len);
len
}

@ -1,6 +1,57 @@
use std::path::PathBuf;
use std::num::NonZeroUsize;
use std::{fmt,error};
use once_cell::sync::OnceCell;
static GLOBAL: OnceCell<Config> = OnceCell::new();
/// Get the global config instance, if it has been set.
#[inline] pub fn try_get_global() -> Option<&'static Config>
{
GLOBAL.get()
}
/// Get the global config instance.
/// # Panics
/// If one has not been set yet.
pub fn get_global() -> &'static Config
{
try_get_global().expect("Tried to access global config when it had not been initialised")
}
/// Try to set the global config instance, if it hasn't been already.
#[inline] pub fn try_set_global(cfg: Config) -> Result<(), Config>
{
GLOBAL.set(cfg)
}
/// Set the global config instance.
/// # Panics
/// If one has already been set.
pub fn set_global(cfg: Config)
{
try_set_global(cfg).expect("Tried to set global config more than once")
}
#[derive(Debug, Clone, PartialEq, Eq, Copy, PartialOrd, Ord)]
#[repr(u32)]
pub enum OutputLevel
{
Silent = 0,
Quiet = 1,
Noisy = 2,
Verbose = 3,
}
impl Default for OutputLevel
{
#[inline]
fn default() -> Self
{
Self::Noisy
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Recursion
@ -45,6 +96,49 @@ impl From<usize> for Recursion
}
}
#[cfg(feature="inspect")]
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum OutputSerialisationMode
{
Stdout,
File(PathBuf),
RawFile(PathBuf),
RawStdout,
#[cfg(feature="prealloc")] PreallocFile(PathBuf),
}
#[cfg(feature="inspect")]
impl OutputSerialisationMode
{
/// Should this serialisation mode be compressed?
#[inline] pub fn should_compress(&self) -> bool
{
match self {
Self::File(_) | Self::Stdout => true,
_ => false,
}
}
}
/// What to do with the graph afterwards?
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Inspection
{
pub treemap: Option<(u64, u64)>, //w and h
}
impl Default for Inspection
{
#[inline]
fn default() -> Self
{
Self {
treemap: None
}
}
}
/// Configuration for this run
#[derive(Debug, Clone, PartialEq, Eq)]
@ -53,6 +147,45 @@ pub struct Config
pub paths: Vec<PathBuf>,
pub recursive: Recursion,
pub max_tasks: Option<NonZeroUsize>,
pub output_level: OutputLevel,
#[cfg(feature="inspect")]
pub serialise_output: Option<OutputSerialisationMode>,
pub inspection: Inspection,
}
impl Config
{
/// Try to make this the global config instance, if one does not already exist.
#[inline] pub fn try_make_global(self) -> Result<&'static Self, Self>
{
try_set_global(self).map(|_| get_global())
}
/// Make this the global config instance.
/// # Panics
/// If a global config instance has already been set.
#[inline] pub fn make_global(self) -> &'static Self
{
set_global(self);
get_global()
//GLOBAL.get_or_init(move || self) // This isn't okay to do here. As it would silently fail if there was already an instance, when we want it to panic in that case.
}
/// Are we expected to dump data to `stdout`?
#[inline] pub fn is_using_stdout(&self) -> bool
{
#[cfg(feature="inspect")] {
return match self.serialise_output {
Some(OutputSerialisationMode::Stdout) |
Some(OutputSerialisationMode::RawStdout) => true,
_ => false,
}
}
#[cfg(not(feature="inspect"))] {
false
}
}
}
/// The default `max_tasks`
@ -72,7 +205,12 @@ impl Default for Config
Self {
paths: Vec::new(),
recursive: Default::default(),
max_tasks: max_tasks_cpus(),
max_tasks: None, //max_tasks_cpus(),
output_level: Default::default(),
#[cfg(feature="inspect")]
serialise_output: None,
inspection: Default::default(),
}
}
}
@ -118,7 +256,55 @@ impl fmt::Display for InvalidConfigError
match self {
Self::NoPaths => write!(f, "No input paths were given. Cannot do anything"),
Self::PathNotFound(path) => write!(f, "Root path {:?} not found", path),
#[cold] _ => write!(f, "Unknown error"),
_ => write!(f, "Unknown error"),
}
}
}
/// Print an error line in accordance with `Config`'s output directives.
#[macro_export] macro_rules! cfg_eprintln {
($cfg:expr, $fmt:literal $($tt:tt)*) => {
{
if $cfg.output_level > $crate::config::OutputLevel::Silent {
eprintln!($fmt $($tt)*);
}
}
};
($level:ident; $cfg:expr, $fmt:literal $($tt:tt)*) => {
{
if $cfg.output_level >= $crate::config::OutputLevel::$level {
eprintln!($fmt $($tt)*);
}
}
}
}
/// Print a line in accordance with `Config`'s output directives.
#[macro_export] macro_rules! cfg_println {
($cfg:expr, $fmt:literal $($tt:tt)*) => {
{
let cfg = &$cfg;
if cfg.output_level > $crate::config::OutputLevel::Quiet {
if cfg.is_using_stdout() {
eprintln!($fmt $($tt)*);
} else {
println!($fmt $($tt)*);
}
}
}
};
($level:ident; $cfg:expr, $fmt:literal $($tt:tt)*) => {
{
let cfg = &$cfg;
if cfg.output_level >= $crate::config::OutputLevel::$level {
if cfg.is_using_stdout() {
eprintln!($fmt $($tt)*);
} else {
println!($fmt $($tt)*);
}
}
}
};
}

@ -80,6 +80,13 @@ impl Cache
/// If there are shared references to the `Cache` other than this one, the future will return this object as `Err`.
pub fn try_complete(self) -> impl futures::Future<Output=Result<HashMap<INode, FsInfo>, Self>> + 'static
{
#[inline(never)]
#[cold]
fn only_one() -> !
{
unreachable!("Arc should have only 1 strong reference now. This should never happend")
}
async move {
if Arc::strong_count(&self.0) > 2 {
return Err(self); // there is another other than the background task holding the shared cache reference
@ -101,7 +108,7 @@ impl Cache
Ok(inner) => {
Ok(inner.cache.into_inner())
},
#[cold] Err(_) => unreachable!("Arc should have only 1 strong reference now. This should never happend"),
Err(_) => only_one(),
}
}
}

@ -15,8 +15,123 @@ pub struct INodeInfoGraph
children: HashMap<INode, Vec<INode>>, //reverse lookup for directory INodes and their parent INodes
}
#[derive(Debug, Clone)]
pub struct INodeInfoGraphEntry<'a>
{
master: &'a INodeInfoGraph,
inode: INode,
}
impl<'a> INodeInfoGraphEntry<'a>
{
/// Create an iterator over children of this graph item.
///
/// # Note
/// Returns `None` if this is not a directory item.
pub fn level(&self) -> Option<Level<'a>>
{
match self.master.inodes.get(&self.inode)
{
Some(FsInfo::Directory(parent)) => {
Some(
Level{
master: self.master,
inode: self.inode.clone(),
children: match self.master.children.get(&self.inode) {
Some(iter) => iter.iter(),
_ => [].iter(),
},
}
)
},
Some(FsInfo::File(_, _)) => None,
None => {
panic!("No lookup information for inode {:?}", self.inode)
}//Some(Level{master: self.master, inode: None, children: [].iter()}),
}
}
/// The `FsInfo` for this item.
pub fn info(&self) -> &FsInfo
{
self.master.inodes.get(&self.inode).unwrap()
}
/// The path for this inode
pub fn path(&self) -> &PathBuf
{
self.master.paths_reverse.get(&self.inode).unwrap()
}
}
#[derive(Debug, Clone)]
pub struct Level<'a>
{
master: &'a INodeInfoGraph,
inode: INode,// Should only ever be `Directory` for `Level`.
children: std::slice::Iter<'a, INode>,
}
#[derive(Debug, Clone)]
pub struct TopLevel<'a>
{
master: &'a INodeInfoGraph,
children: std::vec::IntoIter<&'a INode>,
}
impl<'a> Iterator for TopLevel<'a>
{
type Item = INodeInfoGraphEntry<'a>;
fn next(&mut self) -> Option<Self::Item>
{
let master = self.master;
self.children.next().map(|inode| {
INodeInfoGraphEntry{
master,
inode: inode.clone(),
}
})
}
}
impl<'a> Iterator for Level<'a>
{
type Item = INodeInfoGraphEntry<'a>;
fn next(&mut self) -> Option<Self::Item>
{
let master = self.master;
self.children.next().map(|inode| {
INodeInfoGraphEntry{
master,
inode: inode.clone(),
}
})
}
}
impl INodeInfoGraph
{
/// Total size of all files
pub fn total_size(&self) -> u64
{
self.inodes.iter().map(|(_, v)| {
match v {
FsInfo::File(len, _) => *len,
_ => 0,
}
}).sum()
}
/// An iterator over the top level of items
pub fn top_level(&self) -> TopLevel<'_>
{
let top_level = self.inodes.iter().filter(|(node, _)| {
!self.inodes.contains_key(node)
});
TopLevel {
master: self,
children: top_level.map(|(k, _)| k).collect::<Vec<_>>().into_iter(),
}
}
/// Create a new graph from these linked `HashMap`s
#[inline] pub fn new(inodes: HashMap<INode, FsInfo>, paths: HashMap<PathBuf, INode>) -> Self
{
@ -48,25 +163,31 @@ impl INodeInfoGraph
self.paths_reverse.extend(self.paths.iter().map(|(x,y)| (*y,x.clone())));
self
}
/// Total number of items in the graph
#[inline] pub fn len(&self) -> usize
{
self.children.len()
}
/// Get the FsInfo of this `INode`
#[inline] pub fn get_info(&self, node: impl Borrow<INode>) -> Option<&FsInfo>
{
self.inodes.get(node.borrow())
}
/*
/// An iterator over top-level children of this node
pub fn children_of(&self, node: impl Borrow<INode>) -> !//Children<'_>
{
todo!();
/*Children(self, match self.children.get(node.borrow()) {
Some(slc) => slc.iter(),
_ => [].iter(),
})*/
}
todo!();
/*Children(self, match self.children.get(node.borrow()) {
Some(slc) => slc.iter(),
_ => [].iter(),
})*/
}
/// An iterator over all the directories in this
pub fn directories(&self) -> !//Directories<'_>
{
todo!()//Directories(self, self.children.keys())
}
todo!()//Directories(self, self.children.keys())
}*/
/// Convert into a hierarchical representation
pub fn into_hierarchical(self) -> HierarchicalINodeGraph
@ -82,6 +203,7 @@ impl INodeInfoGraph
}
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature="inspect", derive(serde::Serialize, serde::Deserialize))]
enum NodeKind
{
Directory(Vec<PathBuf>, Option<u64>),
@ -100,6 +222,7 @@ impl NodeKind
}
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature="inspect", derive(serde::Serialize, serde::Deserialize))]
struct HierarchicalNode
{
kind: NodeKind,
@ -128,6 +251,7 @@ impl HierarchicalNode
/// A hierarchical graph of node sizes
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature="inspect", derive(serde::Serialize, serde::Deserialize))]
pub struct HierarchicalINodeGraph
{
table: HashMap<PathBuf, HierarchicalNode>
@ -260,6 +384,12 @@ impl HierarchicalINodeGraph
hi.len().map(|x| (path.as_path(), x))
}).max_by_key(|x| x.1)
}
/// Complete number of items in the tree
#[inline] pub fn len(&self) -> usize
{
self.table.len()
}
}
impl From<INodeInfoGraph> for HierarchicalINodeGraph

@ -18,6 +18,7 @@ pub use graph::{
INodeInfoGraph,
HierarchicalINodeGraph,
FsObjKind as FsKind,
INodeInfoGraphEntry,
};
/// A raw file or directory inode number
@ -25,6 +26,7 @@ pub use graph::{
/// Ususally created from the `.inode()` extension method on `fs::Metadata` found in prelude.
/// Can also be created with `new()` from a `fs::Metadata` reference, or created unsafely from an arbitrary `u64` with `new_unchecked`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Copy)]
#[cfg_attr(feature="inspect", derive(serde::Serialize, serde::Deserialize))]
#[repr(transparent)]
pub struct INode(u64);

@ -0,0 +1,40 @@
//! Mechanism to defer dropping of large objects to background threads
use super::*;
use futures::{
prelude::*,
future::OptionFuture,
};
pub const DEFER_DROP_VEC_SIZE_FLOOR: usize = 1024 * 1024; // 1 MB
/// Drop a `Vec<T>` that is `Send` and `'static`.
///
/// This will move the object to a background task if it is deemed nessisary.
/// # Note
/// This *must* be `await`ed to work properly. If you are not in an async context, use `drop_vec_sync`.
pub fn drop_vec<T>(vec: Vec<T>) -> impl Future<Output = ()> + 'static
where T: Send + 'static
{
let len_bytes = vec.len() * std::mem::size_of::<T>();
OptionFuture::from(if len_bytes > DEFER_DROP_VEC_SIZE_FLOOR {
cfg_eprintln!(Verbose; config::get_global(), "Size of vector ({} bytes, {} elements of {:?}) exceeds defer drop size floor {}. Moving vector to a seperate thread for de-allocation", len_bytes, vec.len(), std::any::type_name::<T>(), DEFER_DROP_VEC_SIZE_FLOOR);
Some(async move {
tokio::task::spawn_blocking(move || drop(vec)).await.expect("Child panic while dropping vector");
})
} else {
None
}).map(|_| ())
}
/// Drop a `Vec<T>` that is `Send` and `'static`.
///
/// This will move the object to a background task if it is deemed nessisary.
pub fn drop_vec_sync<T>(vec: Vec<T>)
where T: Send + 'static
{
let len_bytes = vec.len() * std::mem::size_of::<T>();
if len_bytes > DEFER_DROP_VEC_SIZE_FLOOR {
cfg_eprintln!(Verbose; config::get_global(), "Size of vector ({} bytes, {} elements of {:?}) exceeds defer drop size floor {}. Moving vector to a seperate thread for de-allocation", len_bytes, vec.len(), std::any::type_name::<T>(), DEFER_DROP_VEC_SIZE_FLOOR);
tokio::task::spawn_blocking(move || drop(vec));
}
}

@ -11,12 +11,17 @@ use std::{
pin::Pin,
task::{Poll, Context},
};
use std::{fmt, error};
pub mod prelude
{
pub use super::StreamGateExt as _;
pub use super::StreamLagExt as _;
pub use super::INodeExt as _;
pub use super::OptionIterator;
pub use super::MaybeVec;
}
pub trait INodeExt
@ -200,6 +205,63 @@ where S: Stream
}
}
/// An iterator that can be constructed from an `Option<Iterator>`.
#[derive(Debug, Clone)]
pub struct OptionIterator<I>(Option<I>);
impl<I> OptionIterator<I>
{
/// Consume into the inner `Option`.
#[inline] pub fn into_inner(self) -> Option<I>
{
self.0
}
/// Does this `OptionIterator` have a value?
pub fn is_some(&self) -> bool
{
self.0.is_some()
}
}
impl<I: Iterator> From<Option<I>> for OptionIterator<I>
{
fn from(from: Option<I>) -> Self
{
Self(from)
}
}
impl<I: Iterator> Iterator for OptionIterator<I>
{
type Item = I::Item;
fn next(&mut self) -> Option<Self::Item>
{
self.0.as_mut().map(|x| x.next()).flatten()
}
fn size_hint(&self) -> (usize, Option<usize>) {
match self.0.as_ref() {
Some(i) => i.size_hint(),
_ => (0, Some(0)),
}
}
}
impl<I: Iterator> std::iter::FusedIterator for OptionIterator<I>
where I: std::iter::FusedIterator{}
impl<I: Iterator> ExactSizeIterator for OptionIterator<I>
where I: ExactSizeIterator{}
impl<I: Iterator> DoubleEndedIterator for OptionIterator<I>
where I: DoubleEndedIterator
{
fn next_back(&mut self) -> Option<Self::Item> {
self.0.as_mut().map(|x| x.next_back()).flatten()
}
}
/// Create a duration with time suffix `h`, `m`, `s`, `ms` or `ns`.
///
/// # Combination
@ -282,3 +344,68 @@ mod tests
assert_eq!((0..100).sum::<i32>(),sum);
}
}
/// Explicitly drop this item.
///
/// If `defer-drop` feature is enabled, this may move the object to the background collector thread.
///
/// # Speicialisations
/// There can be special handling for `Vec<T>` types in this way.
/// ```
/// let large_vec = vec![String::from("Hello world"); 1000];
/// drop!(vec large_vec);
/// ```
/// It also has an `async` variant, that lets you await the background dropping task.
/// ```
/// let large_vec = vec![String::from("Hello world"); 1000];
/// drop!(async vec large_vec);
/// ```
#[macro_export] macro_rules! drop {
(async vec $item:expr) => {
#[cfg(feature="defer-drop")] {
$crate::defer_drop::drop_vec($item).await;
}
#[cfg(not(feature="defer-drop"))] {
drop($item);
}
()
};
(vec $item:expr) => {
#[cfg(feature="defer-drop")] {
$crate::defer_drop::drop_vec_sync($item);
}
#[cfg(not(feature="defer-drop"))] {
drop($item);
}
()
}
}
/// Base type from macro `eyre_assert`.
#[derive(Debug)]
pub struct SoftAssertionFailedError;
impl error::Error for SoftAssertionFailedError{}
impl fmt::Display for SoftAssertionFailedError
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
{
write!(f, "Assertion failed")
}
}
/// A soft assertion that yields an `Err(eyre::Report)` if the condition fails.
#[macro_export] macro_rules! eyre_assert {
($cond:expr $(; $message:literal)?) => {
if !$cond {
Err($crate::ext::SoftAssertionFailedError)
$(.wrap_err(eyre!($message)))?
.with_section(|| stringify!($cond).header("Expression was"))
} else {
Ok(())
}
};
}
pub type MaybeVec<T> = smallvec::SmallVec<[T; 1]>;

@ -0,0 +1,124 @@
use super::*;
use treemap::{
Rect,
Mappable,
TreemapLayout
};
use data::{FsInfo, INodeInfoGraph, INodeInfoGraphEntry};
/// A treemap of all **files** in the graph.
#[derive(Debug, Clone, PartialEq)]
pub struct Treemap
{
//layout: TreemapLayout,
nodes: Vec<MapNode>,
}
impl Treemap
{
/// All nodes of the map.
#[inline] pub fn nodes(&self) -> &[MapNode]
{
&self.nodes[..]
}
}
#[derive(Debug, Clone, PartialEq)]
pub struct MapNode
{
name: String,
vw_size: f64, // Should be halved each iteration
vw_bounds: Rect, // should be Rect::new() before aligntment
}
impl MapNode
{
/// The calculated bounds of the node
#[inline] pub fn bounds(&self) -> &Rect
{
&self.vw_bounds
}
/// The name of the node
#[inline] pub fn name(&self) -> &str
{
&self.name[..]
}
#[inline] fn size(&self) -> f64 //Is this useful for consumer?
{
self.vw_size
}
#[inline] fn new(name: String) -> Self
{
Self {
vw_size: 1.0,
vw_bounds: Rect::new(),
name,
}
}
}
/// Create a treemap from this graph.
pub fn treemap(_cfg: &Config, graph: &INodeInfoGraph, (w, h): (f64, f64)) -> Treemap
{
let layout = TreemapLayout::new();
let mut nodes = Vec::with_capacity(graph.len());
//TODO: Recursively walk the graph, halving size with each iteration. (Maybe we need `INodeInfoGraph` here, not `Hierarchicalinodegraph`?)
let total_size = graph.total_size();
let size = 1.0;
fn calc_path<'a, I: IntoIterator<Item = INodeInfoGraphEntry<'a>>>(insert: &'a mut Vec<MapNode>, from: I, total_size: u64, size: f64, scale: f64)
{
for top in from {
let path = top.path();
match top.info() {
FsInfo::Directory(_) => {
//TODO: Do we add dir itself? I think not?
// Add children
let size = size * 0.5;
calc_path(insert, top.level().unwrap(), total_size, size, scale);
},
&FsInfo::File(sz, _) => {
let fract = (sz as f64) / (total_size as f64);
insert.push(MapNode {
name: path.to_string_lossy().into_owned(),
vw_size: fract * scale,
vw_bounds: Rect::new(),
})
},
}
}
}
calc_path(&mut nodes, graph.top_level(), total_size, size, 1.0);
layout.layout_items(&mut nodes[..], Rect {
x: 0.0,
y: 0.0,
w, h,
});
Treemap {
//layout,
nodes
}
}
impl Mappable for MapNode
{
fn size(&self) -> f64
{
self.vw_size
}
fn bounds(&self) -> &Rect
{
&self.vw_bounds
}
fn set_bounds(&mut self, bounds: Rect)
{
self.vw_bounds = bounds;
}
}

@ -0,0 +1,21 @@
//! Prints the information found in graph in different ways
use super::*;
use data::HierarchicalINodeGraph;
use config::Config;
//pub mod repl;
/// Print the most basic info
pub fn print_basic_max_info(cfg: &Config, graph: &HierarchicalINodeGraph)
{
cfg_println!(Quiet; cfg, "Max size file: {:?}", graph.path_max_size_for(data::FsKind::File));
cfg_println!(Quiet; cfg, "Max size dir: {:?}", graph.path_max_size_for(data::FsKind::Directory));
cfg_println!(Quiet; cfg, "Max size all: {:?}", graph.path_max_size());
}
#[cfg(feature="treemap")]
mod map;
#[cfg(feature="treemap")]
pub use map::*;

@ -0,0 +1,131 @@
//! Graph inspection REPL
use super::*;
use std::{fmt, error};
use std::path::PathBuf;
use std::io;
use rustyline::error::ReadlineError;
use rustyline::Editor;
mod env;
mod command;
mod opcodes;
/// Default history file name
///
/// # Path lookup
/// * To make this an absolute path, start it with `/`
/// * To make this path relative to the user's home directory, start it with `~/` (Note: If we are unable to find the user's home directory, it is considered a lookup **failure** (*not* a **disable**) and `calculate_history_path()` will return `Err`.)
/// * Otherwise, the path is taken relative to the current working directory
///
/// # Notes
/// This is only used when the `save-history` feature is enabled.
const DEFAULT_HISTORY_FILE: &'static str = "~/.dirstat_history";
/// Get the path to the history file.
///
/// # Lookup
/// * If the `DIRSTAT_HISTORY` envvar is set and not empty, use this file path.
/// * If the `DIRSTAT_HISTORY` envvar is set and empty, saving history is considered **disabled**, we return `Ok(None)`.
/// * Otherwise, refer to lookup rules for `DEFAULT_HISTORY_FILE`.
pub fn calculate_history_path() -> io::Result<Option<PathBuf>>
{
cfg_if! {
if #[cfg(feature="save-history")] {
todo!()
} else {
unreachable!("Tried to calculate repl history path when binary was compiled with history saving perma-disabled.")
}
}
}
/// Inspect the graph with commands
///
/// # Note
/// This function synchronously blocks the current thread.
pub fn inspect(cfg: &Config, graph: &HierarchicalINodeGraph) -> Result<(), ReplExitError>
{
let mut repl = Editor::<()>::new(); //TODO: Change `()` to our completer, when we have a decent idea of how they'll work.
cfg_if! {
if #[cfg(feature="save-history")] {
let history_path = match calculate_history_path() {
Ok(Some(path)) => {
if let Err(err) = repl.load_history(&path)
{
cfg_eprintln!(cfg, "Failed to load repl history from {:?}: {}", path, err);
}
Some(path)
},
Ok(None) => None,
Err(err)
{
cfg_eprintln!(cfg, "Failed to find repl history: {}", err);
None
}
}
}
}
let res: Result<(), ReplExitError> = try {
loop {
let line = repl.readline("> ")?;
repl.add_history_entry(&line);
//TODO: How to interpret commands?
todo!("Interpret commands from `line`.");
}
};
cfg_if! {
if #[cfg(feature="save-history")] {
if let Some(path) = history_path {
if let Err(err) = repl.save_history(&path)
{
cfg_eprintln!(cfg, "Failed to save repl history to {:?}: {}", path, err);
}
}
}
}
res
}
/// When the inspection repl exists abnormally.
#[derive(Debug)]
pub enum ReplExitError
{
ReadLine(ReadlineError),
}
impl From<ReadlineError> for ReplExitError
{
#[inline] fn from(from: ReadlineError) -> Self
{
Self::ReadLine(from)
}
}
impl error::Error for ReplExitError
{
fn source(&self) -> Option<&(dyn error::Error + 'static)> {
Some(match &self
{
Self::ReadLine(rl) => rl
})
}
}
impl fmt::Display for ReplExitError
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
{
match self
{
Self::ReadLine(ReadlineError::Eof) |
Self::ReadLine(ReadlineError::Interrupted) => write!(f, "exit"),
Self::ReadLine(_) => write!(f, "readline error"),
}
}
}

@ -0,0 +1,54 @@
//! Repl commands
use super::*;
use std::str::FromStr;
use super::env::*;
#[derive(Debug)]
pub struct Context<'a>
{
/// Environment containing variable name mappings.
env: &'a mut Lexenv,
}
/// Trait for commands.
///
/// # Defining commands
/// A command object should be created once only, and then referenced and executed using `params` and through mutating `cx`.
pub trait Command: fmt::Debug
{
fn execute(&self, cx: &mut Context<'_>, params: Vec<Value>) -> eyre::Result<()>;
}
/// Command structurally parsed.
///
/// Can be converted into `Command` with the `TryInto` trait.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct IR
{
op: String,
params: Vec<Value>,
}
impl FromStr for IR
{
type Err = CommandParseError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
todo!()
}
}
/// Error when parsing a command into `IR`.
#[derive(Debug)]
pub struct CommandParseError(String);
impl error::Error for CommandParseError{}
impl fmt::Display for CommandParseError
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
{
write!(f, "failed to parse command from {:?}", self.0)
}
}

@ -0,0 +1,242 @@
//! Execution environment for repl
use super::*;
use std::str::FromStr;
use std::collections::{BTreeMap, HashMap};
#[derive(Debug)]
pub struct Lexenv
{
/// Maps symbol name to value in generations.
kvstack: BTreeMap<usize, HashMap<String, Value>>,
/// Current generation of the satck
current_generation: usize,
}
impl Lexenv
{
/// The generation number of the current level.
pub fn depth(&self) -> usize
{
self.current_generation
}
/// Create a new empty lexenv
pub fn new() -> Self
{
Self {
kvstack: BTreeMap::new(),
current_generation: 0,
}
}
/// All valid symbols at this level.
///
/// # Ordering
/// Each symbol's level will appear in the order from level 0 to the current level, however the order of intra-level symbols is undefined.
pub fn symbols(&self) -> impl Iterator<Item = &'_ Value> + '_
{
self.kvstack.range(0..=self.current_generation).flat_map(|(_, v)| v.values())
}
/// All valid symbols **in** this level.
pub fn symbols_local(&self) -> impl Iterator<Item = &'_ Value> + '_
{
OptionIterator::from(self.kvstack.get(&self.current_generation).map(|x| x.values()))
}
/// Remove the current level, but leave its memory allocated for further use.
pub fn pop(&mut self)
{
self.kvstack.entry(self.current_generation).or_insert_with(|| HashMap::new()).clear();
if self.current_generation > 0 {
self.current_generation-=1;
}
}
/// Remove a symbol from the **current** level.
pub fn remove(&mut self, name: &str) -> Option<Value>
{
self.kvstack.entry(self.current_generation).or_insert_with(|| HashMap::new()).remove(name)
}
/// Insert a new value mapping into the current level.
pub fn insert(&mut self, name: String, value: Value)
{
self.kvstack.entry(self.current_generation).or_insert_with(|| HashMap::new()).insert(name, value);
}
/// Look up a symbol in this or any of the above levels.
pub fn lookup(&self, name: &str) -> Option<&Value>
{
for (_, lvmap) in self.kvstack.range(0..=self.current_generation).rev()
{
let m = lvmap.get(name);
if m.is_some() {
return m;
}
}
None
}
/// Look up a symbol in this level.
pub fn lookup_local(&self, name: &str) -> Option<&Value>
{
self.kvstack.get(&self.current_generation).map(|map| map.get(name)).flatten()
}
/// Create a new, empty level.
pub fn push(&mut self)
{
self.current_generation+=1;
}
/// Remove the current level, deallocating any memory it was using.
pub fn pop_clear(&mut self)
{
if self.current_generation > 0 {
self.kvstack.remove(&self.current_generation);
self.current_generation -=1;
} else {
self.kvstack.entry(0).or_insert_with(|| HashMap::new()).clear();
}
}
}
/// The value type
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Value
{
String(String),
Symbol(String),
List(Vec<Value>),
}
impl Value
{
/// Parse from an iterator of `char`s.
pub fn parse_chars<T>(ch: &mut T) -> Result<Self, ValueParseError>
where T: Iterator<Item = char>
{
match ch.next()
{
Some('(') => {
todo!("list");
},
Some('"') => {
todo!("string");
},
Some(first_chr) => {
todo!("symbol");
},
_ => Err(ValueParseError(String::default())),
}
}
/// Parse a `Value` from this string and then return the rest of the string.
#[deprecated]
pub fn parse_running(s: &str) -> Result<(Self, &'_ str), ValueParseError>
{
match s.trim().as_bytes()
{
& [b'(', ..] => {
todo!("list");
},
& [b'"', ..] => {
todo!("string");
},
_ => {
todo!("shmbol");
}
}
}
}
impl FromStr for Value
{
type Err = ValueParseError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::parse_running(s).map(|(x, _)| x)
}
}
impl Value
{
pub fn try_as_symbol(&self) -> Result<&str, ValueTypeError>
{
match self {
Self::Symbol(s) => Ok(&s[..]),
_ => Err(ValueTypeError::Symbol),
}
}
pub fn try_as_string(&self) -> Result<&str, ValueTypeError>
{
match self {
Self::Symbol(s) |
Self::String(s) => Ok(&s[..]),
_ => Err(ValueTypeError::String),
}
}
pub fn try_as_list(&self) -> Result<&[Value], ValueTypeError>
{
match self {
Self::List(l) => Ok(&l[..]),
_ => Err(ValueTypeError::List),
}
}
pub fn as_symbol(&self) -> Option<&str>
{
match self {
Self::Symbol(s) => Some(&s[..]),
_ => None,
}
}
pub fn as_string(&self) -> Option<&str>
{
match self {
Self::Symbol(s) |
Self::String(s) => Some(&s[..]),
_ => None,
}
}
pub fn as_list(&self) -> Option<&[Value]>
{
match self {
Self::List(l) => Some(&l[..]),
_ => None,
}
}
}
/// Error when using `try_as_*` functions on `Value`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Copy)]
pub enum ValueTypeError
{
Symbol,
String,
List,
}
/// Error when parsing a `Value` from a stirng.
#[derive(Debug)]
pub struct ValueParseError(String);
impl error::Error for ValueParseError{}
impl fmt::Display for ValueParseError
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
{
write!(f, "cannot parse {:?}", self.0)
}
}
impl error::Error for ValueTypeError{}
impl fmt::Display for ValueTypeError
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
{
write!(f, "type error: expected ")?;
match self {
Self::Symbol => write!(f, "symbol"),
Self::String => write!(f, "string"),
Self::List => write!(f, "list"),
}
}
}

@ -0,0 +1,13 @@
//! Defined commands
use super::*;
use env::*;
use command::*;
/// Contains all operations
#[derive(Debug, Clone)]
pub struct Operations
{
}

@ -1,9 +1,22 @@
#![feature(try_blocks)]
#![allow(dead_code)]
#[macro_use] extern crate pin_project;
#[macro_use] extern crate lazy_static;
#[macro_use] extern crate cfg_if;
#[macro_use] extern crate ad_hoc_iter;
#[cfg(feature="inspect")] use serde::{Serialize, Deserialize};
#[cfg(feature="jemalloc")]
use jemallocator::Jemalloc;
#[cfg(feature="jemalloc")]
#[global_allocator]
static GLOBAL: Jemalloc = Jemalloc;
use std::convert::{TryFrom, TryInto};
use color_eyre::{
eyre::{
self,
@ -18,20 +31,178 @@ use color_eyre::{
#[macro_use] mod ext;
pub use ext::prelude::*;
use std::sync::Arc;
mod bytes;
mod data;
mod config;
mod state;
mod arg;
mod work;
mod info;
#[cfg(feature="inspect")] mod serial;
#[cfg(feature="defer-drop")] mod defer_drop;
#[cfg(feature="inspect")]
async fn write_graph(graph: Arc<data::HierarchicalINodeGraph>) -> eyre::Result<()>
{
let cfg = config::get_global();
match cfg.serialise_output.as_ref().map(|ser_out| {
cfg_eprintln!(Verbose; cfg, "Writing graph to stream...");
type BoxedWrite = Box<dyn tokio::io::AsyncWrite + Unpin + Send + Sync>;
use futures::FutureExt;
use config::OutputSerialisationMode;
let should_comp = ser_out.should_compress();
match ser_out {
OutputSerialisationMode::File(output_file) |
OutputSerialisationMode::RawFile(output_file) => {
use tokio::fs::OpenOptions;
(async move {
let stream = OpenOptions::new()
.write(true)
.truncate(true)
.create(true)
.open(output_file).await
.wrap_err(eyre!("Failed to open file for writing"))
.with_section(|| format!("{:?}", output_file).header("File was"))?;
Ok::<BoxedWrite, eyre::Report>(Box::new(stream))
}.boxed(), None, should_comp)
},
OutputSerialisationMode::Stdout |
OutputSerialisationMode::RawStdout => (async move { Ok::<BoxedWrite, _>(Box::new(tokio::io::stdout())) }.boxed(), Option::<&std::path::PathBuf>::None, should_comp),
#[cfg(feature="prealloc")] OutputSerialisationMode::PreallocFile(output_file) => {
(async move {
Ok::<BoxedWrite, _>(Box::new(tokio::io::sink())) // we use a sink as a shim stream since it will never be used when tuple item `.1` is Some()
}.boxed(), Some(output_file), false)
},
}
}) {
// We use tuple item `.1` here to indicate if we're in normal write mode.
// None -> normal
// Some(path) -> prealloc
// `.2` indicates if we should compress while in normal write mode.
Some((stream_fut, None, compress)) => {
let stream = stream_fut.await?;
serial::write_async(stream, graph.as_ref(), compress).await
.wrap_err(eyre!("Failed to serialise graph to stream"))?;
},
#[cfg(feature="prealloc")] Some((_task_fut, Some(output_file), _)) => {
use tokio::fs::OpenOptions;
let file = OpenOptions::new()
.write(true)
.read(true) //needed for map I think?
.truncate(true)
.create(true)
.open(&output_file).await
.wrap_err(eyre!("Failed to open file for mapping"))
.with_section(|| format!("{:?}", output_file).header("File was"))?;
let mut file = file.into_std().await;
cfg_eprintln!(Verbose; cfg, "Opened file for prealloc-write");
tokio::task::spawn_blocking(move || {
serial::write_sync_map(&mut file, graph.as_ref())
}).await.wrap_err(eyre!("Prealloc panicked while dumping"))
.with_section(|| format!("{:?}", output_file).header("File was"))?
.wrap_err(eyre!("Prealloc failed to dump graph to file"))
.with_section(|| format!("{:?}", output_file).header("File was"))?;
},
_ => (),
}
Ok(())
}
async fn normal(cfg: config::Config) -> eyre::Result<()>
{
let state = state::State::new(cfg
.validate()
.wrap_err(eyre!("Invalid config"))
.with_suggestion(|| "Try running `--help`")?);
let (graph, cfg) = tokio::select!{
x = work::work_on_all(state) => {x}
_ = tokio::signal::ctrl_c() => {
return Err(eyre!("Interrupt signalled, exiting"));
}
};
let cfg = cfg.make_global();
let (treemap, graph) = tokio::task::spawn_blocking(move || {
cfg_println!(Verbose; cfg, "Computing hierarchy...");
async fn read_config() -> eyre::Result<config::Config>
// Create treemap
#[cfg(feature="treemap")]
let treemap = {
//Check config for if the treemap flag is present. If it is, get the width and height from there too.
if let Some(&(x,y)) = cfg.inspection.treemap.as_ref() {
Some(info::treemap(cfg, &graph, (
x as f64,
y as f64,
)))
} else {
None
}
};
#[cfg(not(feature="treemap"))]
let treemap = Option::<std::convert::Infallible>::None;
// Create recursive graph
let mut graph = graph.into_hierarchical();
cfg_println!(Verbose; cfg, "Computing sizes...");
graph.compute_recursive_sizes();
(treemap, graph)
}).await.expect("Failed to compute hierarchy from graph");
//#[cfg(debug_assertions)] cfg_eprintln!(Verbose; cfg, "{:?}", graph);
#[allow(unused_imports)] use futures::future::OptionFuture;
let (graph, writer) = {
cfg_if!{
if #[cfg(feature="inspect")] {
let graph = Arc::new(graph);
let (g, w) = if cfg.serialise_output.is_some() {
(Arc::clone(&graph), Some(tokio::spawn(write_graph(graph))))
} else {
(graph, None)
};
(g, OptionFuture::from(w))
} else {
(graph, ())//OptionFuture::from(Option::<futures::future::BoxFuture<'static, ()>>::None))
}
}
};
#[cfg(feature="treemap")]
if let Some(treemap) = treemap {
//TODO: Print treemap
todo!("{:?}",treemap);
}
info::print_basic_max_info(&cfg, &graph);
#[cfg(feature="inspect")]
match writer.await {
Some(Ok(Ok(_))) if cfg.serialise_output.is_some() => cfg_eprintln!(Verbose; cfg, "Written successfully"),
Some(Ok(error @ Err(_))) => return error.wrap_err(eyre!("Failed to write graph to output stream")),
Some(Err(_)) => cfg_eprintln!(Silent; cfg, "Panic while writing graph to stream"),
_ => (),
}
#[cfg(not(feature="inspect"))] drop(writer);
Ok(())
}
async fn parse_mode() -> eyre::Result<()>
{
match arg::parse_args().wrap_err(eyre!("Failed to parse args"))?
match arg::parse_args()
.wrap_err(eyre!("Failed to parse args"))
.with_suggestion(|| "Try running `--help`")?
{
arg::Mode::Normal(cfg) => {
#[cfg(debug_assertions)] eprintln!("Parsed config: {:#?}\n", cfg);
Ok(cfg)
#[cfg(debug_assertions)] eprintln!("cfg: {:#?}", cfg);
normal(cfg).await
},
arg::Mode::Help => arg::help(),
}
@ -41,22 +212,10 @@ async fn read_config() -> eyre::Result<config::Config>
async fn main() -> eyre::Result<()> {
color_eyre::install()?;
let state = state::State::new(read_config().await
.wrap_err(eyre!("Failed to load config"))?
.validate()
.wrap_err(eyre!("Invalid config"))
.with_suggestion(|| "Try running `--help`")?);
let graph = work::work_on_all(state).await;
let mut graph = graph.into_hierarchical();
graph.compute_recursive_sizes();
println!("{:?}", graph);
parse_mode().await
.wrap_err(eyre!("Fatal error"))?;
println!("Max size file: {:?}", graph.path_max_size_for(data::FsKind::File));
println!("Max size dir: {:?}", graph.path_max_size_for(data::FsKind::Directory));
println!("Max size all: {:?}", graph.path_max_size());
/* let max_size = graph.directories().map(|x| x.size()).max();
/* let max_size = graph.directories().map(|x| x.size()).max();
println!("Max size: {:?}", max_size);*/
//println!("{:?}", graph);

@ -0,0 +1,288 @@
//! For serializing
use super::*;
use tokio::prelude::*;
use std::ops::{Deref, DerefMut};
use serde::de::DeserializeOwned;
use async_compression::tokio_02::write::{
BzEncoder,
BzDecoder,
};
type Compressor<T> = BzEncoder<T>;
type Decompressor<T> = BzDecoder<T>;
const DEFER_DROP_SIZE_FLOOR: usize = 1024 * 1024; // 1 MB
const DESERIALISE_OBJECT_READ_LIMIT: usize = 1024 * 1024 * 1024 * 2; // 2GB
const BUFFER_SIZE: usize = 4096;
#[derive(Debug)]
enum MaybeCompressor<'a, T>
{
Compressing(Compressor<&'a mut T>),
Decompressing(Decompressor<&'a mut T>),
Raw(&'a mut T),
}
/// Compress or decompress?
#[derive(Debug, Clone, PartialEq, Eq, Hash, Copy, PartialOrd, Ord)]
enum CompKind
{
Compress,
Decompress
}
impl Default for CompKind
{
#[inline]
fn default() -> Self
{
Self::Compress
}
}
impl<'a, T> MaybeCompressor<'a, T>
{
/// What kind is this compressor set to
pub fn kind(&self) -> Option<CompKind>
{
Some(match self {
Self::Raw(_) => return None,
Self::Compressing(_) => CompKind::Compress,
Self::Decompressing(_) => CompKind::Decompress,
})
}
}
impl<'a, T> MaybeCompressor<'a, T>
where T: AsyncWrite +Send + Unpin + 'a
{
pub fn new(raw: &'a mut T, compress: Option<CompKind>) -> Self
{
match compress {
Some(CompKind::Compress) => Self::Compressing(Compressor::new(raw)),
Some(CompKind::Decompress) => Self::Decompressing(Decompressor::new(raw)),
None => Self::Raw(raw),
}
}
}
impl<'a, T> DerefMut for MaybeCompressor<'a, T>
where T: AsyncWrite + Send + Unpin + 'a
{
fn deref_mut(&mut self) -> &mut Self::Target {
match self {
Self::Compressing(t) => t,
Self::Decompressing(t) => t,
Self::Raw(o) => o,
}
}
}
impl<'a, T> Deref for MaybeCompressor<'a, T>
where T: AsyncWrite + Unpin + Send + 'a
{
type Target = dyn AsyncWrite + Send + Unpin+ 'a;
fn deref(&self) -> &Self::Target {
match self {
Self::Compressing(t) => t,
Self::Decompressing(t) => t,
Self::Raw(o) => o,
}
}
}
async fn copy_with_limit<R,W>(mut from: R, mut to: W) -> io::Result<usize>
where R: AsyncRead + Unpin,
W: AsyncWrite + Unpin
{
let mut buffer = [0u8; BUFFER_SIZE];
let mut read;
let mut done=0;
while {read = from.read(&mut buffer[..]).await?; read>0}
{
to.write_all(&buffer[..read]).await?;
done+=read;
if done > DESERIALISE_OBJECT_READ_LIMIT {
return Err(io::Error::new(io::ErrorKind::ConnectionAborted, eyre!("Exceeded limit, aborting.")
.with_section(|| DESERIALISE_OBJECT_READ_LIMIT.header("Object read size limit was"))
.with_section(|| done.header("Currently read"))));
}
}
Ok(done)
}
/// Deserialise an object from this stream asynchronously
///
/// # Note
/// If the stream is compressed, `compressed` must be set to true or an error will be produced.
/// An autodetect feature may be added in the future
pub async fn read_async<T: DeserializeOwned + Send + 'static, R>(mut from: R, compressed: bool) -> eyre::Result<T>
where R: AsyncRead + Unpin + Send
{
let sect_type_name = || std::any::type_name::<T>().header("Type trying to deserialise was");
let sect_stream_type_name = || std::any::type_name::<R>().header("Stream type was");
let vec = {
let mut vec = Vec::new();
let mut writer = MaybeCompressor::new(&mut vec, compressed.then(|| CompKind::Decompress));
copy_with_limit(&mut from, writer.deref_mut()).await
.wrap_err(eyre!("Failed to copy stream into in-memory buffer"))
.with_section(sect_type_name.clone())
.with_section(sect_stream_type_name.clone())?;
writer.flush().await.wrap_err(eyre!("Failed to flush decompression stream"))?;
writer.shutdown().await.wrap_err(eyre!("Failed to shutdown decompression stream"))?;
vec
};
tokio::task::spawn_blocking(move || {
(serde_cbor::from_slice(&vec[..])
.wrap_err(eyre!("Failed to deseralised decompressed data"))
.with_section(sect_type_name.clone())
.with_section(sect_stream_type_name.clone()),
{drop!(vec vec);}).0
}).await.wrap_err(eyre!("Panic while deserialising decompressed data"))?
}
/// Serialise this object asynchronously
///
/// # Note
/// This compresses the output stream.
/// It cannot be used by `prealloc` read/write functions, as they do not compress.
pub async fn write_async<T: Serialize, W>(mut to: W, item: &T, compress: bool) -> eyre::Result<()>
where W: AsyncWrite + Unpin + Send
{
let sect_type_name = || std::any::type_name::<T>().header("Type trying to serialise was");
let sect_stream_type_name = || std::any::type_name::<W>().header("Stream type was");
let vec = tokio::task::block_in_place(|| serde_cbor::to_vec(item))
.wrap_err(eyre!("Failed to serialise item"))
.with_section(sect_stream_type_name.clone())
.with_section(sect_type_name.clone())?;
{
let mut stream = MaybeCompressor::new(&mut to, compress.then(|| CompKind::Compress));
cfg_eprintln!(Verbose; config::get_global(), "Writing {} bytes of type {:?} to stream of type {:?}", vec.len(), std::any::type_name::<T>(), std::any::type_name::<W>());
stream.write_all(&vec[..])
.await
.wrap_err(eyre!("Failed to write serialised memory to stream"))
.with_section(|| vec.len().to_string().header("Size of the serialised object was"))
.with_section(sect_stream_type_name.clone())
.with_section(sect_type_name.clone())?;
stream.flush().await.wrap_err(eyre!("Failed to flush output compression stream"))?;
stream.shutdown().await.wrap_err(eyre!("Failed to shutdown output compression stream"))?;
}
// Extremely overcomplicated concurrent flush+drop.
use futures::FutureExt;
let flush_fut = async {
to.flush().await.wrap_err(eyre!("Failed to flush output backing stream"))?;
to.shutdown().await.wrap_err(eyre!("Failed to shutdown output backing stream"))?;
Ok::<(), eyre::Report>(())
}.fuse();
tokio::pin!(flush_fut);
tokio::select!{
res = &mut flush_fut => {
return res;
}
_ = async move { drop!(async vec vec); } => {}
}
flush_fut.await
}
#[cfg(feature="prealloc")]
mod prealloc {
use super::*;
use std::os::unix::prelude::*;
use std::fs::File;
use memmap::{MmapMut, Mmap};
/// Write this object as-is to this file descriptor.
///
/// # Note
/// This does not compress like `write_aynsc()` does. It is just a 1-1 dump of the serialisation.
/// Therefore, data written with `write_prealloc()` cannot be then read used with `read_async()`.
///
/// This is a completely synchronous operation. You should use it with `spawn_blocking` et al. to prevent task hangups.
pub fn write_prealloc<T: Serialize>(file: &mut File, item: &T) -> eyre::Result<()>
{
let sect_type_name = || std::any::type_name::<T>().header("Type trying to serialise was");
let vec = tokio::task::block_in_place(|| serde_cbor::to_vec(item))
.wrap_err(eyre!("Failed to serialise item"))
.with_section(sect_type_name.clone())?;
let fd = file.as_raw_fd();
cfg_eprintln!(Verbose; config::get_global(), "Writing (raw) {} bytes of type {:?} to fd {}", vec.len(), std::any::type_name::<T>(), fd);
unsafe {
if libc::fallocate(fd, 0, 0, vec.len().try_into()
.wrap_err(eyre!("Failed to cast buffer size to `off_t`"))
.with_section(|| vec.len().header("Buffer size was"))
.with_section(|| libc::off_t::MAX.to_string().header("Max value of `off_t` is"))
.with_warning(|| "Usually `off_t` is a signed 64 bit integer. Whereas the buffer's size is unsigned. On systems where `off_t` is 64 bits or higher, this should realistically never happen and probably indicates a bug.")?) < 0 {
// Error
Err(std::io::Error::last_os_error())
} else {
Ok(())
}
}.wrap_err("fallocate() failed")
.with_section(|| vec.len().header("Bytes to allocate was"))
.with_suggestion(|| "Make sure there is enough space for the fallocate() call")
.with_suggestion(|| "Make sure we are able to write to the file")?;
// fallocate() succeeded in allocating `vec.len()` bytes to map.
let mut map = unsafe { MmapMut::map_mut(file) }
.wrap_err(eyre!("Failed to map file for read + write"))
.with_section(|| fd.header("fd was"))
.with_suggestion(|| "Do we have the premissions for both reading and writing of this file and fd?")?;
eyre_assert!(tokio::task::block_in_place(|| unsafe {
bytes::copy_nonoverlapping_unchecked(&vec[..], &mut map[..])
}) == vec.len(); "Length mismatch")
.with_section(|| vec.len().header("Expected"))
.with_section(|| map.len().header("Got"))
.with_warning(|| "This should never happen, it indicates a bug")?;
tokio::task::block_in_place(move || map.flush())
.wrap_err(eyre!("Failed to flush map in place"))?; //map is dropped here
drop!(vec vec);
Ok(())
}
/// Read this object as-is from this file descriptor.
///
/// # Note
/// This does not decompress like `read_aynsc()` does. It is just a 1-1 read of the serialisation.
/// Therefore, `read_prealloc()` cannot be used with data written by `write_async()`.
///
/// This is a completely synchronous operation. You should use it with `spawn_blocking` et al. to prevent task hangups.
// This must be `DeserializeOwned` because the lifetime it is bound to is that of the memory map created and destroyed in the function, not of the fd `file` itself.
pub fn read_prealloc<T: serde::de::DeserializeOwned>(file: &File) -> eyre::Result<T>
{
let map = unsafe { Mmap::map(file) }
.wrap_err(eyre!("Failed to map file for read"))
.with_section(|| file.as_raw_fd().header("fd was"))
.with_suggestion(|| "Do we have the premissions for both reading and writing of this file and fd?")?;
tokio::task::
block_in_place(move || serde_cbor::from_slice(&map[..]))
.wrap_err(eyre!("Failed to deserialise from map"))
.with_note(|| "The prealloc read and write functions handle only *uncompressed* data. Make sure you're not feeding it compressed data (written with the non-prealloc read and write functions)")
}
}
#[cfg(feature="prealloc")] pub use prealloc::{
write_prealloc as write_sync_map,
read_prealloc as read_sync_map,
};

@ -1,5 +1,4 @@
use super::*;
use std::sync::Arc;
use tokio::sync::{
Semaphore,
@ -93,11 +92,11 @@ impl State
/// Try to consume the state into the cache.
///
/// Fails if there are other references of this state alive.
pub fn try_into_cache(self) -> Result<Cache, Self>
pub fn try_into_cache(self) -> Result<(Cache, Config), Self>
{
match Arc::try_unwrap(self.config)
{
Ok(_) => Ok(self.cache),
Ok(cfg) => Ok((self.cache, cfg)),
Err(config) => Err(Self{config, ..self}),
}
}

@ -14,6 +14,7 @@ use data::INode;
use data::FsInfo;
use state::State;
use data::INodeInfoGraph;
use config::Config;
async fn process_entry(entry: &tokio::fs::DirEntry, parent: INode) -> io::Result<FsInfo>
{
@ -34,7 +35,7 @@ async fn process_entry(entry: &tokio::fs::DirEntry, parent: INode) -> io::Result
///
/// # Panics
/// If there are any more held references to `state`.
pub async fn work_on_all(state: State) -> INodeInfoGraph
pub async fn work_on_all(state: State) -> (INodeInfoGraph, Config)
{
let comp_children = join_all(state.config().paths.iter().map(|path| {
let path = path.clone();
@ -46,7 +47,7 @@ pub async fn work_on_all(state: State) -> INodeInfoGraph
.ok()
},
Err(err) => {
eprintln!("Failed to stat root {:?}: {}", path, err);
cfg_eprintln!(state.config(), "Failed to stat root {:?}: {}", path, err);
None
},
}
@ -54,9 +55,9 @@ pub async fn work_on_all(state: State) -> INodeInfoGraph
})).await;
// All children have completed here. Unwrap cache
let ino_map = {
let cache = state.try_into_cache().unwrap();
cache.try_complete().await.unwrap()
let (ino_map, cfg) = {
let (cache, cfg) = state.try_into_cache().unwrap();
(cache.try_complete().await.unwrap(), cfg)
};
let mut output = HashMap::with_capacity(ino_map.len());
@ -65,14 +66,13 @@ pub async fn work_on_all(state: State) -> INodeInfoGraph
if let Some(res) = path_comp
{
output.extend(res);
}
}
INodeInfoGraph::new(
(INodeInfoGraph::new(
ino_map,
output,
)
), cfg)
}
/// Walk this directory.
@ -88,7 +88,9 @@ fn walk(state: State, root: PathBuf, root_ino: INode) -> BoxFuture<'static, Hash
async move {
{
let _guard = state.lock().enter().await;
println!(" -> {:?}", root);
cfg_println!(state.config(), " -> {:?}", root); //TODO: Flag to disable this?
match fs::read_dir(&root).await
{
Ok(mut dir) => {
@ -117,15 +119,15 @@ fn walk(state: State, root: PathBuf, root_ino: INode) -> BoxFuture<'static, Hash
let mut cache = state.cache_sub();
cache.insert(ino, fsinfo).await;
},
Err(err) => eprintln!("Failed to stat {:?}: {}", entry.path(), err),
Err(err) => cfg_eprintln!(state.config(), "Failed to stat {:?}: {}", entry.path(), err),
}
}
},
Err(err) => eprintln!("Walking {:?} failed: {}", root, err),
Err(err) => cfg_eprintln!(state.config(), "Walking {:?} failed: {}", root, err),
}
}
},
Err(err) => eprintln!("Failed to walk {:?}: {}", root, err),
Err(err) => cfg_eprintln!(state.config(), "Failed to walk {:?}: {}", root, err),
}
// drop work guard here
}
@ -136,7 +138,7 @@ fn walk(state: State, root: PathBuf, root_ino: INode) -> BoxFuture<'static, Hash
{
output.extend(map);
} else {
eprintln!("Child panic");
cfg_eprintln!(state.config(), "Child panic");
}
}

Loading…
Cancel
Save