commit 0fc93ee39e1e1fd0a39ce5f34ab32c1b0c391976 Author: Avril Date: Thu Jul 9 01:45:20 2020 +0100 initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e2a3069 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +*~ diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..9665fb0 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,617 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "arc-swap" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d25d88fd6b8041580a654f9d0c581a047baee2b3efee13275f2fc392fc75034" + +[[package]] +name = "bitflags" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" + +[[package]] +name = "block-buffer" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4" +dependencies = [ + "generic-array", +] + +[[package]] +name = "bytes" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "118cf036fbb97d0816e3c34b2d7a1e8cfc60f68fcf63d550ddbe9bd5f59c213b" +dependencies = [ + "loom", +] + +[[package]] +name = "cc" +version = "1.0.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9a06fb2e53271d7c279ec1efea6ab691c35a2ae67ec0d91d7acec0caf13b518" + +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + +[[package]] +name = "cpuid-bool" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d375c433320f6c5057ae04a04376eef4d04ce2801448cf8863a78da99107be4" + +[[package]] +name = "digest" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066" +dependencies = [ + "generic-array", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "fuchsia-zircon" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" +dependencies = [ + "bitflags", + "fuchsia-zircon-sys", +] + +[[package]] +name = "fuchsia-zircon-sys" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" + +[[package]] +name = "futures" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e05b85ec287aac0dc34db7d4a569323df697f9c55b99b15d6b4ef8cde49f613" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f366ad74c28cca6ba456d95e6422883cfb4b252a83bed929c83abfdbbf2967d5" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59f5fff90fd5d971f936ad674802482ba441b6f09ba5e15fd8b39145582ca399" + +[[package]] +name = "futures-executor" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10d6bb888be1153d3abeb9006b11b02cf5e9b209fda28693c31ae1e4e012e314" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de27142b013a8e869c14957e6d2edeef89e97c289e69d042ee3a49acd8b51789" + +[[package]] +name = "futures-macro" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0b5a30a4328ab5473878237c447333c093297bded83a4983d10f4deea240d39" +dependencies = [ + "proc-macro-hack", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f2032893cb734c7a05d85ce0cc8b8c4075278e93b24b66f9de99d6eb0fa8acc" + +[[package]] +name = "futures-task" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdb66b5f09e22019b1ab0830f7785bcea8e7a42148683f99214f73f8ec21a626" +dependencies = [ + "once_cell", +] + +[[package]] +name = "futures-util" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8764574ff08b701a084482c3c7031349104b07ac897393010494beaa18ce32c6" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project", + "pin-utils", + "proc-macro-hack", + "proc-macro-nested", + "slab", +] + +[[package]] +name = "generator" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "add72f17bb81521258fcc8a7a3245b1e184e916bfbe34f0ea89558f440df5c68" +dependencies = [ + "cc", + "libc", + "log", + "rustc_version", + "winapi 0.3.9", +] + +[[package]] +name = "generic-array" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac746a5f3bbfdadd6106868134545e684693d54d9d44f6e9588a7d54af0bf980" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "hermit-abi" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3deed196b6e7f9e44a2ae8d94225d80302d81208b1bb673fd21fe634645c85a9" +dependencies = [ + "libc", +] + +[[package]] +name = "iovec" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2b3ea6ff95e175473f8ffe6a7eb7c00d054240321b84c57051175fe3c1e075e" +dependencies = [ + "libc", +] + +[[package]] +name = "kernel32-sys" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" +dependencies = [ + "winapi 0.2.8", + "winapi-build", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9f8082297d534141b30c8d39e9b1773713ab50fdbe4ff30f750d063b3bfd701" + +[[package]] +name = "log" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "loom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ecc775857611e1df29abba5c41355cdf540e7e9d4acfdf0f355eefee82330b7" +dependencies = [ + "cfg-if", + "generator", + "scoped-tls", +] + +[[package]] +name = "memchr" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" + +[[package]] +name = "mio" +version = "0.6.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fce347092656428bc8eaf6201042cb551b8d67855af7374542a92a0fbfcac430" +dependencies = [ + "cfg-if", + "fuchsia-zircon", + "fuchsia-zircon-sys", + "iovec", + "kernel32-sys", + "libc", + "log", + "miow 0.2.1", + "net2", + "slab", + "winapi 0.2.8", +] + +[[package]] +name = "mio-named-pipes" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0840c1c50fd55e521b247f949c241c9997709f23bd7f023b9762cd561e935656" +dependencies = [ + "log", + "mio", + "miow 0.3.5", + "winapi 0.3.9", +] + +[[package]] +name = "mio-uds" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afcb699eb26d4332647cc848492bbc15eafb26f08d0304550d5aa1f612e066f0" +dependencies = [ + "iovec", + "libc", + "mio", +] + +[[package]] +name = "miow" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c1f2f3b1cf331de6896aabf6e9d55dca90356cc9960cca7eaaf408a355ae919" +dependencies = [ + "kernel32-sys", + "net2", + "winapi 0.2.8", + "ws2_32-sys", +] + +[[package]] +name = "miow" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07b88fb9795d4d36d62a012dfbf49a8f5cf12751f36d31a9dbe66d528e58979e" +dependencies = [ + "socket2", + "winapi 0.3.9", +] + +[[package]] +name = "net2" +version = "0.2.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ba7c918ac76704fb42afcbbb43891e72731f3dcca3bef2a19786297baf14af7" +dependencies = [ + "cfg-if", + "libc", + "winapi 0.3.9", +] + +[[package]] +name = "num_cpus" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "once_cell" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b631f7e854af39a1739f401cf34a8a013dfe09eac4fa4dba91e9768bd28168d" + +[[package]] +name = "opaque-debug" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" + +[[package]] +name = "pin-project" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12e3a6cdbfe94a5e4572812a0201f8c0ed98c1c452c7b8563ce2276988ef9c17" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a0ffd45cf79d88737d7cc85bfd5d2894bee1139b356e616fe85dc389c61aaf7" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "pin-project-lite" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282adbf10f2698a7a77f8e983a74b2d18176c19a7fd32a45446139ae7b02b715" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "proc-macro-hack" +version = "0.5.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e0456befd48169b9f13ef0f0ad46d492cf9d2dbb918bcf38e01eed4ce3ec5e4" + +[[package]] +name = "proc-macro-nested" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eba180dafb9038b050a4c280019bbedf9f2467b61e5d892dcad585bb57aadc5a" + +[[package]] +name = "proc-macro2" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "beae6331a816b1f65d04c45b078fd8e6c93e8071771f41b8163255bbd8d7c8fa" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.1.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84" + +[[package]] +name = "rmdupe" +version = "0.1.0" +dependencies = [ + "futures", + "sha2", + "tokio", +] + +[[package]] +name = "rustc_version" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" +dependencies = [ + "semver", +] + +[[package]] +name = "scoped-tls" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "332ffa32bf586782a3efaeb58f127980944bbc8c4d6913a86107ac2a5ab24b28" + +[[package]] +name = "semver" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" +dependencies = [ + "semver-parser", +] + +[[package]] +name = "semver-parser" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" + +[[package]] +name = "sha2" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2933378ddfeda7ea26f48c555bdad8bb446bf8a3d17832dc83e380d444cfb8c1" +dependencies = [ + "block-buffer", + "cfg-if", + "cpuid-bool", + "digest", + "opaque-debug", +] + +[[package]] +name = "signal-hook-registry" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94f478ede9f64724c5d173d7bb56099ec3e2d9fc2774aac65d34b8b890405f41" +dependencies = [ + "arc-swap", + "libc", +] + +[[package]] +name = "slab" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8" + +[[package]] +name = "socket2" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03088793f677dce356f3ccc2edb1b314ad191ab702a5de3faf49304f7e104918" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "winapi 0.3.9", +] + +[[package]] +name = "syn" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8d5d96e8cbb005d6959f119f773bfaebb5684296108fb32600c00cde305b2cd" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "tokio" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d099fa27b9702bed751524694adbe393e18b36b204da91eb1cbbbbb4a5ee2d58" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "iovec", + "lazy_static", + "libc", + "memchr", + "mio", + "mio-named-pipes", + "mio-uds", + "num_cpus", + "pin-project-lite", + "signal-hook-registry", + "slab", + "tokio-macros", + "winapi 0.3.9", +] + +[[package]] +name = "tokio-macros" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0c3acc6aa564495a0f2e1d59fab677cd7f81a19994cfc7f3ad0e64301560389" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "typenum" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "373c8a200f9e67a0c95e62a4f52fbf80c23b4381c05a17845531982fa99e6b33" + +[[package]] +name = "unicode-xid" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" + +[[package]] +name = "version_check" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed" + +[[package]] +name = "winapi" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-build" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "ws2_32-sys" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d59cefebd0c892fa2dd6de581e937301d8552cb44489cdff035c6187cb63fa5e" +dependencies = [ + "winapi 0.2.8", + "winapi-build", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..0ec159c --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "rmdupe" +version = "0.1.0" +authors = ["Avril "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[profile.release] +opt-level = 3 +lto = "fat" +codegen-units = 1 + +[features] +threads = ["tokio", "futures"] + +[dependencies] +tokio = { version = "0.2", features = ["full"], optional = true } +sha2 = "0.9" +futures = { version = "0.3", optional = true} \ No newline at end of file diff --git a/src/bytes.rs b/src/bytes.rs new file mode 100644 index 0000000..63249e1 --- /dev/null +++ b/src/bytes.rs @@ -0,0 +1,15 @@ +use super::*; + +pub fn copy_slice(mut dst: D, src: S) -> usize +where S: AsRef<[T]>, + D: AsMut<[T]>, +T: Clone +{ + let mut sz=0; + for (d,s) in dst.as_mut().iter_mut().zip(src.as_ref().iter()) + { + *d = s.clone(); + sz+=1; + } + sz +} diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 0000000..24f3d5c --- /dev/null +++ b/src/config.rs @@ -0,0 +1,27 @@ +use super::*; + +#[derive(Debug, Clone)] +pub enum RecursionMode +{ + None, + All, + N(usize), +} + +#[derive(Debug, Clone)] +pub struct Mode +{ + pub error_mode: error::Mode, + pub recursion_mode: RecursionMode, +} + +impl Default for Mode +{ + fn default() -> Self + { + Self { + error_mode: error::Mode::Cancel, + recursion_mode: RecursionMode::None, + } + } +} diff --git a/src/container.rs b/src/container.rs new file mode 100644 index 0000000..7c74b85 --- /dev/null +++ b/src/container.rs @@ -0,0 +1,108 @@ +use super::*; +use std::{ + collections::HashSet, + io::{ + self, + Write, + Read, + }, +}; + +#[derive(Clone, PartialEq, Eq)] +pub struct DupeMap(HashSet); + +impl DupeMap +{ + /// Create a new empty dupe map + pub fn new() -> Self + { + Self(HashSet::new()) + } + + /// Iterator over all added keys + pub fn iter(&self) -> std::collections::hash_set::Iter + { + self.0.iter() + } + + /// Is this hash in the set? + pub fn peek(&self, hash: &hash::Sha256Hash) -> bool { + self.0.contains(hash) + } + + /// Try to add an entry, returns true if was not a dupe, false if it was. + pub fn try_add(&mut self, hash: hash::Sha256Hash) -> bool + { + if self.0.contains(&hash) { + false + } else { + self.0.insert(hash); + true + } + } + + /// Save this list to a file + pub fn save(&self, to: &mut W) -> io::Result + { + let mut done=0; + for x in self.0.iter() + { + to.write(x.as_ref())?; + done+=1; + } + Ok(done) + } + /// Save this list to a file async + #[cfg(feature="threads")] + pub async fn save_async(&self, to: &mut W) -> io::Result + where W: tokio::io::AsyncWrite + std::marker::Send + std::marker::Sync + std::marker::Unpin + { + use tokio::prelude::*; + + let mut done=0; + for x in self.0.iter() + { + to.write(x.as_ref()).await?; + done+=1; + } + Ok(done) + } + + /// Load from file. + pub fn load(&mut self, from: &mut R) -> io::Result + { + let mut done=0; + let mut read; + let mut buffer = [0u8; hash::SHA256_SIZE]; + + while {read = from.read(&mut buffer[..])?; read==hash::SHA256_SIZE} { + done += if self.try_add(hash::Sha256Hash::new(buffer)) { + 1 + } else { + 0 + }; + } + Ok(done) + } + + /// Load from file. + #[cfg(feature="threads")] + pub async fn load_async(&mut self, from: &mut R) -> io::Result + where R: tokio::io::AsyncRead + std::marker::Send + std::marker::Sync + std::marker::Unpin + { + use tokio::prelude::*; + + let mut done=0; + let mut read; + let mut buffer = [0u8; hash::SHA256_SIZE]; + + while {read = from.read(&mut buffer[..]).await?; read==hash::SHA256_SIZE} { + done += if self.try_add(hash::Sha256Hash::new(buffer)) { + 1 + } else { + 0 + }; + } + Ok(done) + } +} diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..5641141 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,107 @@ +use std::{ + error, + fmt, + io, +}; + +#[derive(Debug, Clone)] +pub enum Mode +{ + Ignore, + Warn, + Cancel, + Terminate, +} + +impl Mode +{ + pub fn handle(&self, err: Result) -> Result,E> + { + match err { + Err(err) => { + match self { + Self::Warn => println!("[WARN]: {}", err), + Self::Cancel => return Err(err), + Self::Terminate => Err(err).expect("Terminating on error"), + _ => (), + }; + Ok(None) + }, + Ok(ok) => Ok(Some(ok)), + } + } +} + +#[derive(Debug)] +pub enum Error +{ + Unknown, + Arch(Option<&'static str>), + IO(io::Error), + Size{expected: usize, got: usize}, + Internal(Box<(dyn error::Error + std::marker::Send)>), +} + +impl error::Error for Error +{ + fn source(&self) -> Option<&(dyn error::Error + 'static)> + { + Some(match &self { + Error::IO(e) => e, + _ => return None, + }) + } +} + +impl fmt::Display for Error +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + write!(f, "error: ")?; + + match self { + Self::Arch(Some(expl)) => write!(f, "bad arch: {}", expl), + Self::Arch(_) => write!(f, "bad arch (this is usually an indicator of an improperly compiled binary.)"), + Self::Size{expected, got} => write!(f, "size mismatch: expected {}, got {}", expected, got), + Self::IO(e) => write!(f, "io: {}", e), + Error::Internal(e) => write!(f, "internal: {}", e), + _ => write!(f, "unknown"), + } + } +} + +impl From> for Error +{ + fn from(bx: Box<(dyn error::Error + std::marker::Send)>) -> Self + { + Self::Internal(bx) + } +} + +impl From for Error +{ + fn from(er: io::Error) -> Self + { + Self::IO(er) + } +} + +// Helper functions: + +/// Return error for size if needed +#[inline] +pub fn check_size(expected: usize, got: usize) -> Result<(), Error> +{ + if expected == got { + Ok(()) + } else { + Err(Error::Size{expected, got}) + } +} + +/// Return an `Error::Internal` if needed. +#[inline] +pub fn internal(from: Result) -> Result +{ + from.or_else(move |e| Err(Error::Internal(Box::new(e)))) +} diff --git a/src/hash.rs b/src/hash.rs new file mode 100644 index 0000000..ebab7f8 --- /dev/null +++ b/src/hash.rs @@ -0,0 +1,107 @@ +use super::*; +use sha2::{ + Sha256, + Digest, +}; +use std::{ + fmt, + io::{ + self, Read, + }, +}; + +pub const SHA256_SIZE: usize = 32; +#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] +pub struct Sha256Hash([u8; SHA256_SIZE]); + +impl Sha256Hash +{ + pub fn new(from: [u8; SHA256_SIZE]) -> Self + { + Self(from) + } + pub fn from_slice(from: T) -> Self + where T: AsRef<[u8]> + { + let mut out = [0u8; SHA256_SIZE]; + bytes::copy_slice(&mut out, from.as_ref()); + Self(out) + } + +} + +impl fmt::Display for Sha256Hash +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + write!(f, "Sha256hash (")?; + for b in self.0.iter() + { + write!(f,"{:02x}", *b)?; + } + write!(f, ")") + } +} + +impl Default for Sha256Hash +{ + fn default() -> Self + { + Self([0u8; SHA256_SIZE]) + } +} + +impl From for Sha256Hash +{ + fn from(hash: Sha256) -> Sha256Hash + { + let mut out = [0u8; SHA256_SIZE]; + assert_eq!(bytes::copy_slice(&mut out, hash.finalize()), SHA256_SIZE); + Self(out) + } +} +impl AsRef<[u8]> for Sha256Hash +{ + fn as_ref(&self) -> &[u8] + { + &self.0[..] + } +} + + + +/// Compute SHA256 hash from a stream. +pub fn compute(stream: &mut R, result: &mut Sha256Hash) -> io::Result +{ + let mut buffer = [0u8; BUFFER_SIZE]; + let mut read; + let mut done=0; + let mut digest = Sha256::new(); + + while {read = stream.read(&mut buffer[..])?; read!=0} { + digest.update(&buffer[..read]); + done+=read; + } + *result = digest.into(); + Ok(done) +} + +/// Compute SHA256 hash from a stream. +#[cfg(feature="threads")] +pub async fn compute_async(stream: &mut R, result: &mut Sha256Hash) -> io::Result +where R: tokio::io::AsyncRead + std::marker::Send + std::marker::Sync + std::marker::Unpin +{ + use tokio::prelude::*; + + let mut buffer = [0u8; BUFFER_SIZE]; + let mut read; + let mut done=0; + let mut digest = Sha256::new(); + + while {read = stream.read(&mut buffer[..]).await?; read!=0} { + digest.update(&buffer[..read]); + done+=read; + } + *result = digest.into(); + Ok(done) +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..aadc2b5 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,14 @@ +#![allow(dead_code)] + +pub const BUFFER_SIZE: usize = 4096; + +mod bytes; +mod error; +mod hash; +mod container; +mod config; +mod proc; + +fn main() { + +} diff --git a/src/proc.rs b/src/proc.rs new file mode 100644 index 0000000..f8746c9 --- /dev/null +++ b/src/proc.rs @@ -0,0 +1,201 @@ +use super::*; +use std::{ + io::{ + self, Read, + }, + path::{ + Path + }, + fs::{ + self, + OpenOptions, + }, + convert::{ + TryInto, + }, + ops::{ + self, + }, +}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct DupeCount +{ + pub total: usize, + pub dupes: usize, +} + +impl From for DupeCount { + fn from(b: bool) -> Self + { + Self{ + total: 1, + dupes: if b {0} else {1}, + } + } +} + +impl ops::Add for DupeCount +{ + type Output = Self; + fn add(self, other: Self) -> Self + { + Self { + total: self.total + other.total, + dupes: self.dupes + other.dupes, + } + } +} + +impl ops::AddAssign for DupeCount +{ + fn add_assign(&mut self, other: Self) + { + *self = Self { + total: self.total + other.total, + dupes: self.dupes + other.dupes, + }; + } +} + +impl Default for DupeCount +{ + fn default() -> Self + { + Self{total:0, dupes:0} + } +} + +/// Process a file and add it to the table, returns true if is not a dupe. +pub fn process_file>(file: P, set: &mut container::DupeMap) -> Result +{ + let mut file = OpenOptions::new() + .read(true) + .open(file)?; + let sz: usize = file.metadata()?.len().try_into().or(Err(error::Error::Arch(Some("Filesize is too large to be known. you have likely compiled the binary for 32-bit architecture or less. This shouldn't happen on 64-bit systems."))))?; + + let mut result = hash::Sha256Hash::default(); + error::check_size(sz, hash::compute(&mut file, &mut result)?)?; + + Ok(set.try_add(result)) +} + + +/// Process a file and add it to the table, returns true if is not a dupe. +#[cfg(feature="threads")] +pub async fn process_file_async>(file: P, set: &std::sync::Arc>) -> Result +{ + use tokio::{ + prelude::*, + fs::{ + OpenOptions, + }, + }; + let mut file = OpenOptions::new() + .read(true) + .open(file).await?; + let sz: usize = file.metadata().await?.len().try_into().or(Err(error::Error::Arch(Some("Filesize is too large to be known. you have likely compiled the binary for 32-bit architecture or less. This shouldn't happen on 64-bit systems."))))?; + + let mut result = hash::Sha256Hash::default(); + error::check_size(sz, hash::compute_async(&mut file, &mut result).await?)?; + + let mut set = set.lock().await; + Ok(set.try_add(result)) +} + +/// Walk a dir structure and remove all dupes in it +pub fn do_dir>(dir: P, depth: usize, set: &mut container::DupeMap, mode: &config::Mode) -> Result +{ + let recurse = match mode.recursion_mode { + config::RecursionMode::N(n) if n > depth => true, + config::RecursionMode::All => true, + _ => false, + }; + let cmode = mode; + let mode = &mode.error_mode; + let mut count = DupeCount::default(); + + for obj in fs::read_dir(dir.as_ref())? //always return error if this fails + { + if let Some(obj) = mode.handle(obj)? { // Each one is allowed to fail if `mode` says so + let obj = obj.path(); + + if obj.is_dir() && recurse { + count += mode.handle(do_dir(obj, depth+1, set, cmode))?.unwrap_or_default(); + } else { + count += if mode.handle(process_file(&obj, set))?.unwrap_or_default() { + DupeCount{total: 1, dupes: 0} + } else { + println!(" -> {:?}", obj); + //TODO: Delete dupe? + DupeCount{total: 1, dupes: 1} + }; + } + } + } + + Ok(count) +} + +/// Walk a dir structure and remove all dupes in it +#[cfg(feature="threads")] +pub fn do_dir_async + std::marker::Send + std::marker::Sync + 'static>(dir: P, depth: usize, set: std::sync::Arc>, mode: config::Mode) -> futures::future::BoxFuture<'static, Result> +{ + use std::sync::Arc; + use futures::future::{ + FutureExt + }; + async move { + let recurse = match mode.recursion_mode { + config::RecursionMode::N(n) if n > depth => true, + config::RecursionMode::All => true, + _ => false, + }; + let cmode = mode; + let mode = &cmode.error_mode; + let mut children = Vec::new(); + let mut workers = Vec::new(); + + let mut dir = tokio::fs::read_dir(dir.as_ref()).await?; //always return error if this fails + + while let Some(Some(obj)) = mode.handle(dir.next_entry().await)? + { + let obj = obj.path(); + + if obj.is_dir() && recurse { + let set = Arc::clone(&set); + let cmode = cmode.clone(); + let mode = mode.clone(); + children.push(tokio::task::spawn(async move { + match mode.handle(do_dir_async(obj, depth+1, set, cmode).await) { + Ok(v) => Ok(v.unwrap_or_default()), + Err(v) => Err(v), + } + })); + } else { + let set = Arc::clone(&set); + let mode = mode.clone(); + workers.push(tokio::task::spawn(async move { + match mode.handle(process_file_async(&obj, &set).await) { + Ok(v) => Ok(v.unwrap_or_default()), + Err(v) => Err(v), + } + })); + } + } + + async fn wait_on>>, U: Default+Into>(children: T, mode: &error::Mode) -> Result + { + let mut count = DupeCount::default(); + for child in children.into_iter() { + count += mode.handle(error::internal(child.await)? /* thread panicked */)?.unwrap_or_default().into(); + } + Ok(count) + } + + // Wait for all children to complete before error checking. + let er1 = wait_on(workers, &mode).await; + let er2 = wait_on(children, &mode).await; + Ok(er1? + er2?) + }.boxed() +}