From 9a7234806f15b9e2273286a749e108b2a818e7de Mon Sep 17 00:00:00 2001 From: Avril Date: Wed, 14 Oct 2020 14:01:29 +0100 Subject: [PATCH] start dedup and container --- Cargo.lock | 144 +++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 2 + src/container.rs | 9 +++ src/dedup.rs | 94 +++++++++++++++++++++++++++++++ src/main.rs | 2 + 5 files changed, 251 insertions(+) create mode 100644 src/container.rs create mode 100644 src/dedup.rs diff --git a/Cargo.lock b/Cargo.lock index 6ba6139..d097af5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -103,6 +103,12 @@ dependencies = [ "safemem", ] +[[package]] +name = "build_const" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39092a32794787acd8525ee150305ff051b0aa6cc2abaf193924f5ab05425f39" + [[package]] name = "byte-tools" version = "0.3.1" @@ -175,6 +181,44 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8aebca1129a03dc6dc2b127edd729435bbc4a37e1d5f4d7513165089ceb02634" +[[package]] +name = "crc" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d663548de7f5cca343f1e0a48d14dcfb0e9eb4e079ec58883b7251539fa10aeb" +dependencies = [ + "build_const", +] + +[[package]] +name = "crypto-mac" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58bcd97a54c7ca5ce2f6eb16f6bede5b0ab5f0055fedc17d2f0b4466e21671ca" +dependencies = [ + "generic-array 0.14.4", + "subtle", +] + +[[package]] +name = "cryptohelpers" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cfc491baaffd7cbd6acc02ebd23564760d83a2c17e1a47e6a04a8d5a86e7fb5" +dependencies = [ + "crc", + "getrandom", + "hex-literal", + "hmac", + "libc", + "openssl", + "pbkdf2", + "serde", + "serde_derive", + "sha2", + "tokio", +] + [[package]] name = "digest" version = "0.8.1" @@ -236,6 +280,21 @@ version = "1.0.7" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + [[package]] name = "fuchsia-cprng" version = "0.1.1" @@ -457,6 +516,22 @@ dependencies = [ "libc", ] +[[package]] +name = "hex-literal" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5af1f635ef1bc545d78392b136bfe1c9809e029023c84a3638a864a10b8819c8" + +[[package]] +name = "hmac" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "deae6d9dbb35ec2c502d62b8f7b1c000a0822c3b0794ba36b3149c0a1c840dff" +dependencies = [ + "crypto-mac", + "digest 0.9.0", +] + [[package]] name = "http" version = "0.2.1" @@ -621,6 +696,7 @@ dependencies = [ "async-compression", "bzip2-sys", "cfg-if 1.0.0", + "cryptohelpers", "futures", "hyper", "lazy_static", @@ -632,6 +708,7 @@ dependencies = [ "rustc_version", "serde", "serde_cbor", + "sha2", "smallmap", "tokio", "toml", @@ -802,6 +879,48 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" +[[package]] +name = "openssl" +version = "0.10.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d575eff3665419f9b83678ff2815858ad9d11567e082f5ac1814baba4e2bcb4" +dependencies = [ + "bitflags", + "cfg-if 0.1.10", + "foreign-types", + "lazy_static", + "libc", + "openssl-sys", +] + +[[package]] +name = "openssl-sys" +version = "0.9.58" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a842db4709b604f0fe5d1170ae3565899be2ad3d9cbc72dedc789ac0511f78de" +dependencies = [ + "autocfg 1.0.1", + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "pbkdf2" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7170d73bf11f39b4ce1809aabc95bf5c33564cdc16fc3200ddda17a5f6e5e48b" +dependencies = [ + "base64", + "crypto-mac", + "hmac", + "rand 0.7.3", + "rand_core 0.5.1", + "sha2", + "subtle", +] + [[package]] name = "percent-encoding" version = "2.1.0" @@ -1229,6 +1348,19 @@ dependencies = [ "opaque-debug 0.3.0", ] +[[package]] +name = "sha2" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2933378ddfeda7ea26f48c555bdad8bb446bf8a3d17832dc83e380d444cfb8c1" +dependencies = [ + "block-buffer 0.9.0", + "cfg-if 0.1.10", + "cpuid-bool", + "digest 0.9.0", + "opaque-debug 0.3.0", +] + [[package]] name = "signal-hook-registry" version = "1.2.1" @@ -1266,6 +1398,12 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "subtle" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "343f3f510c2915908f155e94f17220b19ccfacf2a64a2a5d8004f2c3e311e7fd" + [[package]] name = "syn" version = "1.0.42" @@ -1536,6 +1674,12 @@ version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05e42f7c18b8f902290b009cde6d651262f956c98bc51bca4cd1d511c9cd85c7" +[[package]] +name = "vcpkg" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6454029bf181f092ad1b853286f23e2c507d8e8194d01d92da4a55c274a5508c" + [[package]] name = "version_check" version = "0.9.2" diff --git a/Cargo.toml b/Cargo.toml index aaad18f..56f9d4f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -68,6 +68,8 @@ smallmap = "1.1.5" lazy_static = "1.4.0" once_cell = "1.4.1" bzip2-sys = {version = "0.1.9", optional = 
true} +cryptohelpers = {version = "1.5.1", features= ["sha256", "async"]} +sha2 = "0.9.1" [build-dependencies] rustc_version = "0.2" diff --git a/src/container.rs b/src/container.rs new file mode 100644 index 0000000..158ddef --- /dev/null +++ b/src/container.rs @@ -0,0 +1,9 @@ +//! Container for chain +use super::*; +use chain::{Chain, Chainable}; + +pub struct ChainContainer /* Bundles a `Chain` with a `dedup::HashRefSet` keyed on `[T]` slices. Nothing in this patch constructs the struct or reads its fields yet. */ +{ + chain: Chain, + inputs_exact: dedup::HashRefSet<[T]>, /* NOTE(review): presumably the digests of inputs already fed to `chain`, so exact repeats can be detected -- confirm once a caller exists */ +} diff --git a/src/dedup.rs b/src/dedup.rs new file mode 100644 index 0000000..c74331b --- /dev/null +++ b/src/dedup.rs @@ -0,0 +1,94 @@ +//! De-duplicating inputs by remembering a SHA-256 digest of each value instead of the value itself +use super::*; +use std::{ + hash::{Hash, Hasher}, + collections::{HashSet, hash_set}, + marker::PhantomData, + borrow::Borrow, +}; +use sha2::{Sha256, Digest}; +use cryptohelpers::sha256::Sha256Hash; + +fn compute(thing: &T) -> Sha256Hash /* Runs `thing`'s `Hash` implementation against a SHA-256 state and returns that state converted to a `Sha256Hash` with `into()`. NOTE(review): std documents that the bytes a `Hash` impl writes are not portable across platforms, and `HashRefSet` derives `Serialize`/`Deserialize` -- confirm stored digests are never compared across machines. */ +{ + use std::mem::size_of; + struct Sha256Hasher(Sha256); /* adapter that lets any `Hash` impl write its bytes into the SHA-256 state */ + impl Hasher for Sha256Hasher + { + fn write(&mut self, bytes: &[u8]) + { + self.0.update(bytes); + } + fn finish(&self) -> u64 /* Required by the `Hasher` trait. Works on a clone of the state (only `&self` is available here) and returns a little-endian u64 built from the buffer that `bytes::copy_slice` fills -- presumably the leading bytes of the digest; confirm against that helper. `compute` does not call this; it takes the state out through `hasher.0` instead. */ + { + let ar = self.0.clone().finalize(); + let mut rest = [0u8; size_of::()]; + bytes::copy_slice(&mut rest[..], &ar[..]); + u64::from_le_bytes(rest) + } + } + + let mut hasher = Sha256Hasher(Sha256::new()); + thing.hash(&mut hasher); + hasher.0.into() +} + +/// A HashSet that doesn't own its items. For each value only the digest returned by `compute` is stored.
+#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct HashRefSet(HashSet, PhantomData>); /* field 0 is the set of stored digests (see `hashes`); field 1 is a `PhantomData` marker and holds no data */ + +unsafe impl Send for HashRefSet{} /* SAFETY(review): the visible fields are a set of digests and a `PhantomData` marker, so no item is actually stored -- confirm that this is the justification for the manual impl */ +unsafe impl Sync for HashRefSet{} /* SAFETY(review): same reasoning as the `Send` impl on the previous line -- confirm */ + +impl HashRefSet /* constructors */ +{ + pub fn new() -> Self /* creates an empty set */ + { + Self(HashSet::new(), PhantomData) + } + pub fn with_capacity(cap: usize) -> Self /* creates an empty set whose backing `HashSet` is pre-sized with `HashSet::with_capacity(cap)` */ + { + Self(HashSet::with_capacity(cap), PhantomData) + } +} + +impl util::NewCapacity for HashRefSet /* Forwards both trait methods to the inherent constructors. `Self::new()` and `Self::with_capacity(cap)` resolve to the inherent functions because inherent items take precedence over trait items, so these bodies do not call themselves. */ +{ + fn new() -> Self + { + Self::new() + } + + fn with_capacity(cap: usize) -> Self + { + Self::with_capacity(cap) + } +} + +impl HashRefSet /* digest-based set operations; every call that takes a value re-hashes it with `compute` */ +{ + pub fn insert(&mut self, value: &U) -> bool /* stores the digest of `value.borrow()`; returns true when that digest was not already in the set */ + where U: Borrow + { + self.0.insert(compute(value.borrow())) + } + pub fn remove(&mut self, value: &U) -> bool /* drops the digest of `value.borrow()`; returns true when it was present */ + where U: Borrow + { + self.0.remove(&compute(value.borrow())) + } + pub fn contains(&self, value: &U) -> bool /* returns true when the digest of `value.borrow()` is in the set */ + where U: Borrow + { + self.0.contains(&compute(value.borrow())) + } + pub fn len(&self) -> usize /* number of digests currently stored */ + { + self.0.len() + } + pub fn hashes(&self) -> hash_set::Iter<'_, Sha256Hash> /* borrows an iterator over the stored digests, in whatever order the backing `HashSet` yields them */ + { + self.0.iter() + } +} + diff --git a/src/main.rs b/src/main.rs index 368668f..5765cbf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -79,6 +79,8 @@ mod save; mod forwarded_list; use forwarded_list::XForwardedFor; mod handle; +mod dedup; +mod container; mod feed; mod gen;