From 75e31beacccb72ad195a967567f1c0a2d27c782f Mon Sep 17 00:00:00 2001 From: Avril Date: Mon, 19 Oct 2020 17:29:17 +0100 Subject: [PATCH] update readme --- Cargo.toml | 3 +++ README.md | 26 ++++++++++++++++++++++++-- build.rs | 24 ++++++++++++++++++++++++ src/lib.rs | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 103 insertions(+), 2 deletions(-) create mode 100644 build.rs diff --git a/Cargo.toml b/Cargo.toml index 021b34a..34adfb8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,3 +16,6 @@ serde = {version = "1.0", optional = true, features=["derive"]} [dev-dependencies] serde_json = "1.0" serde_cbor = "0.11.1" + +[build-dependencies] +rustc_version = "0.2" diff --git a/README.md b/README.md index 7af2e23..193d529 100644 --- a/README.md +++ b/README.md @@ -19,12 +19,34 @@ It can be used to "mark" items without the need to transfer ownership to the map } } ``` -# Serialisation support with `serde` crate + # Serialisation support with `serde` crate `HashRefSet` and `HashType` both implement `Serialize` and `Deserialize` from the `serde` crate if the `serde` feature is enabled. By default it is not. -# Drawbacks + # Hashing + We use the SHA512 hashing algorithm for the implementation at present. + I may implement the ability to choose different types, but as of now I think it is sufficient. + + # Drawbacks Since the item is not inserted itself, we cannot use `Eq` to double check there was not a hash collision. While the hashing algorithm used (Sha512) is extremely unlikely to produce collisions, especially for small data types, keep in mind that it is not infallible. +## Speed +`HashRefSet` is significantly slower than `HashSet`, so `HashSet` should be preferred in most cases. +Even when `Clone` is required to insert into `HashSet`, it can be ~10x faster for trivial data structures. +`HashRefSet` should be used if `Clone` is either not an option, or `Clone` is a significantly heavy operation on the type you're inserting. 
+
+| Benchmark          | Tests                                         | Result          |
+|--------------------|-----------------------------------------------|-----------------|
+| owning_strings     | Inserts `String` into `HashSet` by cloning    | ~4,538 ns/iter  |
+| non_owning_strings | Inserts `str` into `HashRefSet` by reference  | ~48,271 ns/iter |
+| owning_ints        | Inserts `u32` into `HashSet` by copy          | ~937 ns/iter    |
+| non_owning_ints    | Inserts `&u32` into `HashRefSet` by reference | ~31,089 ns/iter |
+
+# When to use over `HashSet`
+* The type you're inserting needs to be both in the set and moved elsewhere. (see example)
+* Simply using `Clone` to insert a copy of the item into a `HashSet` is not possible (non-`Clone` type) or is a significantly heavy operation. (see benchmarks)
+* The fallibility of potential (albeit extremely unlikely) collisions of the SHA512 algorithm is not a concern
+* You need to insert an unsized type into a `HashSet`
+
 # License
 MIT
diff --git a/build.rs b/build.rs
new file mode 100644
index 0000000..6399463
--- /dev/null
+++ b/build.rs
@@ -0,0 +1,24 @@
+
+extern crate rustc_version;
+use rustc_version::{version, version_meta, Channel};
+
+/// Build script: exposes the active rustc release channel to the crate as a
+/// bare cfg flag (`stable`, `beta`, `nightly` or `dev`).
+fn main() {
+    // Re-run only when this script changes instead of on every source edit.
+    println!("cargo:rerun-if-changed=build.rs");
+
+    // Assert we haven't travelled back in time
+    let semver = version().expect("failed to query the rustc version");
+    assert!(semver.major >= 1);
+
+    // Set cfg flags depending on release channel
+    let meta = version_meta().expect("failed to query rustc version metadata");
+    let channel = match meta.channel {
+        Channel::Stable => "stable",
+        Channel::Beta => "beta",
+        Channel::Nightly => "nightly",
+        Channel::Dev => "dev",
+    };
+    println!("cargo:rustc-cfg={}", channel);
+}
diff --git a/src/lib.rs b/src/lib.rs
index 9740dc7..efa91c2 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -23,6 +23,10 @@
 //! # Drawbacks
 //! Since the item is not inserted itself, we cannot use `Eq` to double check there was not a hash collision.
 //! While the hashing algorithm used (Sha512) is extremely unlikely to produce collisions, especially for small data types, keep in mind that it is not infallible.
+
+#![cfg_attr(nightly, feature(test))]
+#[cfg(nightly)] extern crate test;
+
 use std::{
     collections::{
         hash_set,
@@ -211,4 +215,66 @@ mod tests
         assert!(refset.insert("none"));
         assert!(!refset.insert("two"));
     }
+
+    /// Micro-benchmarks comparing `HashRefSet` against `std::collections::HashSet`.
+    ///
+    /// Only compiled under the `nightly` cfg, since `test::Bencher` requires the
+    /// unstable `test` feature that is enabled at the crate root.
+    #[cfg(nightly)]
+    mod benchmarks
+    {
+        use test::{black_box, Bencher};
+        const STRINGS: &str = "leo vel fringilla est ullamcorper eget nulla facilisi etiam dignissim diam quis enim lobortis scelerisque fermentum dui faucibus in ornare quam viverra orci sagittis eu volutpat odio facilisis mauris sit amet massa vitae tortor condimentum lacinia quis vel eros donec ac odio tempor orci dapibus ultrices in iaculis nunc sed augue lacus viverra vitae congue eu consequat ac felis donec et odio pellentesque diam volutpat commodo sed egestas egestas fringilla phasellus faucibus scelerisque eleifend donec pretium vulputate sapien nec sagittis aliquam malesuada bibendum arcu vitae elementum curabitur vitae nunc sed velit dignissim sodales ut eu sem integer vitae justo eget";
+        const INTS: &[u32] = &[182,248,69,225,164,219,73,122,14,205,148,221,24,107,209,83,210,87,148,249,234,181,217,154,180,240,132,145,208,15,77,4,117,16,43,1,95,49,150,18,207,161,107,216,215,100,76,198,43,21,99,177,77,28,29,172,117,136,151,96,66,208,244,138,90];
+
+        // Each benchmark creates its set once, outside `b.iter`, so the set persists
+        // across iterations: after the first pass every insert hits an existing entry.
+
+        /// Inserts every word of `STRINGS` into a `HashRefSet` by reference (no clone).
+        #[bench] fn non_owning_strings(b: &mut Bencher)
+        {
+            let strings: Vec<String> = STRINGS.split(char::is_whitespace).map(ToOwned::to_owned).collect();
+            let mut map = super::HashRefSet::new();
+            b.iter(|| {
+                for string in strings.iter() {
+                    black_box(map.insert(string.as_str()));
+                }
+            })
+        }
+
+        /// Inserts a clone of every word of `STRINGS` into a `HashSet<String>`.
+        #[bench] fn owning_strings(b: &mut Bencher)
+        {
+            let strings: Vec<String> = STRINGS.split(char::is_whitespace).map(ToOwned::to_owned).collect();
+            let mut map = std::collections::HashSet::new();
+            b.iter(|| {
+                for string in strings.iter() {
+                    black_box(map.insert(string.clone())); //clone is needed here :/
+                }
+            })
+        }
+
+        /// Inserts every element of `INTS` into a `HashRefSet` by reference.
+        #[bench] fn non_owning_ints(b: &mut Bencher)
+        {
+            let mut map = super::HashRefSet::new();
+            b.iter(|| {
+                for int in INTS.iter() {
+                    black_box(map.insert(int));
+                }
+            })
+        }
+
+        /// Inserts every element of `INTS` into a `HashSet<u32>` by copy.
+        #[bench] fn owning_ints(b: &mut Bencher)
+        {
+            let mut map = std::collections::HashSet::new();
+            b.iter(|| {
+                for int in INTS.iter() {
+                    // Dereference so the set owns `u32` values rather than storing `&u32`.
+                    black_box(map.insert(*int));
+                }
+            })
+        }
+    }
 }