From 80246388cd9523c085920608bec16af70b956019 Mon Sep 17 00:00:00 2001 From: Avril Date: Thu, 25 Jun 2020 14:01:59 +0100 Subject: [PATCH] fix bad index wrapping; add bench; add dist --- Makefile | 3 +- README.org | 11 ++-- src/lib.rs | 169 +++++++++++++++++++++++++++++++++++++++++++++++- src/map.rs | 6 +- src/mnemonic.rs | 12 ++-- 5 files changed, 188 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index 4494a97..f495983 100644 --- a/Makefile +++ b/Makefile @@ -14,7 +14,8 @@ khash-nonative: cd $(CLI) && $(MAKE) kana-hash test: - cargo test + RUSTFLAGS="-C target-cpu=native -C opt-level=3" cargo test + RUSTFLAGS="-C target-cpu=native -C opt-level=3" cargo bench cd $(CLI) && $(MAKE) install: diff --git a/README.org b/README.org index e2bf7eb..abd606a 100644 --- a/README.org +++ b/README.org @@ -235,11 +235,12 @@ The kana algorithm is a 16-bit block digest that works as follows: - Stage 0: 1. The byte is sign tested (bitwise ~AND~ =0x80=), store this as a boolean in /sign0/. 2. The valid first character range is looked up using the result of the sign test (either 0 or 1), store the range in /range/, and the slice ~KANA~ taken from the range in /kana/. - 3. The first index is calculated as the unsigned first byte modulo the size (exclusive) of /range/. Store this as /index0/. - 4. The swap table is checked to see if /index0/ has an entry. Then each following step is checked in order: - + If the swap entry exists and the first byte bitwise ~AND~ =0x2= is not 0, set the first character of the output to the value found in the swap table. - + If the swap entry exists and the first byte bitwise ~AND~ =0x8= is not 0 and the index has an entry in the 2nd swap table, set the first character of the output to the value found in the 2nd swap table. - + In any other case, set the first character of the output to the value found in the /kana/ slice at the /index/. + 3. The first index is calculated as the unsigned first byte modulo the size (exclusive) of /range/. 
Store this as /index/. + 4. Compute the value of the first byte bitwise ~XOR~ the second byte (or take the first byte unchanged if the block has no second byte); store this as /index1/. + 5. The swap table is checked to see if /index/ plus the start of /range/ has an entry. Then each following step is checked in order: + + If the swap entry exists and /index1/ bitwise ~AND~ =0x2= is =0=, set the first character of the output to the value found in the swap table. + + If the swap entry exists and /index1/ bitwise ~AND~ =0x8= is =0= and /index/ plus the start of /range/ has an entry in the 2nd swap table, set the first character of the output to the value found in the 2nd swap table. + + In any other case, set the first character of the output to the value found in the /kana/ slice at /index/. - Stage 1: 1. Compute a sub table for /index/ plus the start of /range/ using the ranges defined in ~KANA_SUB_VALID_FOR~ and store it in /sub/. If there is no sub table possible, skip to step 3. 2. If there is an entry in /sub/ for the index of the 2nd byte modulo the size of ~KANA_SUB~, set the second output character to be that character. 
diff --git a/src/lib.rs b/src/lib.rs index dfd0217..1f3e55a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,5 @@ //#![feature(const_generics)] +#![feature(test)] #![allow(dead_code)] use std::{ io::{ @@ -7,11 +8,177 @@ use std::{ fmt::Write, }; +extern crate test; + type HASHER = hash::Crc64Checksum; #[cfg(test)] mod tests { use super::*; + use std::collections::HashMap; + use test::{Bencher, black_box,}; + + #[bench] + fn speed_sha256t(b: &mut Bencher) + { + const ITERATIONS: usize = 1; + + let context = ctx::Context::new(ctx::Algorithm::Sha256Truncated, salt::Salt::random().unwrap()); + const INPUT: &'static [u8] = b"owo uwu owo uwuw owuw ua eaowi oiho aido iahs doi ajosidj aoi"; + b.iter(|| { + for _x in 0..ITERATIONS + { + black_box(generate(&context, INPUT)).unwrap(); + } + }); + } + #[bench] + fn speed_sha256(b: &mut Bencher) + { + const ITERATIONS: usize = 1; + + let context = ctx::Context::new(ctx::Algorithm::Sha256, salt::Salt::random().unwrap()); + const INPUT: &'static [u8] = b"owo uwu owo uwuw owuw ua eaowi oiho aido iahs doi ajosidj aoi"; + b.iter(|| { + for _x in 0..ITERATIONS + { + black_box(generate(&context, INPUT)).unwrap(); + } + }); + } + #[bench] + fn speed_crc64(b: &mut Bencher) + { + const ITERATIONS: usize = 1; + + let context = ctx::Context::new(ctx::Algorithm::Crc64, salt::Salt::random().unwrap()); + const INPUT: &'static [u8] = b"owo uwu owo uwuw owuw ua eaowi oiho aido iahs doi ajosidj aoi"; + b.iter(|| { + for _x in 0..ITERATIONS + { + black_box(generate(&context, INPUT)).unwrap(); + } + }); + } + #[bench] + fn speed_crc32(b: &mut Bencher) + { + const ITERATIONS: usize = 1; + + let context = ctx::Context::new(ctx::Algorithm::Crc32, salt::Salt::random().unwrap()); + const INPUT: &'static [u8] = b"owo uwu owo uwuw owuw ua eaowi oiho aido iahs doi ajosidj aoi"; + b.iter(|| { + for _x in 0..ITERATIONS + { + black_box(generate(&context, INPUT)).unwrap(); + } + }); + } + + #[test] + fn distrubution() + { + const THREADS: usize = 10; + 
const ITERATIONS: usize = 1000; + + use std::{ + sync::{ + Arc, + Mutex, + }, + thread, + }; + let global = Arc::new(Mutex::new(HashMap::with_capacity(map::KANA.len()+map::KANA_SUB.len()))); + + let _ = { + let mut global = global.lock().unwrap(); + for init_c in map::KANA.iter().chain(map::KANA_SUB.iter()) + { + global.insert(*init_c, 0); + } + for init_c in map::KANA_SWAP.iter().chain(map::KANA_SWAP2.iter()) + { + if let &Some(init_c) = init_c { + global.insert(init_c, 0); + } + } + }; + + fn do_work(num: usize, global: Arc<Mutex<HashMap<char, usize>>>, mut local: HashMap<char, usize>) + { + let mut random_buffer = [0u8; 4096]; + let context = ctx::Context::new(ctx::Algorithm::Sha256, salt::Salt::none()); + for _ in 0..num + { + getrandom::getrandom(&mut random_buffer[..]).unwrap(); + let kana = generate(&context, &random_buffer[..]).unwrap(); + for c in kana.chars() + { + *local.get_mut(&c).unwrap() += 1; + } + } + + let mut global = global.lock().unwrap(); + for (k,v) in local.into_iter() + { + *global.get_mut(&k).unwrap() += v; + } + } + + let joiners: Vec<thread::JoinHandle<()>> = { + let lock = global.lock().unwrap(); + + (0..THREADS).map(|_| { + let global = Arc::clone(&global); + let local = lock.clone(); + thread::spawn(move || { + do_work(ITERATIONS, global, local); + }) + }).collect() + }; + + for x in joiners.into_iter() + { + x.join().unwrap(); + } + + println!("Running {} x {} ({}) hashes (sha256)", ITERATIONS, THREADS, (ITERATIONS*THREADS)); + let global = global.lock().unwrap(); + let mut lowest = usize::MAX; + let mut highest = 0; + + let mut lowest_char = '.'; + let mut highest_char = '.'; + const FMAX: f64 = (ITERATIONS*THREADS) as f64; + + let global = { + let mut out = Vec::with_capacity(global.len()); + for (&k, &v) in global.iter() + { + out.push((k, v)); + } + out.sort_by(|b, a| a.1.partial_cmp(&b.1).unwrap()); + out.into_iter() + }; + + for (k, v) in global + { + println!("{} -> {} ({}%)", k, v, ((v as f64)/FMAX)*100.00); + if v < lowest { + lowest = v; + lowest_char = k; + } + if v > highest { + 
highest = v; + highest_char = k; + } + } + println!("Lowest was '{}' {} ({}%)", lowest_char, lowest, ((lowest as f64)/FMAX)*100.00); + println!("Highest was '{}' {} ({}%)", highest_char, highest, ((highest as f64)/FMAX)*100.00); + println!("Range was {}", highest-lowest); + assert!(lowest > 0); + } + #[test] fn it_works() -> Result<(), error::Error> { @@ -20,7 +187,7 @@ mod tests { let kana = generate(&context, input)?; println!("kana: {}", kana); - assert_eq!(kana, "もッちゅゆをヌョ"); + assert_eq!(kana, "もシちゅゆをヌョ"); Ok(()) } #[test] diff --git a/src/map.rs b/src/map.rs index 393cfed..e10d75a 100644 --- a/src/map.rs +++ b/src/map.rs @@ -97,8 +97,10 @@ pub const KANA_SUB_VALID_FOR: &[Definition; 18] = &[ // Should we properly restr Definition::single(51..=85), Definition::single(51..=85), Definition::single(51..=85), - Definition::any(), - Definition::any(), + + Definition::single(0..=45), + Definition::single(46..=91), + Definition::single(5..=39), Definition::single(5..=39), Definition::single(5..=39), diff --git a/src/mnemonic.rs b/src/mnemonic.rs index 9b53ae8..9127978 100644 --- a/src/mnemonic.rs +++ b/src/mnemonic.rs @@ -24,10 +24,14 @@ impl Digest { let range = &map::KANA_SIGN[sign0 as usize]; let kana = &map::KANA[range.clone()]; let oneesan = usize::from(from[0]) % kana.len(); - - d.0 = Some(match map::KANA_SWAP[oneesan] { - Some(swap) if (from[0] & 0x2) == 0x2 => swap, - Some(_) if (from[0] & 0x8) == 0x8 && map::KANA_SWAP2[oneesan].is_some() => map::KANA_SWAP2[oneesan].unwrap(), + let xor = if from.len() > 1 { + from[0] ^ from[1] + } else { + from[0] + } as u32; + d.0 = Some(match map::KANA_SWAP[range.start()+oneesan] { + Some(swap) if xor & 2 == 0 => swap, + Some(_) if xor & 8 == 0 && map::KANA_SWAP2[range.start() + oneesan].is_some() => map::KANA_SWAP2[range.start()+oneesan].unwrap(), _ => kana[oneesan], }); if from.len() > 1 {