commit fe3381650e2f9589559c773489bdfc27a3efe095 Author: Avril Date: Mon Jun 22 20:28:05 2020 +0100 initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..80aca69 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/target +Cargo.lock +*~ diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..7f02c50 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "kana-hash" +version = "0.1.0" +authors = ["Avril "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lib] +crate-type = ["cdylib"] + +[dependencies] +sha2 = "0.9" +malloc-array = "1.3.3" +libc = "0.2" +crc = "1.8" \ No newline at end of file diff --git a/src/array.rs b/src/array.rs new file mode 100644 index 0000000..dc5f65c --- /dev/null +++ b/src/array.rs @@ -0,0 +1,16 @@ + + + +pub fn copy_slice(mut dst: D, src: S) -> usize +where T: Clone, + D: AsMut<[T]>, + S: AsRef<[T]> +{ + let mut i =0; + for (d,s) in dst.as_mut().iter_mut().zip(src.as_ref().iter()) + { + *d = s.clone(); + i+=1; + } + i +} diff --git a/src/def.rs b/src/def.rs new file mode 100644 index 0000000..7b7b5b6 --- /dev/null +++ b/src/def.rs @@ -0,0 +1,36 @@ +use std::ops::RangeInclusive; + +#[derive(Clone,Debug,PartialEq,Eq,Hash)] +pub enum Definition +{ + Single(RangeInclusive), + Any, + None, +} + +impl Definition +{ + + pub const fn single(idx: RangeInclusive) -> Self + { + Self::Single(idx) + } + pub const fn any() -> Self + { + Self::Any + } + pub const fn none() -> Self + { + Self::None + } + + pub fn contains(&self, sz: usize) -> bool + { + use Definition::*; + match self { + Single(range) => range.contains(&sz), + Any => true, + _ => false, + } + } +} diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..9ea0ef2 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,68 @@ +use std::{ + fmt, + io, + error, +}; + +#[derive(Debug)] +pub enum Error +{ + IO(io::Error), + Format(fmt::Error), + Length{expected: usize, got:usize,}, + Unknown, +} + +impl error::Error for Error +{ + fn source(&self) -> Option<&(dyn error::Error + 'static)> + { + match &self { + Error::IO(e_io) => Some(e_io), + Error::Format(e_fmt) => Some(e_fmt), + _ => None, + } + } +} + +impl fmt::Display for Error +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + write!(f, "kana-hash error: ")?; + match self { + Error::IO(io) => write!(f, "io: {}", io), + Error::Format(fmt) => write!(f, "fmt: {}", fmt), + Error::Length{expected, got} => write!(f, "invalid length: expected {}, got {}", expected, got), + _ => write!(f, "unknown failure"), + } + } +} + +impl From for i32 +{ + fn from(er: Error) -> Self + { + match er { + Error::IO(_) => 1, + Error::Format(_) => 2, + Error::Length{..} => 3, + _ => -1, + } + } +} + +impl From for Error +{ + fn from(i: io::Error) -> Self + { + Self::IO(i) + } +} +impl From for Error +{ + fn from(i: fmt::Error) -> Self + { + Self::Format(i) + } +} diff --git a/src/ffi.rs b/src/ffi.rs new file mode 100644 index 0000000..13e5100 --- /dev/null +++ b/src/ffi.rs @@ -0,0 +1,47 @@ +#![allow(unused_macros)] + +pub const GENERIC_ERROR: i32 = -1; +pub const GENERIC_SUCCESS: i32 = 0; + +macro_rules! c_try { + ($e:expr) => { + match $e { + Ok(v) => v, + Err(e) => return i32::from(e), + } + } +} + +macro_rules! no_unwind { + (try $t:expr; $($e:tt)*) => { + { + match std::panic::catch_unwind(|| { + $($e)* + }) { + Ok(v) => i32::from(v), + Err(_) => return i32::from($t), + } + } + }; + ($($e:tt)*) => { + no_unwind! {try $crate::ffi::GENERIC_ERROR; $($e)*} + } + +} + + +macro_rules! string_from_ffi { + ($file:expr) => { + unsafe { + let file = $file; + if file.is_null() { + return $crate::ffi::GENERIC_ERROR; + } + let file = CStr::from_ptr(file); + match file.to_str() { + Ok(file) => file.to_owned(), + Err(_) => return $crate::ffi::GENERIC_ERROR, + } + } + } +} diff --git a/src/group.rs b/src/group.rs new file mode 100644 index 0000000..577a452 --- /dev/null +++ b/src/group.rs @@ -0,0 +1,57 @@ +/// Group iterator output every n values into `Vec`. +pub struct GroupIter +where I: Iterator +{ + buffer: Vec, + iter: I, + group_at: usize, +} + +impl Iterator for GroupIter +where I: Iterator +{ + type Item = Vec; + + fn next(&mut self) -> Option + { + while self.buffer.len() < self.group_at + { + if let Some(value) = self.iter.next() { + self.buffer.push(value) + } else { + return self.swap(); + } + } + self.swap() + } +} + +impl GroupIter +where I: Iterator +{ + fn swap(&mut self) -> Option> + { + let buff = { + let next = Vec::with_capacity(self.group_at); + std::mem::replace(&mut self.buffer, next) + }; + if buff.len() > 0 { + Some(buff) + } else { + None + } + } +} + +pub trait GroupExt: Iterator + Sized { + fn group_at(self, at: usize) -> GroupIter<::Item, Self> + { + GroupIter{ + group_at: at, + iter: self, + buffer: Vec::with_capacity(at), + } + } +} +impl GroupExt for T where T: Iterator{} + diff --git a/src/hash/crc32.rs b/src/hash/crc32.rs new file mode 100644 index 0000000..c14a44b --- /dev/null +++ b/src/hash/crc32.rs @@ -0,0 +1,41 @@ +use super::*; +use crc::{Hasher32, crc32}; + +pub struct Crc32Checksum +{ + hash: u32, +} + +impl provider::ByteProvider for Crc32Checksum +{ + fn bytes(&self) -> &[u8] + { + unsafe{reinterpret::bytes(&self.hash)} + } + + fn compute(input: &mut T, done: &mut usize) -> Result + { + let mut buffer = [0u8; BUFFER_SIZE]; + let mut hasher = crc32::Digest::new(crc32::IEEE); + let mut read; + while (read = input.read(&mut buffer[..])?, read!=0).1 + { + hasher.write(&buffer[..read]); + *done += read; + } + Ok(Self{hash: hasher.sum32()}) + } +} + +use std::fmt; +impl fmt::Display for Crc32Checksum +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + write!(f, "Crc32checksum (")?; + for b in provider::ByteProvider::bytes(self) { + write!(f, "{:02x}", *b)?; + } + write!(f, ")") + } +} diff --git a/src/hash/crc64.rs b/src/hash/crc64.rs new file mode 100644 index 0000000..2af4c3f --- /dev/null +++ b/src/hash/crc64.rs @@ -0,0 +1,41 @@ +use super::*; +use crc::{Hasher64, crc64}; + +pub struct Crc64Checksum +{ + hash: u64, +} + +impl provider::ByteProvider for Crc64Checksum +{ + fn bytes(&self) -> &[u8] + { + unsafe{reinterpret::bytes(&self.hash)} + } + + fn compute(input: &mut T, done: &mut usize) -> Result + { + let mut buffer = [0u8; BUFFER_SIZE]; + let mut hasher = crc64::Digest::new(crc64::ECMA); + let mut read; + while (read = input.read(&mut buffer[..])?, read!=0).1 + { + hasher.write(&buffer[..read]); + *done += read; + } + Ok(Self{hash: hasher.sum64()}) + } +} + +use std::fmt; +impl fmt::Display for Crc64Checksum +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + write!(f, "Crc64checksum (")?; + for b in provider::ByteProvider::bytes(self) { + write!(f, "{:02x}", *b)?; + } + write!(f, ")") + } +} diff --git a/src/hash/mod.rs b/src/hash/mod.rs new file mode 100644 index 0000000..dca4925 --- /dev/null +++ b/src/hash/mod.rs @@ -0,0 +1,10 @@ +use crate::*; + +mod sha256; +pub use sha256::*; + +mod crc64; +pub use crc64::*; + +mod crc32; +pub use crc32::*; diff --git a/src/hash/sha256.rs b/src/hash/sha256.rs new file mode 100644 index 0000000..3e259d3 --- /dev/null +++ b/src/hash/sha256.rs @@ -0,0 +1,83 @@ +use super::*; +use sha2::{Sha256, Digest}; +use std::{ + io::{ + self, + Read, + }, +}; + +pub const SHA256_SIZE: usize = 32; +#[repr(C)] +#[repr(packed)] +#[derive(Copy,Clone,Debug,PartialEq,Eq,Hash)] +pub struct Sha256Hash +{ + hash: [u8; SHA256_SIZE], +} + + +fn compute_stream(input: &mut T, output: &mut D) -> io::Result +{ + let mut buffer = [0u8; BUFFER_SIZE]; + + let mut read; + let mut done=0; + while (read = input.read(&mut buffer[..])?, read!=0).1 + { + output.update(&buffer[..read]); + done+=read; + } + Ok(done) +} + +impl Sha256Hash +{ + /// Compute a hash from a stream. + pub fn compute(input: &mut T) -> io::Result<(usize, Self)> + { + let mut hash = [0u8; SHA256_SIZE]; + + let mut hasher = Sha256::new(); + + let ok = compute_stream(input, &mut hasher)?; + + assert_eq!(array::copy_slice(&mut hash, hasher.finalize()), SHA256_SIZE); + Ok((ok, Self{hash})) + } + + pub fn bytes(&self) -> &[u8; SHA256_SIZE] + { + &self.hash + } +} + +use std::fmt; +impl fmt::Display for Sha256Hash +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + write!(f, "Sha256hash (")?; + for byte in self.hash.iter() + { + write!(f, "{:02x}", *byte)?; + } + write!(f, ")") + } +} + + +impl provider::ByteProvider for hash::Sha256Hash +{ + fn bytes(&self) -> &[u8] + { + &self.bytes()[..] + } + + fn compute(input: &mut T, done: &mut usize) -> Result + { + let (ok, this) = Self::compute(input)?; + *done = ok; + Ok(this) + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..6034abc --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,104 @@ +#![allow(dead_code)] +use std::{ + io::{ + Read, + }, + fmt::Write, +}; + +type HASHER =hash::Crc64Checksum; + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn it_works() -> Result<(), error::Error> + { + let input = b"hello world!!"; + let kana = generate(input)?; + println!("kana: {}", kana); + panic!("uhh") + } +} + +pub const BUFFER_SIZE: usize = 4096; + +mod array; +mod reinterpret; +mod group; +mod sixteen; +use sixteen::Bit16IterExt; +mod def; +mod map; +mod hash; +mod provider; +mod mnemonic; +mod error; + +#[macro_use] +mod ffi; +use ffi::*; + +fn compute(mut from: T) -> Result<(usize, String), error::Error> +{ + let (read, hash) = provider::compute::<_, Digest>(&mut from)?; + + println!("hash ({}): {}", read, hash); + let mut output = String::with_capacity(128); + for element in hash.bytes().iter() + .into_16() + .map(|bytes| mnemonic::Digest::new(unsafe{reinterpret::bytes(&bytes)})) + { + write!(output, "{}", element)?; + } + + Ok((read,output)) +} + +pub fn generate>(bytes: T) -> Result +{ + let bytes = bytes.as_ref(); + let mut nbytes = bytes; + let (ok, string) = compute::<_, HASHER>(&mut nbytes)?; + if ok == bytes.len() { + Ok(string) + } else { + return Err(error::Error::Length{expected: bytes.len(), got: ok}); + } +} + +use std::ffi::c_void; +use libc::{ + size_t, + c_char, +}; + +use malloc_array::{ + HeapArray, +}; + +#[no_mangle] +pub unsafe extern "C" fn _kana_length(bin: *const c_void, sz: size_t, out_len: *mut size_t) -> i32 +{ + no_unwind!{ + try error::Error::Unknown; + let bin = HeapArray::::from_raw_copied(bin as *const u8, usize::from(sz)); + let string = c_try!(generate(&bin)); + *out_len = (string.bytes().len()+1).into(); + + GENERIC_SUCCESS + } +} +#[no_mangle] +pub unsafe extern "C" fn _kana_do(bin: *const c_void, sz: size_t, out_str: *mut c_char, str_len: size_t) -> i32 +{ + no_unwind!{ + try error::Error::Unknown; + let bin = HeapArray::::from_raw_copied(bin as *const u8, usize::from(sz)); + let string: Vec = c_try!(generate(&bin)).bytes().collect(); + + libc::memcpy(out_str as *mut c_void, &string[0] as *const u8 as *const c_void, std::cmp::min(str_len, string.len())); + + GENERIC_SUCCESS + } +} diff --git a/src/map.rs b/src/map.rs new file mode 100644 index 0000000..c5c7b23 --- /dev/null +++ b/src/map.rs @@ -0,0 +1,92 @@ +pub const KANA: &[char; 92] = &[ + 'あ', 'い', 'う', 'え', 'お', + 'か', 'き', 'く', 'け', 'こ', + 'さ', 'し', 'す', 'せ', 'そ', + 'た', 'ち', 'つ', 'て', 'と', + 'な', 'に', 'ぬ', 'ね', 'の', + 'は', 'ひ', 'ふ', 'へ', 'ほ', + 'ま', 'み', 'む', 'め', 'も', + 'ら', 'り', 'る', 'れ', 'ろ', + 'や', 'ゆ', 'よ', 'わ', 'ん', + 'を', //45 + 'ア', 'イ', 'ウ', 'エ', 'オ', + 'カ', 'キ', 'ク', 'ケ', 'コ', + 'サ', 'シ', 'ス', 'セ', 'ソ', + 'タ', 'チ', 'ツ', 'テ', 'ト', + 'ナ', 'ニ', 'ヌ', 'ネ', 'ノ', + 'ハ', 'ヒ', 'フ', 'ヘ', 'ホ', + 'マ', 'ミ', 'ム', 'メ', 'モ', + 'ラ', 'リ', 'ル', 'レ', 'ロ', + 'ヤ', 'ユ', 'ヨ', + 'ワ', 'ン', 'ヲ', +]; +pub const KANA_SUB: &[char; 18] = &[ + 'ゃ', + 'ゅ', + 'ょ', + 'ャ', + 'ュ', + 'ョ', + 'っ', + 'ッ', + 'ぁ','ぃ','ぅ','ぇ','ぉ', + 'ァ','ィ','ゥ','ェ','ォ', +]; + +use crate::def::Definition; + +pub const KANA_SUB_VALID_FOR: &[Definition; 18] = &[ // Should we properly restrict these to only ones that make sense? (i.e. KI SHI HI etc..) + Definition::single(5..=39), + Definition::single(5..=39), + Definition::single(5..=39), + Definition::single(51..=85), + Definition::single(51..=85), + Definition::single(51..=85), + Definition::any(), + Definition::any(), + Definition::single(5..=39), + Definition::single(5..=39), + Definition::single(5..=39), + Definition::single(5..=39), + Definition::single(5..=39), + Definition::single(51..=85), + Definition::single(51..=85), + Definition::single(51..=85), + Definition::single(51..=85), + Definition::single(51..=85), +]; + +/// Find all subs that are okay for this kana. If `kana` is not in `KANA`, return None. +pub fn find_sub(kana: char) -> Option> +{ + for (i,x) in (0..(KANA.len())).zip(KANA.iter()) { + if *x == kana { + let mut output = Vec::with_capacity(KANA_SUB.len()); + for (def,sub) in KANA_SUB_VALID_FOR.iter().zip(KANA_SUB.iter()) + { + if def.contains(i) { + output.push(sub.clone()); + } + } + return Some(output); + } + } + None +} + +/// Find subs by index. +pub fn sub(i: usize) -> Option> +{ + if i < KANA.len() { + let mut output = Vec::with_capacity(KANA_SUB.len()); + for (def,sub) in KANA_SUB_VALID_FOR.iter().zip(KANA_SUB.iter()) + { + if def.contains(i) { + output.push(sub.clone()); + } + } + Some(output) + } else { + None + } +} diff --git a/src/mnemonic.rs b/src/mnemonic.rs new file mode 100644 index 0000000..613e37c --- /dev/null +++ b/src/mnemonic.rs @@ -0,0 +1,59 @@ +use crate::*; + +#[derive(Debug,Clone,PartialEq,Eq,Hash)] +pub struct Digest(Option, Option); + +impl Default for Digest +{ + fn default() -> Self + { + Self(None,None) + } +} + +impl Digest { + /// Create new single 2-byte digest. + pub fn new(from: &[u8]) -> Self + { + let mut d = Self::default(); + + let master = usize::from(from[0]) % map::KANA.len(); + d.0 = Some(map::KANA[master]); + if from[1] > 0 { + if let Some(slaves) = map::sub(master) { + if slaves.len() > 0 { + d.1 = Some(slaves[usize::from(from[1]) % slaves.len()]); + return d; + } + } + let from = [from[1]]; + d.1 = Self::new(&from[..]).0; + } + d + /*let master = usize::from(from) % map::KANA.len(); + d.0 = Some(map::KANA[master]); + if let Some(slaves) = map::sub(master) { + if slaves.len() > 0 { + } + } else { + + } + + return d;*/ + } +} + +use std::fmt; +impl fmt::Display for Digest +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result + { + if let Some(master) = self.0 { + write!(f, "{}", master)?; + } + if let Some(slave) = self.1 { + write!(f, "{}", slave)?; + } + Ok(()) + } +} diff --git a/src/provider.rs b/src/provider.rs new file mode 100644 index 0000000..9f37eb4 --- /dev/null +++ b/src/provider.rs @@ -0,0 +1,15 @@ +use crate::*; + +pub trait ByteProvider: Sized + std::fmt::Display +{ + fn compute(input: &mut T, provided: &mut usize) -> Result; + fn bytes(&self) -> &[u8]; +} + + +pub fn compute(input: &mut T) -> Result<(usize, P), error::Error> +{ + let mut output = 0usize; + let this = P::compute(input, &mut output)?; + Ok((output, this)) +} diff --git a/src/reinterpret.rs b/src/reinterpret.rs new file mode 100644 index 0000000..1441d45 --- /dev/null +++ b/src/reinterpret.rs @@ -0,0 +1,13 @@ + + +pub unsafe fn bytes<'a, T>(src: &'a T) -> &'a [u8] + where T: ?Sized +{ + std::slice::from_raw_parts(src as *const T as *const u8, std::mem::size_of_val(src)) +} + +pub unsafe fn bytes_mut<'a, T>(src: &'a mut T) -> &'a mut [u8] + where T: ?Sized +{ + std::slice::from_raw_parts_mut(src as *mut T as *mut u8, std::mem::size_of_val(src)) +} diff --git a/src/sixteen.rs b/src/sixteen.rs new file mode 100644 index 0000000..32d2364 --- /dev/null +++ b/src/sixteen.rs @@ -0,0 +1,49 @@ +use std::borrow::Borrow; + +/// Iter that converts 2 `u8`s into 1 `u16` +pub struct Bit16Iter +where I: Iterator, +::Item: Borrow +{ + iter: I, +} + +impl Iterator for Bit16Iter +where I: Iterator, +::Item: Borrow +{ + type Item = u16; + fn next(&mut self) -> Option + { + let mut c = 0u16; + unsafe { + if let Some(a) = self.iter.next() { + crate::reinterpret::bytes_mut(&mut c)[0] = *a.borrow(); + } else { + return None; + } + if let Some(b) = self.iter.next() { + crate::reinterpret::bytes_mut(&mut c)[1] = *b.borrow(); + } + } + Some(c) + } +} + +pub trait Bit16IterExt: Iterator + Sized +where ::Item: Borrow +{ + fn into_16(self) -> Bit16Iter; +} + +impl Bit16IterExt for I +where I: Iterator, +::Item: Borrow +{ + fn into_16(self) -> Bit16Iter + { + Bit16Iter{ + iter: self + } + } +}