From eefb716ac26b026e039f029e3672a13eb654414d Mon Sep 17 00:00:00 2001
From: Avril <flanchan@cumallover.me>
Date: Thu, 25 Jun 2020 10:50:17 +0100
Subject: [PATCH] khash_max_length; describe algorithm in README

---
 Makefile        |  3 +++
 README.org      | 34 +++++++++++++++++++++++++++-------
 cli/src/main.c  |  4 ++--
 include/khash.h | 17 ++++++++++-------
 src/lib.rs      | 48 ++++++++++++++++++++++++++++++++++++++++--------
 5 files changed, 82 insertions(+), 24 deletions(-)

diff --git a/Makefile b/Makefile
index dd616b8..e38a1ec 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,6 @@
 INSTALL:= /usr/local/lib
 INSTALL-BIN:= /usr/local/bin
+INSTALL-INCLUDE:=/usr/local/include
 CLI:= cli
 
 
@@ -19,7 +20,9 @@ test:
 install:
 	cp -f ./target/release/libkhash.so $(INSTALL)/libkhash.so
 	cp -f $(CLI)/build/kana-hash $(INSTALL-BIN)/kana-hash
+	cp -f include/khash.h $(INSTALL-INCLUDE)/khash.h
 
 uninstall:
-	rm -f $(INSTALL)/libkana_hash.so
+	rm -f $(INSTALL)/libkhash.so
 	rm -f $(INSTALL-BIN)/kana-hash
+	rm -f $(INSTALL-INCLUDE)/khash.h
diff --git a/README.org b/README.org
index 4c8f8f5..c5bd36b 100644
--- a/README.org
+++ b/README.org
@@ -137,6 +137,7 @@
      | ~khash_clone_context~ | /src/, /dst/                                   | Clone a context allocated with ~khash_new_context()~ into another. The newly allocated /dst/ must be properly released (with ~khash_free_context()~ or ~khash_do()~) as well as the source. /src/ is expected to be a valid pointer to an allocated context, and /dst/ is expected to be a valid pointer to an unallocated context.                                                                                                              |
      | ~khash_length~        | /ctx/, /data/, /size/, /length/                | Compute the length required to hold the output string for ~khash_do()~ for a given input. Will read exactly /size/ bytes from /data/ and compute the value into what is pointed to by /length/ (which is expected to be a valid pointer to a type of ~size_t~.) The resulting length does not include a =NUL= terminator for the string.                                                                                                         |
      | ~khash_do~            | /ctx/, /data/, /size/, /output/, /output_size/ | Compute the kana-hash of /size/ bytes from /data/ and store no more than /output_size/ of the the result into the string pointed to by /output/. Each pointer is expected to be valid. This function frees the supplied /ctx/ after the hash has been computed, and thus /ctx/ is no longer valid afterwards.                                                                                                                                    |
+     | ~khash_max_length~    | /algo/, /input_len/, /output_len/              | Calculate the max possible size for the given algorithm (expected to be one of the =KHASH_ALGO_= macros) and input length, and store this result in /output_len/ (expected to be a valid non-~NULL~ pointer.) /input_len/ is not required unless the algorithm is dynamically sized (all currently implemented ones are not.)                                                                                                                     |
 
 ** Node FFI bindings
    NPM package in [[file:./node/index.js][./node]]
@@ -157,31 +158,31 @@
 
 **** Create a context
      Create the context by specifying an algorithm identifier, and an optional salt.
-     If provided, the salt must be of type `Salt`.
+     If provided, the salt must be of type ~Salt~.
      #+BEGIN_SRC javascript
   const ctx = new hash.Kana(hash.Kana.ALGO_DEFAULT, new hash.Salt("optional salt~"));
      #+END_SRC
 
 **** Create a hash
-     The `once()` function consumes the context and outputs a hash string.
+     The ~once()~ function consumes the context and outputs a hash string.
      #+BEGIN_SRC javascript
   const output = ctx.once("input string");
      #+END_SRC
 
 ***** Creating a hash without consuming
-      If you want to reuse the context, use the `hash()` function.
+      If you want to reuse the context, use the ~hash()~ function.
       #+BEGIN_SRC javascript
   const output = ctx.hash("input string");
       #+END_SRC
 
 ***** Freeing the context
-      The context must be release after use if you have not called `once()`.
+      The context must be released after use if you have not called ~once()~.
       #+BEGIN_SRC javascript
   ctx.finish();
       #+END_SRC
 
 ***** Cloning an existing context
-      The new context must also be freed with either `once()` or `finish()`.
+      The new context must also be freed with either ~once()~ or ~finish()~.
       #+BEGIN_SRC javascript
   const new_ctx = ctx.clone();
       #+END_SRC
@@ -209,7 +210,26 @@
 ** Notes
    The strings generated by this library are meant to be pretty, not secure. It is not a secure way of representing a hash as many collisions are possible.
 
-*** TODO Digest algorithm
-
+*** Digest algorithm
+    The kana algorithm is a 16-bit block digest that works as follows:
+     - The most and least significant 8 bits of each block are separated into /Stage 0/ and /Stage 1/, which operate on the first and second byte respectively.
+     - Stage 0:
+       1. The byte is sign tested (bitwise ~AND~ =0x80=), store this as a boolean in /sign0/.
+       2. The valid first character range is looked up using the result of the sign test (either 0 or 1), store the range in /range/, and the slice ~KANA~ taken from the range in /kana/.
+       3. The first index is calculated as the unsigned first byte modulo the size (exclusive) of /range/. Store this as /index0/.
+       4. The swap table is checked to see if /index0/ has an entry. Then each following step is checked in order:
+	  + If the swap entry exists and the first byte bitwise ~AND~ =0x2= is not 0, set the first character of the output to the value found in the swap table.
+	  + If the swap entry exists and the first byte bitwise ~AND~ =0x8= is not 0 and the index has an entry in the 2nd swap table, set the first character of the output to the value found in the 2nd swap table.
+	  + In any other case, set the first character of the output to the value found in the /kana/ slice at /index0/.
+     - Stage 1:
+       1. Compute a sub table for /index0/ plus the start of /range/ using the ranges defined in ~KANA_SUB_VALID_FOR~ and store it in /sub/. If there is no sub table possible, skip to step 3.
+       2. If there is an entry in /sub/ for the index of the 2nd byte modulo the size of ~KANA_SUB~, set the second output character to be that character.
+       3. If there was no value set from the sub table, the 2nd output character becomes the first output character from inputting the 2nd byte back through /Stage 0/ as the first byte.
+     - Concatenate both characters and move to the next 16-bit block.
+
+    Notes:
+     - It is valid for a single iterator to produce between 0 and 2 characters but no more.
+     - If the input given to the algorithm cannot be divided exactly into 16-bit blocks (i.e. one byte is left over), a padding byte of 0 is added as the 2nd byte to make it fit.
+    For more information see [[file:./src/mnemonic.rs][mnemonic.rs]].  
 ** License
    GPL'd with love <3
diff --git a/cli/src/main.c b/cli/src/main.c
index a94d4f8..a306ff0 100644
--- a/cli/src/main.c
+++ b/cli/src/main.c
@@ -18,11 +18,11 @@ int main(void)
   assert(khash_new_context(KHASH_ALGO_SHA256, KHASH_SALT_TYPE_NONE, NULL, 0, &ctx) == KHASH_SUCCESS);
   printf("salt: %d\n", (int)ctx.salt.size);
   size_t length;
-  assert(khash_length(&ctx, string, strlen(string), &length) == KHASH_SUCCESS);
+  assert(khash_max_length(KHASH_ALGO_SHA256, strlen(string), &length) == KHASH_SUCCESS);
   printf("length: %d\n", (int)length);
   char* output = alloca(length+1);
+  memset(output,0,length+1);
   assert(khash_do(&ctx, string, strlen(string), output,length) == KHASH_SUCCESS);
-  output[length] = 0;
   printf("output: %s\n", output);
   return 0;
 }
diff --git a/include/khash.h b/include/khash.h
index 5ed2782..2c153a6 100644
--- a/include/khash.h
+++ b/include/khash.h
@@ -57,13 +57,8 @@ extern "C" {
   /// Unknown error
 #define KHASH_ERROR_UNKNOWN ((int32_t)-1)
 
-  /// Create a new salt. `salt_type` is expected to be one of the above defined `KHASH_SALT_TYPE_*` macros.
-  /// Depending on the type, `data` may be `NULL`.
-  extern int32_t khash_new_salt(uint8_t salt_type, const void* data, size_t size, khash_salt* output) _deprecated("Use `khash_new_context` instead."); 
-  /// Free a salt allocated with `khash_new_salt`. It is okay to call this multiple times.
-  extern int32_t khash_free_salt(khash_salt* salt) _deprecated("Use `khash_free_context` instead.");
-  /// Clone a salt allocated with `khash_new_salt`.
-  extern int32_t khash_clone_salt(const khash_salt* src, khash_salt* dst) _deprecated("Use `khash_close_context` instead."); 
+  /// Find the maximum possible digest output size for the given algorithm (one of the `KHASH_ALGO_*` macro constants) and input length, and store it in `*digest_length`, which must be a valid non-`NULL` pointer. If the algorithm's output is not dynamically sized, `input_length` does not need to be provided.
+  extern int32_t khash_max_length(uint8_t algo, size_t input_length, size_t* digest_length);
 
   /// Create a new context with the specified algorithm (one of the `KHASH_ALGO_*` macro constants), salt type (one of the `KHASH_SALT_TYPE_*` constants), optional salt `data` and salt length `size`, and output pointer `output`.
   /// `data` may be `NULL` if the corresponding `salt_type` does not require an input.
@@ -80,6 +75,14 @@ extern "C" {
   /// This function takes ownership of and frees `context` after it has been called.
   extern int32_t khash_do(khash_ctx* context, const void* data, size_t size, char* string, size_t strlen);
 
+  /// Create a new salt. `salt_type` is expected to be one of the above defined `KHASH_SALT_TYPE_*` macros.
+  /// Depending on the type, `data` may be `NULL`. `size` is the length of `data`, and the new salt is written to `output`. Deprecated: use `khash_new_context`.
+  extern int32_t khash_new_salt(uint8_t salt_type, const void* data, size_t size, khash_salt* output) _deprecated("Use `khash_new_context` instead."); 
+  /// Free a salt allocated with `khash_new_salt`. It is okay to call this multiple times. Deprecated: use `khash_free_context`.
+  extern int32_t khash_free_salt(khash_salt* salt) _deprecated("Use `khash_free_context` instead.");
+  /// Clone the salt `src` (allocated with `khash_new_salt`) into `dst`. Deprecated: use `khash_clone_context`.
+  extern int32_t khash_clone_salt(const khash_salt* src, khash_salt* dst) _deprecated("Use `khash_clone_context` instead.");
+  
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/lib.rs b/src/lib.rs
index 0b47ced..dfd0217 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -34,6 +34,34 @@ mod tests {
 	    println!("kana: {}", kana);
 	}
     }
+
+    #[test]
+    fn max_len() // Checks that generated digests stay below the bound reported by `khash_max_length` for every algorithm.
+    {
+	fn max_length(algo: ctx::Algorithm, data_len: usize) -> usize // Thin wrapper: calls the FFI entry point and returns the value it wrote.
+	{
+	    let mut output: libc::size_t = 0;
+	    unsafe { // SAFETY: `output` is a live local, so the out-pointer passed below is valid for the write.
+		assert_eq!(khash_max_length(algo.into(), data_len.into(), &mut output as *mut libc::size_t), GENERIC_SUCCESS);
+	    }
+	    output
+	}
+
+	let input = "owowowoakpwodkapowkdapowkdpaokwpdoakwd";
+
+	let algos = [ctx::Algorithm::Crc32, ctx::Algorithm::Crc64, ctx::Algorithm::Sha256, ctx::Algorithm::Sha256Truncated];
+	for i in  0..1000 // Cycles through `algos` round-robin, building a new context with a `Salt::random()` salt each pass.
+	{
+	    let max_len = max_length(algos[i%algos.len()].clone(), 0); // Input length is passed as 0; `khash_max_length` does not read it (`_input_sz`).
+	    print!("{} - len of {:?}: {}... ", i, algos[i%algos.len()], max_len);
+	    let len = {
+		let con = ctx::Context::new(algos[i%algos.len()].clone(), salt::Salt::random().unwrap());
+		generate(&con, input).unwrap().len() // NOTE(review): presumably the byte length of the generated string — confirm `generate`'s return type.
+	    };
+	    assert!(len < max_len); // Strict `<`: the actual output must be shorter than the reported maximum.
+	    println!("\t\tOK {}", len);
+	}
+    }
 }
 
 pub const BUFFER_SIZE: usize = 4096;
@@ -235,14 +263,18 @@ pub unsafe extern "C" fn khash_clone_salt(salt: *const salt::FFI, out: *mut salt
     }   
 }
 
-
-//TODO:
-/*
-mod ctx;
-
+/// Find the maximum length possible for a given algorithm's output and write it to `*max_len`, returning `GENERIC_SUCCESS`. `_input_sz` is currently unused. Safety: `max_len` is written through without a null check, so it must be a valid, writable pointer.
 #[no_mangle]
-pub unsafe extern "C" fn khash_new_context(salt: *mut salt::FFI, ctx: *mut ctx::CContext) -> i32
+pub unsafe extern "C" fn khash_max_length(algo: u8, _input_sz: libc::size_t, max_len: *mut libc::size_t) -> i32
 {
-
+    no_unwind!{ // NOTE(review): presumably keeps panics from unwinding across the FFI boundary — macro is defined elsewhere, confirm.
+	let hash_sz = match ctx::Algorithm::from(algo) { // In-memory size of the hash value type for the selected algorithm.
+	    ctx::Algorithm::Crc32 => std::mem::size_of::<hash::Crc32Checksum>(),
+	    ctx::Algorithm::Crc64 => std::mem::size_of::<hash::Crc64Checksum>(),
+	    ctx::Algorithm::Sha256 => std::mem::size_of::<hash::Sha256Hash>(),
+	    ctx::Algorithm::Sha256Truncated => std::mem::size_of::<hash::Sha256Truncated>(),
+	};
+	*max_len =  std::mem::size_of::<char>() * hash_sz; // Bound is one `char` (4 bytes) for every byte of the hash value.
+	GENERIC_SUCCESS
+    }
 }
-*/