=======================                         Revision 2018-05-04
MARSCHALL HASH 3 (MHA3)                         Daniel Marschall, ViaThinkSoft
=======================                         Public

Usage
=====

	MHA3 is a method to strengten a weak hash function, e.g. in case that
	the development environment or hardware does not allow using a new
	implementation of a modern hash variant like SHA256 or SHA3.

	MHA3 was designed with following goals:
	1. It should be simple to implement
	2. The memory usage should be low
	3. The calculation time should be high, to avoid brute force;
	   the complexity can be defined by the number of iterations
	4. The length of the hash should be variable
	5. It should hardening each hash, crc32 as well as md5
	6. If the base hash function is compromised, MHA3 would be still safe.


Usage of MHA3 with MCF (Modular Crypt Format)
=============================================

<?php
include 'vts_crypt.inc.php'; // https://github.com/danielmarschall/php_utils/blob/master/vts_crypt.inc.php

$hash = vts_password_hash('hello world', PASSWORD_VTS_MHA3, ['algo'=>'sha1', 'length'=>32, 'iterations'=>500]);

assert(vts_password_verify('hello world', $hash));
assert(!vts_password_verify('hello world 2', $hash));
?>

Definition
==========

	MHA3(C,L,I) := A(C,L,0) xor ... xor A(C,L,I-1)

	with:

		A(C,L,n) := B(C,n*L) & ... & B(C,n*L+(L-1))

		B(C,n)   := Q(H(C & (R^n)))

	Symbols:

		C: input data.

		L: Desired length of the hash in bytes  (L >= 1).

		I: Number of iterations (I >= 1).

		& is concatenation.

		H(s) is an existing hash function (unsalted).

		Q(s) is the sum of all bytes mod 256 of the string s.

		R is a constant value. We choose the single byte 0x01.

		R^n are n repetitions (concatenations) of R.


Example
=======

	First step:

		MHA3("test",3,2) = A("test",3,0) xor A("test",3,1)

		A("test",3,0) = B("test",0) & B("test",1) & B("test",2)
		A("test",3,1) = B("test",3) & B("test",4) & B("test",5)

		B("test",0) = Q(H("test"))
		B("test",1) = Q(H("test" & 0x01))
		B("test",2) = Q(H("test" & 0x01 & 0x01))
		B("test",3) = Q(H("test" & 0x01 & 0x01 & 0x01))
		B("test",4) = Q(H("test" & 0x01 & 0x01 & 0x01 & 0x01))
		B("test",5) = Q(H("test" & 0x01 & 0x01 & 0x01 & 0x01 & 0x01))

	Second step:

		MHA3("test",3,2) = (B("test",0) & B("test",1) & B("test",2)) xor
		                   (B("test",3) & B("test",4) & B("test",5))

	Third step:

		MHA3("test",3,2) = (Q(H("test"&(R^0))) & Q(H("test"&(R^1))) & Q(H("test"&(R^2)))) xor
		                   (Q(H("test"&(R^3))) & Q(H("test"&(R^4))) & Q(H("test"&(R^5))))


Runtime complexity
==================

	The number of hash OPs is <iterations> times <length>.


Naming convention
=================

	Naming proposal for documentations
	----------------------------------

	If you want to describe the algorithm used in a documentation, you can use following simplified notation:

	mha3_md5_16_500
	^    ^   ^  ^
	|    |   |  |
	|    |   |  +--- number of iterations (I)
	|    |   +------ digit count (L)
	|    +---------- base hash function (H)
	+--------------- MHA3 algorithm


	Hash storage in heterogenous systems
	------------------------------------

	If a hash is stored together with other hashes of different algorithms (e.g. in a .htpasswd file)
	or transfered between heterogenous systems, it is recommended to
	store the hash in following notation:

	<mha3-oid>$<base-hash>$<length>$<iterations>$<hash-base64>

	-  <mha3-oid> is the OID for a MHA3 algorithm in its dot-notation without leading dot,
	   which is 1.3.6.1.4.1.37476.3.2.1.3

	   This OID has ASN.1 Notation:
	   { iso(1) identified-organization(3) dod(6) internet(1) private(4) enterprise(1)
	     37476 specification(3) algorithm(2) hash(1) mha3-family(3) }

	-  <base-hash> should be an OID which represents the base hash ("H") algorithm used.

	   The following table shows the OIDs which have been assigned
	   for exchange between heterogenous systems. If you want to use a different
	   hash algorithm or a derivate of it, please define your own OID
	   (you can use an UUID OID, get a free IANA PEN OID, or a free ViaThinkSoft OID),
	   but please do not use this arc, since only ViaThinkSoft may maintain it.

	   --------------------------------------------------------------------------------------------
	   Algorithm                                  OID proposal
	   --------------------------------------------------------------------------------------------
	   Message Digest 4 (MD4)                     { 1 3 6 1 4 1 37476 3 2 1 99 md4(1) }
	   Message Digest 5 (MD5)                     { 1 3 6 1 4 1 37476 3 2 1 99 md5(2) }
	   RIPEMD-160                                 { 1 3 6 1 4 1 37476 3 2 1 99 ripemd160(3) }
	   Secure Hash Algorithm                      { 1 3 6 1 4 1 37476 3 2 1 99 sha0(4) }
	   Secure Hash Algorithm 1                    { 1 3 6 1 4 1 37476 3 2 1 99 sha1(5) }
	   Secure Hash Algorithm 2 (224 bits)         { 1 3 6 1 4 1 37476 3 2 1 99 sha2(6) 224 }
	   Secure Hash Algorithm 2 (256 bits)         { 1 3 6 1 4 1 37476 3 2 1 99 sha2(6) 256 }
	   Secure Hash Algorithm 2 (384 bits)         { 1 3 6 1 4 1 37476 3 2 1 99 sha2(6) 384 }
	   Secure Hash Algorithm 2 (512 bits)         { 1 3 6 1 4 1 37476 3 2 1 99 sha2(6) 512 }
	   Secure Hash Algorithm 3 (224 bits)         { 1 3 6 1 4 1 37476 3 2 1 99 sha3(7) 224 }
	   Secure Hash Algorithm 3 (256 bits)         { 1 3 6 1 4 1 37476 3 2 1 99 sha3(7) 256 }
	   Secure Hash Algorithm 3 (384 bits)         { 1 3 6 1 4 1 37476 3 2 1 99 sha3(7) 384 }
	   Secure Hash Algorithm 3 (512 bits)         { 1 3 6 1 4 1 37476 3 2 1 99 sha3(7) 512 }
	   --------------------------------------------------------------------------------------------

	-  <length> is the length of the resulting hash in bytes, e.g. 500.

	-  <iterations> is the number of iterations, e.g. 16.

	-  <hash-base64> is the resulting hash, encoded in Base64.


Test vectors
============

	mha3_md5_16_500('')                                             = 'ba09a41f928b072726c4671eaf8823eb';
	mha3_md5_16_500('The quick brown fox jumps over the lazy dog')  = 'c6863ee2b38240935862d39300b766e4';

	mha3_md5_32_500('')                                             = 'e86de25a963a3eacbaaf3d96bba578e28216b538fc797f7e2d8f0ec3109f5f4b';
	mha3_md5_32_500('The quick brown fox jumps over the lazy dog')  = '326cb56bb284b65b0a29c9f519bcf63d5f4e21b7e5615825dc98f5181c2cbda0';

	mha3_sha1_16_500('')                                            = '698f8745123787d1630f1d0a068f40ff';
	mha3_sha1_16_500('The quick brown fox jumps over the lazy dog') = '1b7039660d63bbe3c4573697c11c44df';

	mha3_sha1_32_500('')                                            = 'd420012c1ddb2e36c3403edc8e4d145e49fed20ebcf1f9ae9c4900741e20202f';
	mha3_sha1_32_500('The quick brown fox jumps over the lazy dog') = 'cfc731d10ad818585d55f17cbabb434b1e9320aa49c23c0bd8e1eda3e69d8277';


Reference implementation in PHP with H=SHA1
===========================================

<?php

class MHA3 {

    const 
MHA3_FORMAT_RAW_BINARY_STR   0;
    const 
MHA3_FORMAT_RAW_BINARY_ARRAY 1;
    const 
MHA3_FORMAT_BINARY_STRING    2;
    const 
MHA3_FORMAT_FULLY_QUALIFIED  3;

    public static function 
calc($content$length$iterations$algo='sha1'$format=MHA3::MHA3_FORMAT_BINARY_STRING) {
        
$out = array();
        for (
$l=0$l<$length$l++) {
            
$out[$l] = 0;
        }

        for (
$i=0$i<$iterations$i++) {
            for (
$l=0$l<$length$l++) {
                
$n $i*$length $l;
                
$salt str_repeat(chr(1), $n);
                
$out[$l] ^= self::bytesum_mod256(hash($algo$content.$salttrue));
            }
        }

        switch (
$format) {
            case 
MHA3::MHA3_FORMAT_RAW_BINARY_STR:
                return 
self::array_to_binarystring($out);
            case 
MHA3::MHA3_FORMAT_RAW_BINARY_ARRAY:
                return 
$out;
            case 
MHA3::MHA3_FORMAT_BINARY_STRING:
                return 
self::array_to_hexdump($out);
            case 
MHA3::MHA3_FORMAT_FULLY_QUALIFIED:
                return 
"mha3_{$algo}_{$length}_{$iterations}:".base64_encode(self::array_to_binarystring($out));
            default:
                return 
false;
        }
    }

    private static function 
array_to_binarystring($ary) {
        
$out '';
        for (
$i 0$i count($ary); $i++) {
            
$out .= chr($ary[$i]);
        }
        return 
$out;
    }

    private static function 
array_to_hexdump($ary) {
        
$out '';
        for (
$i 0$i count($ary); $i++) {
            
$out .= str_pad(dechex($ary[$i]), 2'0'STR_PAD_LEFT);
        }
        return 
$out;
    }

    private static function 
bytesum_mod256($x) {
        
$res 0;
        for (
$i=0$i<strlen($x); $i++) $res = ($res ord($x[$i])) % 256;
        return 
$res;
    }
}

# ---

/*
$algo = 'sha1'; $len = 16; $iterations = 500; $text = "The quick brown fox jumps over the lazy dog";
$time_start = microtime_float();
$hash = MHA3::calc($text, $len, $iterations, $algo);
$time_end = microtime_float();
$time = $time_end - $time_start;
echo "mha3_{$algo}_{$len}_{$iterations}('$text') = '$hash'; // $time seconds\n";

function microtime_float() {
    list($usec, $sec) = explode(" ", microtime());
    return ((float)$usec + (float)$sec);
}
*/