[PATCH] Staging: Skein: Patch adds the test vectors

Sanidhya Solanki jpage.lkml at gmail.com
Sat Dec 5 11:44:10 UTC 2015


The patch is included below.
I would like to apologize for the past few emails. It turns out that the
documentation in the email-clients file is woefully out of
date. I have checked the patches for errors using the scripts provided.
However, it turns out that the scripts are slightly incorrect, as you
can see when they keep asking to align the statements with the brackets.
Another issue is that you cannot follow the scripts
completely without altering the code in a significant way, as they
suggest changing the deterministic portions of the hash, changing
certain functions, reducing line length, etc.
I have sent the patch to myself, applied it, as well as compared sha512
hashes of both the original and the mailed copy.
For the rest of the work on this patch and the skein subsystem, I defer
to someone with the expertise and experience of making Linux modules
and the skein hash families. I believe that if I continue to make
further changes to these files, I might make a mistake which alters
functionality of the hash function. I will continue to contribute to
the other kernel branches.
I will submit a patch to correct the email documentation tomorrow.
Thanks for your patience, everyone.
-----------------------------------------------------------------------
[PATCH] Staging: Skein: Patch adds the test vectors

    This patch adds the test vectors, and cleans up most of the
    syntactical and formatting errors. It also updates the TODO for
    the remaining tasks.

Signed-off-by: Sanidhya Solanki <jpage.lkml at gmail.com>
---
 drivers/staging/skein/TODO         |   8 +-
 drivers/staging/skein/skein_test.c | 831 +++++++++++++++++++++++++++++++++++++
 drivers/staging/skein/skein_test.h | 516 +++++++++++++++++++++++
 3 files changed, 1351 insertions(+), 4 deletions(-)
 create mode 100644 drivers/staging/skein/skein_test.c
 create mode 100644 drivers/staging/skein/skein_test.h

diff --git a/drivers/staging/skein/TODO b/drivers/staging/skein/TODO
index cd3508d..10a4b6cf 100644
--- a/drivers/staging/skein/TODO
+++ b/drivers/staging/skein/TODO
@@ -1,8 +1,8 @@
 skein/threefish TODO
 
- - move macros into appropriate header files
- - add / pass test vectors
+ - clean up skein_test.c and skein_test.h
+ - pass test vectors
  - module support
 
-Please send patches to Jason Cooper <jason at lakedaemon.net> in addition
to the -staging tree mailinglist.
+Please send patches to Jason Cooper <jason at lakedaemon.net> in addition
+to the staging tree mailinglist.
diff --git a/drivers/staging/skein/skein_test.c
b/drivers/staging/skein/skein_test.c new file mode 100644
index 0000000..9c1a794
--- /dev/null
+++ b/drivers/staging/skein/skein_test.c
@@ -0,0 +1,831 @@
+/***********************************************************************
+**
+** Test/verification code for the Skein block functions.
+**
+** Source code author: Doug Whiting, 2008.
+**
+** This algorithm and source code is released to the public domain.
+**
+** Testing:
+**   - buffering of incremental calls (random cnt steps)
+**   - partial input byte handling
+**   - output sample hash results (for comparison of ref vs. optimized)
+**   - performance
+**
+***********************************************************************/
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <time.h>
+#include <assert.h>
+
+#include "skein.h"
+#include "SHA3api_ref.h"
+
+static const uint_t hash_bits[] =	/* list of hash hash lengths
to test */
+		{ 160, 224, 256, 384, 512, 1024, 256 + 8, 512 + 8,
1024 + 8, 2048 + 8 }; +
+/* External function to process blkcnt (nonzero) full block(s) of
data. */ +void	skein_256_process_block(skein_256_ctxt_t *ctx,
const u08b_t *blkptr, size_t blkcnt, size_t bytecntadd); +void
skein_512_process_block(skein_512_ctxt_t *ctx, const u08b_t *blkptr,
size_t blkcnt, size_t bytecntadd); +void
skein_1024_process_block(skein_1024_ctxt_t *ctx, const u08b_t *blkptr,
size_t blkcnt, size_t bytecntadd); + +/********************** debug i/o
helper routines **********************/ +void fatalerror(const char
*s, ...) +{
+	/* print out a msg and exit with an error code */
+	va_list ap;
+
+	va_start(ap, s);
+	vprintf(s, ap);
+	va_end(ap);
+	printf("\n");
+	exit(2);
+}
+
+static uint_t _quiet_   =   0;  /* quiet processing? */
+static uint_t verbose   =   0;  /* verbose flag bits */
+static uint_t kathash   = ~0u;  /* use as a quick check on KAT results
*/ +
+void show_bytes(uint_t cnt, const u08b_t *b)
+{
+	/*
+	 * Print cnt bytes from b in hex, 16 per row with an extra gap
+	 * every four bytes, folding each byte into the running kathash.
+	 */
+	uint_t pos;
+
+	for (pos = 0; pos < cnt; pos++) {
+		const uint_t col = pos % 16;
+
+		/* row prefix at column 0, group separator every 4th byte */
+		if (col == 0)
+			printf("	");
+		else if (col % 4 == 0)
+			printf(" ");
+		printf(" %02X", b[pos]);
+
+		/* mix this byte into the quick KAT checksum */
+		kathash ^= b[pos];
+		kathash *= 0xDEADBEEF;
+		kathash ^= (kathash >> 23) ^ (kathash >> 17) ^ (kathash >> 9);
+		kathash *= 0xCAFEF00D;
+
+		/* end the row after 16 bytes or after the last byte */
+		if (col == 15 || pos + 1 == cnt)
+			printf("\n");
+	}
+}
+
+void show_debug(const char *s, ...)
+{
+	/* printf-style output that is emitted only while debug is enabled */
+	va_list args;
+
+	if (!SKEIN_DEBUG_FLAG)	/* are we showing debug info? */
+		return;
+
+	va_start(args, s);
+	vprintf(s, args);
+	va_end(args);
+}
+
+/************** Timing routine (for performance measurements) ***********/
+/* unfortunately, this is generally assembly code and not very portable */
+
+uint_32t high_res_time(void)
+{
+/********************** use RC4 to generate test data
******************/ +/* Note: this works identically on all platforms
(big/little-endian)   */ +static struct {
+	uint_t I,
J;						 /* RC4 vars */
+	u08b_t state[256];
+	} prng;
+
+void randbytes(void *dst, uint_t bytecnt)
+{
+	/* Write bytecnt bytes of RC4 output from the file-level prng to dst. */
+	u08b_t *out = (u08b_t *)dst;
+	u08b_t si, sj;
+
+	while (bytecnt != 0) {
+		/* advance the two RC4 indices */
+		prng.I = (prng.I + 1) & 0xFF;
+		si = prng.state[prng.I];
+		prng.J = (prng.J + si) & 0xFF;
+		sj = prng.state[prng.J];
+
+		/* swap the two state entries */
+		prng.state[prng.I] = sj;
+		prng.state[prng.J] = si;
+
+		*out++ = prng.state[(si + sj) & 0xFF];
+		bytecnt--;
+	}
+}
+
+/* get a pseudo-random 32-bit integer in a portable way */
+uint_t rand32(void)
+{
+	u08b_t raw[4];
+	uint_t value = 0;
+	uint_t idx;
+
+	randbytes(raw, sizeof(raw));
+
+	/* first byte drawn becomes the most significant byte of the result */
+	for (idx = 0; idx < sizeof(raw); idx++)
+		value = (value << 8) | raw[idx];
+
+	return value;
+}
+
+/*
+ * init the (RC4-based) prng
+ *
+ * seed: 64-bit seed; its eight bytes, least-significant first, form the
+ *       RC4 key, so the key does not depend on host byte order.
+ *
+ * Resets prng.state, prng.I and prng.J, then discards the first 512
+ * bytes of keystream.
+ */
+void rand_init(u64b_t seed)
+{
+	uint_t i, j;
+	u08b_t swap;
+	u08b_t tmp[512];
+
+	/* init the "key" in an endian-independent fashion */
+	for (i = 0; i < 8; i++)
+		tmp[i] = (u08b_t)(seed >> (8 * i));
+
+	/* initialize the permutation */
+	for (i = 0; i < 256; i++)
+		prng.state[i] = (u08b_t)i;
+
+	/* now run the RC4 key schedule */
+	for (i = j = 0; i < 256; i++) {
+		j = (j + prng.state[i] + tmp[i % 8]) & 0xFF;
+		swap = prng.state[i];
+		prng.state[i] = prng.state[j];
+		prng.state[j] = swap;
+	}
+	prng.I = prng.J = 0;  /* init I,J variables for RC4 */
+
+	/* discard initial keystream before returning */
+	randbytes(tmp, sizeof(tmp));
+}
+
+/***********************************************************************/
+/* An AHS-like API that allows explicit setting of block size		  */
+/*	[i.e., the AHS API selects a block size based solely on the ]	*/
+/*	[hash result length, while Skein allows independent hash	]	*/
+/*	[result size and block size					]	*/
+/***********************************************************************/
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/*
+ * select the context size and init the context
+ *
+ * blksize must be 256, 512 or 1024; state->statebits is set to the
+ * matching state size in bits and the variant's init routine is called
+ * with hashbit_len.  Returns that routine's result, or SKEIN_FAIL for
+ * any other blksize.
+ */
+int skein_init(int blksize, hashstate *state, int hashbit_len)
+{
+	switch (blksize) {
+	case  256:
+		state->statebits = 64 * SKEIN_256_STATE_WORDS;
+		return skein_256_init(&state->u.ctx_256, (size_t)hashbit_len);
+	case  512:
+		state->statebits = 64 * SKEIN_512_STATE_WORDS;
+		return skein_512_init(&state->u.ctx_512, (size_t)hashbit_len);
+	case 1024:
+		state->statebits = 64 * SKEIN_1024_STATE_WORDS;
+		return skein_1024_init(&state->u.ctx1024, (size_t)hashbit_len);
+	default:
+		return SKEIN_FAIL;
+	}
+}
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/*
+ * select the context size and init (extended) the context
+ *
+ * blksize must be 256, 512 or 1024; treeinfo, key and keybytes are
+ * passed through unchanged to the variant's init_ext routine.  Returns
+ * that routine's result, or SKEIN_FAIL for any other blksize.
+ */
+int skein_init_ext(int blksize, hashstate *state, int hashbit_len,
+		   u64b_t treeinfo, const u08b_t *key, size_t keybytes)
+{
+	switch (blksize) {
+	case  256:
+		state->statebits = 64 * SKEIN_256_STATE_WORDS;
+		return skein_256_init_ext(&state->u.ctx_256, (size_t)hashbit_len, treeinfo, key, keybytes);
+	case  512:
+		state->statebits = 64 * SKEIN_512_STATE_WORDS;
+		return skein_512_init_ext(&state->u.ctx_512, (size_t)hashbit_len, treeinfo, key, keybytes);
+	case 1024:
+		/* same upper-case macro spelling as the 256/512 cases */
+		state->statebits = 64 * SKEIN_1024_STATE_WORDS;
+		return skein_1024_init_ext(&state->u.ctx1024, (size_t)hashbit_len, treeinfo, key, keybytes);
+	default:
+		return SKEIN_FAIL;
+	}
+}
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* process data to be hashed */
+int skein_update(hashstate *state, const bitsequence *data,
data_length databit_len) +{
+	/* only the final Update() call is allowed do partial bytes,
else assert an error */
+	skein_assert((state->u.h.T[1] & skein_t1_FLAG_BIT_PAD) == 0 ||
databit_len == 0, FAIL); +
+	if ((databit_len & 7) == 0) {
+		switch (state->statebits) {
+		case  512:  return skein_512_update(&state->u.ctx_512,
data, databit_len >> 3);
+		case  256:  return skein_256_update(&state->u.ctx_256,
data, databit_len >> 3);
+		case 1024:  return
skein_1024_update(&state->u.ctx1024, data, databit_len >> 3);
+		default: return SKEIN_FAIL;
+		}
+	} else {
+		size_t bcnt = (databit_len >> 3) +
1;				  /* number of bytes to handle */
+		u08b_t mask, *p;
+
+		switch (state->statebits) {
+		case  512:
+			skein_512_update
+				(&state->u.ctx_512, data, bcnt);
+				p	= state->u.ctx_512.b;
+				break;
+		case  256:
+			skein_256_update
+				(&state->u.ctx_256, data, bcnt);
+				p	= state->u.ctx_256.b;
+				break;
+		case 1024:
+			skein_1024_update
+				(&state->u.ctx1024, data, bcnt);
+				p	= state->u.ctx1024.b;
+				break;
+		default:
+			return FAIL;
+		}
+
skein_set_bit_pad_flag(state->u.h);					 /*
set tweak flag for the final call */
+		/* now "pad" the final partial byte the way NIST likes
*/
+		bcnt = state->u.h.bcnt;		 /* get the
bcnt value (same location for all block sizes) */
+		skein_assert(bcnt != 0);		/* internal
sanity check: there IS a partial byte in the buffer! */
+		mask = (u08b_t)(1u << (7 - (databit_len &
7)));		 /* partial byte bit mask */
+		p[bcnt - 1]  = (u08b_t)((p[bcnt - 1] & (0 - mask)) |
mask);   /* apply bit padding on final byte (in the buffer) */ +
+		return SUCCESS;
+		}
+	}
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* finalize hash computation and output the result (hashbit_len bits) */
+int skein_final(hashstate *state, bitsequence *hashval)
+{
+	/* dispatch on the state size recorded in the context */
+	if (state->statebits == 512)
+		return skein_512_final(&state->u.ctx_512, hashval);
+	if (state->statebits == 256)
+		return skein_256_final(&state->u.ctx_256, hashval);
+	if (state->statebits == 1024)
+		return skein_1024_final(&state->u.ctx1024, hashval);
+
+	return SKEIN_FAIL;	/* unknown state size */
+}
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/*
+ * all-in-one hash function: init, update with the whole message, final.
+ * Returns the init result if init fails, otherwise the update result;
+ * the result of skein_final() is not reported.
+ */
+int skein_hash(int blksize, int hashbit_len, const bitsequence *data,
+	       data_length databit_len, bitsequence *hashval)
+{
+	hashstate ctx;
+	int status;
+
+	status = skein_init(blksize, &ctx, hashbit_len);
+	if (status != SKEIN_SUCCESS)
+		return status;
+
+	/* these calls do not fail when called properly */
+	status = skein_update(&ctx, data, databit_len);
+	skein_final(&ctx, hashval);
+
+	return status;
+}
+
+/***********************************************************************/
+/* various self-consistency checks */
+uint_t skein_test(uint_t blksize, uint_t maxlen, uint_t hashlen,
uint_t nstep, uint_t oneblk) +{
+	enum		{ MAX_BUF = 1024 };
+	u08b_t	  b[MAX_BUF + 4], hashval[2][MAX_BUF + 4];
+	uint_t	  i, j, k, n, bcnt, use_AHS, step, bit_len,
testcnt = 0;
+	hashstate   s[2];
+
+	assert(blksize > 0 && blksize <= 1024 && (blksize % 256) == 0);
+	assert((hashlen % 8) == 0);
+
+	if (maxlen  > MAX_BUF * 8)	 /* keep things reasonably
small */
+		maxlen  = MAX_BUF * 8;
+	if (hashlen > MAX_BUF * 8)
+		hashlen = MAX_BUF * 8;
+	if (maxlen  == 0)			/* default sizes */
+		maxlen  = blksize * 2;
+	if (hashlen == 0)
+		hashlen = blksize;
+
+	if (oneblk) {
+		if (oneblk > MAX_BUF * 8)
+			oneblk = MAX_BUF * 8;
+		for (i = 0; i < oneblk / 8; i++)
+			b[i] = (u08b_t)i;
+		if (skein_hash(blksize, hashlen, b, oneblk,
hashval[0]) != SKEIN_SUCCESS)
+			fatalerror("skein_hash != SUCCESS");
+		return 1;
+		}
+
+	if (nstep == 0) {
+		printf("Testing Skein: blksize = %4d bits. hashlen =
%4d bits. maxmsglen = %4d bits.\n",
+			   blksize, hashlen, maxlen);
+		nstep = 1;
+	}
+
+	n = SKEIN_DEBUG_FLAG;
+	SKEIN_DEBUG_FLAG = 0;		/* turn of debug display
for this "fake" AHS call */
+	if (init(&s[0], hashlen) != SUCCESS) /* just see if AHS API
supports this <blksize,hashlen> pair */
+		fatalerror("AHS_API Init() error!");
+	SKEIN_DEBUG_FLAG = n;		/* restore debug display
status */ +
+	use_AHS = (s[0].statebits == blksize);  /* does this
<blksize,hashlen> pair work via AHS_API? */ +
+	bcnt = (maxlen + 7) / 8;	/* convert maxlen to bytes */
+	for (n = 0; n < bcnt; n += nstep) {/* process all the data
lengths (# bytes = n+1)*/
+		randbytes(b, maxlen);	/* get something to hash
*/
+		for (j = 8; j > 0; j--)	 {   /* j = # bits in
final byte */
+			testcnt++;
+			memset(hashval, 0, sizeof(hashval));
+			show_debug("\n*** Single Hash() call (%d
bits)\n", 8 * n + j);
+			if (skein_hash(blksize, hashlen, b, 8 * n + j,
hashval[0]) != SKEIN_SUCCESS)
+				fatalerror("skein_hash != SUCCESS");
+			for (k = hashlen / 8; k <= MAX_BUF; k++)
+				if (hashval[0][k] != 0)
+					fatalerror("Skein hash output
overrun!: hashlen = %d bits", hashlen);
+			if (use_AHS)	{	 /* compare using
AHS API, if supported */
+				show_debug("\n*** Single AHS API
Hash() call\n");
+				if (hash(hashlen, b, 8 * n + j,
hashval[1]) != SUCCESS)
+					fatalerror("skein_hash !=
SUCCESS");
+				for (k = hashlen / 8; k <= MAX_BUF;
k++)
+					if (hashval[1][k] != 0)
+						fatalerror("Skein
AHS_API hash output overrun!: hashlen = %d bits", hashlen);
+				if (memcmp(hashval[1], hashval[0],
hashlen / 8))
+					fatalerror("Skein vs. AHS API
miscompare");
+				}
+			/* now try (randomized) steps thru entire
input block */
+			for (i = 0; i < 4; i++) {
+				show_debug("\n*** Multiple Update()
calls [%s]", (i) ? "random steps" : "step == 1");
+				if (i >= 2) {
+					show_debug("[re-use
precomputed state]");
+					s[0] = s[1];
+					}
+				else {
+					k = (i) ? skein_init(blksize,
&s[0], hashlen) :
+
skein_init_ext(blksize, &s[0], hashlen, skein_CFG_TREE_INFO_SEQUENTIAL,
NULL, 0);
+					if (k != SKEIN_SUCCESS)
+
fatalerror("skein_init != SUCCESS");
+					s[1] =
s[0];			/* make a copy for next time */
+					}
+				show_debug("\n");
+				for (k = 0; k < n + 1; k += step) { /*
step thru with variable sized steps */
+					/* for i == 0, step one byte
at a time. for i>0, randomly */
+					step = (i == 0) ? 1 : 1 +
(rand32() % (n + 1 - k));	 /* # bytes to process */
+					bit_len = (k + step >= n +
1) ? 8 * (step - 1) + j : 8 * step;  /* partial final byte handling */
+					if (skein_update(&s[0], &b[k],
bit_len) != SKEIN_SUCCESS)
+
fatalerror("skein_update != SUCCESS");
+					}
+				if (skein_final(&s[0], hashval[1]) !=
SKEIN_SUCCESS)
+					fatalerror("skein_final !=
SUCCESS");
+				for (k = hashlen / 8; k <= MAX_BUF;
k++)
+					if (hashval[0][k] != 0)
+						fatalerror("Skein hash
output overrun! : hashlen = %d bits", hashlen);
+				if (memcmp(hashval[1], hashval[0],
hashlen / 8))
+					fatalerror("Skein Hash() vs.
Update() miscompare!");
+				}
+			}
+		}
+	return testcnt;
+	}
+
+/*
+ * filter out <blksize,hashbits> pairs in short KAT mode
+ *
+ * Returns 1 when the pair is one of the combinations accepted below and
+ * 0 otherwise (including any blksize other than 256, 512 or 1024).
+ */
+uint_t short_KAT_OK(uint_t blksize, uint_t hashbits)
+{
+	switch (blksize) {
+	case  256:
+		if (hashbits != 256 && hashbits != 224)
+			return 0;
+		break;	/* without a break every case fell into "default: return 0" */
+	case  512:
+		if (hashbits != 256 && hashbits != 384 && hashbits != 512)
+			return 0;
+		break;
+	case 1024:
+		if (hashbits != 384 && hashbits != 512 && hashbits != 1024)
+			return 0;
+		break;
+	default:
+		return 0;
+	}
+	return 1;
+}
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* pad final block, no OUTPUT stage */
+int skein_final_pad(hashstate *state, bitsequence *hashval)
+{
+	int rc;
+
+	/* pick the variant that matches the state size of this context */
+	switch (state->statebits) {
+	case  256:
+		rc = skein_256_final_pad(&state->u.ctx_256, hashval);
+		break;
+	case  512:
+		rc = skein_512_final_pad(&state->u.ctx_512, hashval);
+		break;
+	case 1024:
+		rc = skein_1024_final_pad(&state->u.ctx1024, hashval);
+		break;
+	default:
+		rc = SKEIN_FAIL;
+		break;
+	}
+	return rc;
+}
+
+/* just the OUTPUT stage */
+
+int skein_output(hashstate *state, bitsequence *hashval)
+{
+	const uint_t bits = state->statebits;
+
+	/* forward to the variant selected by the recorded state size */
+	if (bits == 512)
+		return skein_512_output(&state->u.ctx_512, hashval);
+	else if (bits == 256)
+		return skein_256_output(&state->u.ctx_256, hashval);
+	else if (bits == 1024)
+		return skein_1024_output(&state->u.ctx1024, hashval);
+	else
+		return SKEIN_FAIL;
+}
+
+/* generate a KAT test for the given data and tree parameters. */
+/* This is an "all-in-one" call. It is not intended to represent */
+/* how a real multi-processor version would be implemented, but  */
+/* the results will be the same */
+void skein_tree_hash(uint_t blksize, uint_t hashbits, const u08b_t
*msg, size_t msg_bytes,
+					 uint_t leaf, uint_t node,
uint_t maxlevel, u08b_t *hash_res)
+	{
+	enum	  { MAX_HEIGHT = 32 };		  /* how
deep we can go here */
+	uint_t	height;
+	uint_t	blkbytes  = blksize / 8;
+	uint_t	savedebug = SKEIN_DEBUG_FLAG;
+	size_t	n, node_len, src_offs, dst_offs, bcnt;
+	u64b_t	treeinfo;
+	u08b_t	M[MAX_TREE_MSG_LEN + 4];
+	hashstate G, s;
+
+	assert(node < 256 && leaf < 256 && maxlevel < 256);
+	assert(node >  0  && leaf >  0  && maxlevel >  1);
+	assert(blksize == 256 || blksize == 512 || blksize == 1024);
+	assert(blkbytes <= sizeof(M));
+	assert(msg_bytes <= sizeof(M));
+
+	/* precompute the config block result G for multiple uses
below */ +#ifdef SKEIN_DEBUG
+	if (SKEIN_DEBUG_FLAG)
+		SKEIN_DEBUG_FLAG |= SKEIN_DEBUG_CONFIG;
+#endif
+	treeinfo = skein_CFG_TREE_INFO(leaf, node, maxlevel);
+	if (skein_init_ext(blksize, &G, hashbits, treeinfo, NULL,
0) != SKEIN_SUCCESS)
+		fatalerror("skein_init_ext() fails in tree");
+	SKEIN_DEBUG_FLAG = savedebug;
+
+	bcnt = msg_bytes;
+	memcpy(M, msg, bcnt);
+	for (height = 0;; height++)	{		/* walk up
the tree */
+		if (height && (bcnt == blkbytes)) /* are we done (with
only one block left)? */
+			break;
+		if (height + 1 == maxlevel)	{   /* is this the
final allowed level? */
+			/* if so, do it as one big hash */
+			s = G;
+			skein_set_tree_level(s.u.h, height + 1);
+			skein_update(&s, M, bcnt * 8);
+			skein_final_pad(&s, M);
+			break;
+			}
+		node_len = blkbytes << ((height) ? node : leaf);
+		for (src_offs = dst_offs = 0; src_offs <= bcnt;) {
+			n = bcnt - src_offs;		 /* number
of bytes left at this level */
+			if (n > node_len)			/*
limit to node size */
+				n = node_len;
+			s = G;
+			s.u.h.T[0] = src_offs;	   /* nonzero
initial offset in tweak! */
+			skein_set_tree_level(s.u.h, height + 1);
+			skein_update(&s, M + src_offs, n * 8);
+			skein_final_pad(&s, M + dst_offs);  /* finish
up this node, output intermediate result to M[]*/
+			dst_offs += blkbytes;
+			src_offs += n;
+			if (src_offs >= bcnt)		/*
special logic to handle (msg_bytes == 0) case */
+				break;
+			}
+		bcnt = dst_offs;
+		}
+
+	/* output the result */
+	skein_output(&s, hash_res);
+	}
+
+/*
+** Generate tree-mode hash KAT vectors.
+** Note:
+**	Tree vectors are different enough from non-tree vectors that
it +**	makes sense to separate this out into a different
function, rather +**	than shoehorn it into the same KAT logic as
the other modes. +**/
+void skein_gen_kat_tree(uint_t blksize)
+	{
+	static const struct
+		{
+		uint_t leaf, node, maxlevel, levels;
+		}
+		TREE_PARMS[] = { {2, 2, 2, 2}, {1, 2, 3, 2}, {2, 1,
0xFF, 3} }; +#define tree_parm_cnt (ARRAY_SIZE(TREE_PARMS) /
ARRAY_SIZE(TREE_PARMS[0])) +
+	u08b_t  msg[MAX_TREE_MSG_LEN + 4], hashval[MAX_TREE_MSG_LEN +
4];
+	uint_t  i, j, k, n, p, q, hashbits, node, leaf, leaf_bytes,
msg_bytes, bytecnt, levels, maxlevel; +
+	assert(blksize == 256 || blksize == 512 || blksize == 1024);
+	for (i = 0; i < MAX_TREE_MSG_LEN; i += 2) {
+		/* generate "incrementing" tree hash input msg data */
+		msg[i] = (u08b_t)((i ^ blksize) ^ (i >> 16));
+		msg[i + 1] = (u08b_t)((i ^ blksize) >> 8);
+		}
+	for (k = q = n = 0; k < hash_bits_cnt; k++) {
+		hashbits = hash_bits[k];
+		if (!short_KAT_OK(blksize, hashbits))
+			continue;
+		if ((verbose & V_KAT_SHORT) && (hashbits != blksize))
+			continue;
+		for (p = 0; p < tree_parm_cnt; p++) {
+			if (p && (verbose & V_KAT_SHORT))
+				continue;		   /* keep
short KATs short */
+			if (p && hashbits != blksize)
+				continue;		   /* we only
need one "non-full" size */ +
+			leaf	  = TREE_PARMS[p].leaf;
+			node	  = TREE_PARMS[p].node;
+			maxlevel  = TREE_PARMS[p].maxlevel;
+			levels	= TREE_PARMS[p].levels;
+			leaf_bytes = (blksize / 8) << leaf;	/*
number of bytes in a "full" leaf */ +
+			for (j = 0; j < 4; j++)	{   /*
different numbers of leaf results */
+				if ((verbose & V_KAT_SHORT) && (j !=
3) && (j != 0))
+					continue;
+				if (j && (hashbits != blksize))
+					break;
+				switch (j) {
+				case 0:
+					n = 1;
+					break;
+				case 1:
+					n = 2;
+					break;
+				case 2:
+					n = (1 << (node * (levels -
2))) * 3 / 2;
+					if (n <= 2)
+					continue;
+					break;
+				case 3:
+					n = (1 << (node * (levels -
1)));
+					break;
+				}
+				bytecnt = n * leaf_bytes;
+				assert(bytecnt > 0);
+				if (bytecnt > MAX_TREE_MSG_LEN)
+					continue;
+				q = (q + 1) % leaf_bytes;
+				msg_bytes = bytecnt - q;
+				switch (blksize) {
+				case  256:
+					printf("\n:Skein-256: ");
break;
+				case  512:
+					printf("\n:Skein-512: ");
break;
+				case 1024:
+					printf("\n:Skein-1024:");
break;
+				}
+				printf(" %4d-bit hash, msglen =%6d
bits", hashbits, msg_bytes * 8);
+				printf(". Tree: leaf =%02X, node
=%02X, maxlevels =%02X\n", leaf, node, maxlevel);
+				printf("\nMessage data:\n");
+				if (msg_bytes == 0)
+					printf("	(none)\n");
+				else
+					show_bytes(msg_bytes, msg);
+
+				skein_tree_hash(blksize, hashbits,
msg, msg_bytes, leaf, node, maxlevel, hashval); +
+				printf("Result:\n");
+				show_bytes((hashbits + 7) / 8,
hashval);
+
printf("--------------------------------\n");
+				}
+			}
+		}
+	}
+#endif
+
+/*
+** Output some KAT values. This output is generally re-directed to a
file and +** can be compared across platforms to help validate an
implementation on a +** new platform (or compare reference vs.
optimized code, for example). The +** file will be provided as part of
the Skein submission package to NIST. +**
+** When used in conjunction with the debug flag, this will output a
VERY long +** result. The verbose flag is used to output even more
combinations of +**	  <blksize,hashsize,msglen>
+**
+** Note: this function does NOT output the NIST AHS KAT format.
+*/
+void skein_showkat(uint_t blksizemask)
+{
+	enum {
+		DATA_TYPE_ZERO  = 0,
+		DATA_TYPE_INC,
+		DATA_TYPE_RAND,
+		DATA_TYPE_MAC,
+		DATA_TYPE_TREE,
+		DATA_TYPE_CNT,
+
+		MAX_BYTES = 3 * 1024 / 8
+		};
+	static const char *TYPE_NAMES[] = { "zero", "incrementing",
"random", "random+MAC", "tree", NULL };
+	static const uint_t  MSG_BITS[] = {
+		0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 32, 64, 128, 192,
+				   256 - 1, 256, 256 + 1,  384,
+				   512 - 1, 512, 512 + 1,  768,
+				  1024 - 1, 1024, 1024 + 1,
+				  2048 - 1, 2048, 2048 + 1
+				};
+
+/* generate pre-computed IVs for inclusion in Skein C code */
+void skein_generate_IV(void)
+{
+	static const struct
+	{ uint_t blksize, hashbits; }
+			iv_tab[] = /* which pairs to precompute */
+				{ { 256, 128 }, { 256, 160 }, { 256,
224 }, { 256, 256 },
+				  { 512, 128 }, { 512, 160 }, { 512,
224 }, { 512, 256 },
+				  { 512, 384 }, { 512, 512 },
+				  {1024, 384 }, {1024, 512 }, {1024,
1024 }
+				};
+	uint_t	   i, j, blksize, hashbits;
+	hashstate	state;
+	const u64b_t *w;
+	const char   *s;
+
+	printf("#ifndef _skein_iv_h\n"
+		   "#define _skein_iv_h\n\n"
+		   "#include \"skein.h\"	/* get Skein macros
and types */\n\n"
+		   "/*\n"
+		   "***************** Pre-computed Skein IVs
*******************\n"
+		   "**\n"
+		   "** NOTE: these values are not \"magic\" constants,
but\n"
+		   "** are generated using the Threefish block
function.\n"
+		   "** They are pre-computed here only for speed;
i.e., to\n"
+		   "** avoid the need for a Threefish call during
Init().\n"
+		   "**\n"
+		   "** The IV for any fixed hash length may be
pre-computed.\n"
+		   "** Only the most common values are included
here.\n"
+		   "**\n"
+
"************************************************************\n"
+		   "**/\n\n"
+		   "#define mk_64 skein_mk_64\n\n"
+		  );
+	for (i = 0; i < ARRAY_SIZE(iv_tab) / ARRAY_SIZE(iv_tab[0]);
i++) {
+		blksize  = iv_tab[i].blksize;
+		hashbits = iv_tab[i].hashbits;
+		switch (blksize) {
+		case  256:
+			w = state.u.ctx_256.X;
+			s = "_256";
+			break;
+		case  512:
+			w = state.u.ctx_512.X;
+			s = "_512";
+			break;
+		case 1024:
+			w = state.u.ctx1024.X;
+			s = "1024";
+			break;
+		default:
+			fatalerror("Invalid blksize");
+					continue; /* should never
happen, but avoids gcc warning */
+		}
+		if (skein_init(blksize, &state, hashbits) !=
SKEIN_SUCCESS)
+			fatalerror("Error generating IV: blksize = %d,
hashbits = %d", blksize, hashbits);
+		printf("/* blksize = %4d bits. hashsize = %4d bits
*/\n", blksize, hashbits);
+		printf("const u64b_t SKEIN%s_IV_%d[] =\n	{\n",
s, hashbits);
+		for (j = 0; j < blksize / 64; j++)
+			printf("	mk_64(0x%08X,0x%08X)%s\n",
+				   (uint_32t)(w[j] >> 32),
(uint_32t)w[j], (j + 1 == blksize / 64) ? "" : ", ");
+		printf("	};\n\n");
+		}
+	printf("#endif /* _skein_iv_h */\n");
+}
+
+/* qsort routine: three-way compare of two uint_32t values (1, -1 or 0) */
+int compare_uint_32t(const void *aptr, const void *bptr)
+{
+	const uint_32t lhs = *(const uint_32t *)aptr;
+	const uint_32t rhs = *(const uint_32t *)bptr;
+
+	/* each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
+	return (lhs > rhs) - (lhs < rhs);
+}
+
+void show_compiler(const char *CVER)
+{
+	printf(" //:");
+
+/* measure the speed (in CPU clks/byte) for a Skein implementation */
+void skein_measureperformance(const char *target)
+{
+	const uint_t MSG_BYTES[] = {1, 2, 4, 8, 10, 16, 32, 64, 100,
128, 256, 512, 1000, 1024, 2048, 4096, 8192, 10000, 16384, 32768,
100000, 0};
+	enum	 {  TIMER_SAMPLE_cnt = 13, MAX_BUFFER = 1024 *
100, PERF_TIMEOUT_CLKS = 500000 };
+	enum	 {  _256 = 256, _512 = 512 };
+	uint_32t dt[24][3][TIMER_SAMPLE_cnt], t0, t1;
+	uint_32t dt_min = ~0u;
+	uint_t   targetsize = 0;
+	uint_t   repcnt	 = 1;
+	uint_t   i, k, n, r, blksize, msg_bytes;
+	u08b_t   b[MAX_BUFFER], hashval[skein_1024_BLOCK_BYTES * 4];
+	hashstate s;
+
+/* Print the command-line syntax summary to stdout and exit with status 2. */
+void givehelp(void)
+{
+	/* each option line is one complete string literal (no raw newlines) */
+	printf("Syntax:  skein_test [options]\n"
+		   "Options: -bNN  = set Skein block size to NN bits\n"
+		   "		 -lNN  = set max test length  to NN bits\n"
+		   "		 -tNN  = set Skein hash length to NN bits\n"
+		   "		 -sNN  = set initial random seed\n"
+		   "		 -g	= generate precomputed IV values to stdout\n"
+		   "		 -k	= output KAT results to stdout\n"
+		   "		 -p	= output performance (clks/byte)\n"
+		  );
+	exit(2);
+}
+
+/*
+ * Program entry point: parse the command line, then either emit KAT
+ * vectors (-k) or run the self-consistency tests for the selected block
+ * sizes and hash lengths, followed by a short final sanity pass.
+ *
+ * Arguments starting with '-' are option switches (see givehelp()); a
+ * bare "?" prints help; a bare number sets the one-block test size.
+ * Returns 0; fatal conditions exit through fatalerror()/givehelp().
+ */
+int main(int argc, char *argv[])
+{
+	int i, n;
+	uint_t testcnt;
+	uint_t dokat   = 0;	/* generate KAT vectors? */
+	uint_t blksize = 0;	/* Skein state size in bits */
+	uint_t maxlen  = 1024;	/* max block size in bits */
+	uint_t hashlen = 0;	/* hash length in bits (0 --> all) */
+	uint_t seed0   = (uint_t)time(NULL); /* randomize based on time */
+	uint_t oneblk  = 0;	/* test block size */
+
+	for (i = 1; i < argc; i++) {
+		/* process command-line switches */
+		if (argv[i][0] == '-') {
+			/* <ctype.h> functions need an unsigned char value */
+			switch (toupper((unsigned char)argv[i][1])) {
+			case '?':
+				givehelp();
+				break;
+			case 'B':
+				blksize |= atoi(argv[i] + 2);
+				break;
+			case 'L':
+				maxlen = atoi(argv[i] + 2);
+				break;
+			case 'S':
+				seed0 = atoi(argv[i] + 2);
+				break;
+			case 'T':
+				hashlen = atoi(argv[i] + 2);
+				break;
+			case 'K':
+				dokat = 1;
+				break;
+			case 'V':
+				/* bare -V selects the long KAT flag */
+				verbose |= (argv[i][2]) ? atoi(argv[i] + 2) : V_KAT_LONG;
+				break;
+			case 'G':
+				skein_generate_IV();
+				return 0;
+			case 'P':
+				skein_measureperformance(argv[i] + 2);
+				return 0;
+			case 'Q':
+				_quiet_ = 1;
+				break;
+			case 'D':
+				switch (toupper((unsigned char)argv[i][2])) {
+#ifdef SKEIN_DEBUG
+				case  0:
+					SKEIN_DEBUG_FLAG |= SKEIN_DEBUG_DEFAULT;
+					break;
+				case '-':
+					SKEIN_DEBUG_FLAG |= SKEIN_DEBUG_SHORT;
+					break;
+				case '+':
+					SKEIN_DEBUG_FLAG |= SKEIN_DEBUG_ALL;
+					break;
+				case 'P':
+					SKEIN_DEBUG_FLAG |= SKEIN_DEBUG_PERMUTE;
+					break;
+				case 'I':
+					SKEIN_DEBUG_FLAG |= SKEIN_DEBUG_SHORT |  SKEIN_DEBUG_INJECT;
+					break;
+				case 'C':
+					SKEIN_DEBUG_FLAG |= SKEIN_DEBUG_SHORT & ~SKEIN_DEBUG_CONFIG;
+					break;
+#endif
+				default:
+					SKEIN_DEBUG_FLAG |= atoi(argv[i] + 2);
+					break;
+				}
+				break;
+			default:
+				fatalerror("Unsupported command-line option: %s", argv[i]);
+				break;
+			}
+		} else if (argv[i][0] == '?') {
+			givehelp();
+		} else if (isdigit((unsigned char)argv[i][0])) {
+			oneblk = atoi(argv[i]);
+		}
+	}
+
+	if (blksize == 0)	/* default is all block sizes */
+		blksize = 256 | 512 | 1024;
+
+	if (dokat) {
+		skein_showkat(blksize);
+	} else {
+		if (oneblk == 0)
+			printf("Seed0 = %d. Compiler = %s\n", seed0, COMPILER_ID);
+		rand_init(skein_mk_64(0xDEADBEEF, seed0)); /* init PRNG for test data */
+
+		testcnt = 0;
+		for (i = 256; i <= 1024; i *= 2) {
+			if (!(blksize & i))
+				continue;
+			if (hashlen == 0) {	/* use all hash sizes? */
+				for (n = 0; n < hash_bits_cnt; n++)
+					testcnt += skein_test(i, maxlen, hash_bits[n], 0, oneblk);
+			} else {
+				testcnt += skein_test(i, maxlen, hashlen, 0, oneblk);
+			}
+		}
+		if (oneblk)
+			return 0;
+		if (testcnt)
+			printf("Success: %4d tests\n", testcnt);
+	}
+
+	/* do a quick final self-consistency check test to make sure nothing is broken */
+	SKEIN_DEBUG_FLAG = 0;		/* no debug output here */
+	for (blksize = 256; blksize <= 1024; blksize *= 2)
+		skein_test(blksize, 16, 0, 1, 0);
+
+	return 0;
+}
diff --git a/drivers/staging/skein/skein_test.h
b/drivers/staging/skein/skein_test.h new file mode 100644
index 0000000..ca23bd0
--- /dev/null
+++ b/drivers/staging/skein/skein_test.h
@@ -0,0 +1,516 @@
+/*
+ * Number of entries in the HASH_BITS[] array. The divisor must be the
+ * size of one element; ARRAY_SIZE() applied to a scalar element is not
+ * an element size.
+ */
+#define HASH_BITS_CNT   (sizeof(HASH_BITS) / sizeof(HASH_BITS[0]))
+
+/* bits of the verbose flag word (tested as "verbose & V_KAT_xxx") */
+#define V_KAT_LONG	  (1u << 0)	/* do not trim the KAT output to a "reasonable" size */
+#define V_KAT_SHORT	 (1u << 1)	/* "short" KAT set: most vectors are filtered out */
+#define V_KAT_NO_TREE   (1u << 2)	/* skip the tree-hash data type */
+#define V_KAT_NO_SEQ	(1u << 3)	/* skip the sequential (non-tree) vectors */
+#define V_KAT_NO_3FISH  (1u << 4)	/* SKEIN_DEBUG builds: suppress the raw Threefish block calls */
+#define V_KAT_DO_3FISH  (1u << 5)	/* non-SKEIN_DEBUG builds: run the raw Threefish block calls */
+
+/*
+ * Automatic compiler version number detection.
+ *
+ * Unless the build already supplies compiler_version, derive it from the
+ * compiler's predefined macros. For GCC and Borland the value is
+ * 100 * major + 10 * minor + patch, so it is only unambiguous while
+ * each component stays below 10.
+ *
+ * Long directives are split with backslash continuations so that mail
+ * clients do not re-wrap them and fuse neighbouring directives together.
+ */
+#if !defined(compiler_version)
+
+#if   defined(_MSC_VER) && (_MSC_VER >= 1400)
+#define compiler_version (900)
+#elif defined(_MSC_VER) && (_MSC_VER >= 1200)
+#define compiler_version (600)
+#elif defined(_MSC_VER) && (_MSC_VER >= 1000)
+#define compiler_version (420)
+#elif defined(__GNUC__) && defined(__GNUC_MINOR__) && \
+	defined(__GNUC_PATCHLEVEL__)
+#define compiler_version \
+	(100 * __GNUC__ + 10 * __GNUC_MINOR__ + __GNUC_PATCHLEVEL__)
+#elif defined(__BORLANDC__) /* this is in hex */
+#define compiler_version \
+	(100 * (__BORLANDC__ >> 8) + 10 * ((__BORLANDC__ >> 4) & 0xF) + \
+	 (__BORLANDC__ & 0xF))
+#endif
+
+#endif
+
+#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+/* external functions to determine code size (in bytes); in this
+ * configuration the code-size and unroll-count functions are only
+ * declared here, so their definitions have to come from elsewhere */
+size_t  skein_256_process_block_codesize(void);
+size_t  skein_512_process_block_codesize(void);
+size_t  skein_1024_process_block_codesize(void);
+size_t  skein_256_API_codesize(void);
+size_t  skein_512_API_codesize(void);
+size_t  skein_1024_API_codesize(void);
+uint_t  skein_256_unroll_cnt(void);
+uint_t  skein_512_unroll_cnt(void);
+uint_t  skein_1024_unroll_cnt(void);
+#elif defined(SKEIN_LOOP) /* unroll counts are the decimal digits of SKEIN_LOOP */
+uint_t  skein_256_unroll_cnt(void) { return (SKEIN_LOOP / 100) % 10; } /* hundreds digit */
+uint_t  skein_512_unroll_cnt(void) { return (SKEIN_LOOP /  10) % 10; } /* tens digit */
+uint_t  skein_1024_unroll_cnt(void) { return (SKEIN_LOOP) % 10; } /* ones digit */
+#else /* neither code-size/perf support nor SKEIN_LOOP: always report 0 */
+uint_t  skein_256_unroll_cnt(void) { return 0; }
+uint_t  skein_512_unroll_cnt(void) { return 0; }
+uint_t  skein_1024_unroll_cnt(void) { return 0; }
+#endif /* SKEIN_CODE_SIZE || SKEIN_PERF / SKEIN_LOOP */
+
+#ifndef SKEIN_DEBUG
+/*
+ * Dummy flags (if not defined elsewhere): builds without SKEIN_DEBUG get
+ * a zero-initialised flag word so that code assigning to or testing
+ * SKEIN_DEBUG_FLAG still compiles.
+ */
+uint_t SKEIN_DEBUG_FLAG = 0;
+#endif
+
+/*
+ * Debug flag presets. SKEIN_DEBUG_SHORT is the union of the header,
+ * state, tweak, key, 8-bit input and final-output debug bits;
+ * SKEIN_DEBUG_DEFAULT is an alias for it.
+ */
+#define SKEIN_DEBUG_SHORT \
+	(SKEIN_DEBUG_HDR | SKEIN_DEBUG_STATE | SKEIN_DEBUG_TWEAK | \
+	 SKEIN_DEBUG_KEY | SKEIN_DEBUG_INPUT_08 | SKEIN_DEBUG_FINAL)
+#define SKEIN_DEBUG_DEFAULT (SKEIN_DEBUG_SHORT)
+
+/*
+ * x86 / x86-64 target detection.
+ *
+ * Predefined macros are case-sensitive. MSVC spells its architecture
+ * macros _M_IX86 and _M_X64 (and _X86_ is presumably the Windows SDK
+ * spelling -- TODO confirm); lower-cased variants never match, which
+ * would silently disable the RDTSC timer on those toolchains.
+ */
+#if defined(_M_IX86) || defined(__i386) || defined(_i386) || \
+	defined(__i386__) || defined(i386) || defined(_X86_) || \
+	defined(__x86_64__) || defined(_M_X64) || defined(__x86_64)
+#define _is_x86_	1
+#endif
+
+/*
+ * HI_RES_CLK_OK is set when the target is x86, the compiler is not in a
+ * strict ANSI mode, and it is one of the toolchains for which RDTSC code
+ * is provided below (Borland, MSVC, MinGW, GCC).
+ */
+#if defined(_is_x86_) && (!defined(__STRICT_ANSI__)) && \
+	(defined(__GNUC__) || !defined(__STDC__)) && \
+	(defined(__BORLANDC__) || defined(_MSC_VER) || \
+	 defined(__MINGW_H) || defined(__GNUC__))
+#define HI_RES_CLK_OK	1	/* it's ok to use RDTSC opcode */
+
+#if defined(_MSC_VER) && defined(_M_X64)
+#include <intrin.h>
+#pragma intrinsic(__rdtsc)
+#endif
+
+#endif
+
+/*
+ * NOTE(review): the statements below end with a closing brace but no
+ * function header precedes them in this patch. They read the low 32 bits
+ * of the time stamp counter and return them (or return 0 when RDTSC is
+ * unavailable), and callers invoke high_res_time(), so the header of
+ * that function appears to have been lost -- restore it before applying.
+ */
+#if defined(HI_RES_CLK_OK)
+	uint_32t x[2];
+#if   defined(__BORLANDC__)
+#define COMPILER_ID "BCC"
+	_asm { push edx };
+	__emit__(0x0F, 0x31);	/* RDTSC instruction */
+	_asm { pop  edx };
+	_asm { mov x[0], eax };
+#elif defined(_MSC_VER)
+#define COMPILER_ID "MSC"
+#if defined(_MSC_VER) && defined(_M_X64)
+	/* 64-bit MSVC has no inline assembler: use the intrinsic */
+	x[0] = (uint_32t) __rdtsc();
+#else
+	_asm { push  edx };
+	_asm { _emit 0fh }; _asm { _emit 031h };
+	_asm { pop   edx };
+	_asm { mov x[0], eax };
+#endif
+#elif defined(__MINGW_H) || defined(__GNUC__)
+#define COMPILER_ID "GCC"
+	asm volatile("rdtsc" : "=a"(x[0]), "=d"(x[1]));
+#else
+#error  "HI_RES_CLK_OK -- but no assembler code for this platform (?)"
+#endif
+	return x[0];	/* only the low 32 bits of the counter are used */
+#else
+	/* avoid annoying MSVC 9.0 compiler warning #4720 in ANSI mode! */
+#if (!defined(_MSC_VER)) || (!defined(__STDC__)) || (_MSC_VER < 1300)
+	fatalerror("No support for RDTSC on this CPU platform\n");
+#endif
+	return 0;
+#endif /* defined(HI_RES_CLK_OK) */
+	}
+
+/******** OS-specific calls for setting priorities and sleeping ******/
+
+#if defined(__linux) || defined(__linux__) || defined(linux) || \
+	defined(__gnu_linux__)
+#include <unistd.h>
+#define _GOT_OS_sleep		(1)
+/*
+ * Linux implementation of OS_sleep(): suspend the caller for m_sec
+ * milliseconds. usleep() takes microseconds, hence the factor of 1000.
+ * Defining _GOT_OS_sleep suppresses the do-nothing fallback version.
+ */
+void OS_sleep(uint_t m_sec)
+	{
+	usleep(m_sec * 1000);
+	}
+#endif
+
+#ifndef _GOT_OS_SET_PRIORITY
+/* dummy routines if nothing is available: compiled only when no
+ * platform-specific code has defined _GOT_OS_SET_PRIORITY. Both leave
+ * the scheduling priority untouched and return 0; the caller that
+ * raises the priority prints a warning on a nonzero return, so these
+ * stubs never trigger it. */
+int OS_set_high_priority(void)	/* stub: always returns 0 */
+	{
+	return 0;
+	}
+int OS_set_normal_priority(void)	/* stub: always returns 0 */
+	{
+	return 0;
+	}
+#endif
+
+#ifndef _GOT_OS_sleep	/* no platform-specific OS_sleep() was defined */
+uint_32t OS_sleep(uint_32t m_sec)	/* stub: does not sleep at all */
+	{
+	return m_sec;	/* avoid compiler warnings (presumably about the
+			 * otherwise unused parameter); NOTE(review): the
+			 * Linux variant of OS_sleep() returns void, so the
+			 * two signatures differ */
+	}
+#endif
+
+#ifndef COMPILER_ID	/* none of the compiler-specific branches set it */
+#define COMPILER_ID "(unknown)"	/* fallback name used in printed reports */
+#endif
+
+#if SKEIN_TREE_HASH
+#define MAX_TREE_MSG_LEN  (1 << 12)
+
+/*
+ * Number of entries in the MSG_BITS[] array. Dividing ARRAY_SIZE() (an
+ * element count) by sizeof(element) would under-count the entries, so
+ * use the plain sizeof/sizeof form.
+ */
+#define MSG_BITS_CNT (sizeof(MSG_BITS) / sizeof(MSG_BITS[0]))
+
+	uint_t	  i, j, k, blk_size, data_type, hashbits,
msgbits, keybytes, blkbytes, keytypes;
+	u08b_t	  data[MAX_BYTES + 4], key[MAX_BYTES + 4],
hashval[MAX_BYTES + 4];
+	const char *msg_type;
+	hash_state   s;
+
+	rand_init(SKEIN_MK_64(0xDEADBEEF, 0)); /* init PRNG with
repeatable value */
+	kat_hash = ~0u;
+	keytypes =  0;
+
+#ifdef SKEIN_DEBUG
+	/* first, show some "raw" Threefish + feedforward block calls,
with round-by-round debug info if enabled */
+	if (SKEIN_DEBUG_FLAG && !(verbose & V_KAT_NO_3FISH)) {
+		k =
SKEIN_DEBUG_FLAG;					/* save debug
flag value */
+		SKEIN_DEBUG_FLAG  = THREEFISH_DEBUG_ALL &
~SKEIN_DEBUG_HDR; /* turn on full debug detail, use Threefish name */
+		SKEIN_DEBUG_FLAG |= (k & SKEIN_DEBUG_PERMUTE);
+#else
+	if (verbose & V_KAT_DO_3FISH)
{				   /* allow non-SKEIN_DEBUG testing */
+#endif
+	for (blk_size = 256; blk_size <= 1024; blk_size *= 2) {
+	if (blk_size_mask && (blk_size & blk_size_mask) == 0)
+		continue;
+	for (data_type = DATA_TYPE_ZERO; data_type <= DATA_TYPE_INC;
data_type++) {
+		switch (data_type) {
+		case DATA_TYPE_ZERO:
+			memset(data, 0, sizeof(data));
+			memset(key, 0, sizeof(key));
+			break;
+		case DATA_TYPE_INC:
+			for (i = 0; i < MAX_BYTES; i++) {
+				key[i] = (u08b_t)i;
+				data[i] = (u08b_t)~key[i];
+				}
+			break;
+		default:
+			continue;
+		}
+#ifdef SKEIN_DEBUG
+		switch (blk_size) {
+		case  256:
+			printf("\n:Threefish-256: ");
+			break;
+		case  512:
+			printf("\n:Threefish-512: ");
+			break;
+		case 1024:
+			printf("\n:Threefish-1024:");
+			break;
+		}
+		printf(" encryption + plaintext feedforward
(round-by-round):\n"); +#endif
+		memset(&s, 0, sizeof(s));
+		s. u. h. hashbit_len = blk_size;
+		skein_get64_LSB_first(s.u.h.T, key, 2);	   /*
init T[] */
+		skein_get64_LSB_first(s.u.ctx1024.X, key + 2 * 8,
blk_size / 64);  /* init X[] */
+		switch (blk_size) {
+		case  256:
+			skein_256_process_block(&s.u.ctx_256, data, 1,
0);
+			break;
+		case  512:
+			skein_512_process_block(&s.u.ctx_512, data, 1,
0);
+			break;
+		case 1024:
+			skein_1024_process_block(&s.u.ctx1024, data,
1, 0);
+			break;
+		}
+#ifdef SKEIN_DEBUG
+		printf("++++++++++++++++++++++++++++++++++++++\n");
+#endif
+		}
+	}
+#ifdef SKEIN_DEBUG
+		SKEIN_DEBUG_FLAG = k;
+#endif
+		}
+
+	for (data_type = DATA_TYPE_ZERO; data_type < DATA_TYPE_CNT;
data_type++) {
+		msg_type = TYPE_NAMES[data_type];
+		switch (data_type) {
+		case DATA_TYPE_ZERO:
+			memset(data, 0, sizeof(data));
+			memset(key, 0, sizeof(key));
+			break;
+		case DATA_TYPE_INC:
+			for (i = 0; i < MAX_BYTES; i++) {
+				key[i] = (u08b_t)i;
+				data[i] = (u08b_t)~key[i];
+				}
+			break;
+		case DATA_TYPE_MAC:
+			rand_bytes(key, sizeof(key));
+		case DATA_TYPE_RAND:
+			rand_bytes(data, sizeof(data));
+			break;
+		case DATA_TYPE_TREE:
+			if (verbose & V_KAT_NO_TREE)
+			continue;
+			break;
+		default:	/* should never get here */
+			fatalerror("Invalid data type: %d --> '%s'",
data_type, msg_type);
+			break;
+	}
+		for (blk_size = 256; blk_size <= 1024; blk_size *= 2) {
+			if (blk_size_mask && (blk_size &
blk_size_mask) == 0)
+				continue;
+	if (data_type == DATA_TYPE_TREE) {
+#if SKEIN_TREE_HASH
+		skein_gen_KAT_tree(blk_size);
+#endif
+				continue;
+			}
+	if (verbose & V_KAT_NO_SEQ)
+		continue;
+	blkbytes = blk_size / 8;
+	for (j = 0; j <  MSG_BITS_CNT; j++)
+	for (k = 0; k < HASH_BITS_CNT; k++) {
+		msgbits  =  MSG_BITS[j];  /* message length   */
+		hashbits = HASH_BITS[k];  /* hash result size */
+		assert(MAX_BYTES * 8 >= hashbits && MAX_BYTES * 8 >=
msgbits);
+		if (msgbits != 1024 && hashbits != blk_size
&& !(verbose & V_KAT_LONG))
+			continue;   /* keep the output size
reasonable, unless verbose */
+		if (verbose & V_KAT_SHORT) {
+			/* -v2 ==> generate "short" KAT set by
filtering out most vectors */
+			if (data_type != DATA_TYPE_INC)
+			continue;
+			if (msgbits != 8 && msgbits != blk_size &&
msgbits != 2 * blk_size)
+			continue;
+			if (!short_KAT_OK(blk_size, hashbits))
+			continue;
+			}
+		switch (blk_size) {
+		case  256:
+			printf("\n:Skein-256: ");
+			break;
+		case  512:
+			printf("\n:Skein-512: ");
+			break;
+		case 1024:
+			printf("\n:Skein-1024:");
+			break;
+		}
+		printf(" %4d-bit hash, msgLen =%6d bits", hashbits,
msgbits);
+		if (!(verbose & V_KAT_SHORT))
+			printf(", data = '%s'", msg_type);
+		printf("\n\nMessage data:\n");
+		if (msgbits == 0)
+			printf("	(none)\n");
+		else
+			show_bytes((msgbits + 7) / 8, data);
+		switch (data_type) {
+		default:			/* straight hash value
*/
+				if (skein_hash(blk_size, hashbits,
data, msgbits, hashval) != SKEIN_SUCCESS)
+				fatalerror("Skein_Hash() error!");
+				break;
+		case DATA_TYPE_MAC:
+		/* include some MAC computations in KAT file */
+		switch (keytypes++) {
+		/* sequence thru different MAC key lengths */
+		case 0:
+			keybytes = blkbytes / 2;
+			break;
+		case 1:
+			keybytes = blkbytes;
+			break;
+		case 2:
+			keybytes = blkbytes  + 1;
+			break;
+		case 3:
+			keybytes = blkbytes * 2 + 1;
+			break;
+		default:
+			keybytes = 0;	   /* not actually a MAC
this time, but use init_ext() */
+			keytypes  = 0;	   /* start the cycle
again next time */
+		}
+			printf("MAC key = %4d bytes:\n", keybytes);
+		if (keybytes)		   /* show MAC key, if
any */
+			show_bytes(keybytes, key);
+		else
+			printf("	(none)		  /* use
init_ext() call */\n");
+		if (skein_init_ext(blk_size, &s, hashbits,
SKEIN_CFG_TREE_INFO_SEQUENTIAL, key, keybytes) != SKEIN_SUCCESS)
+			fatalerror("skein_init_ext() error!");
+		if (skein_update(&s, data, msgbits) != SKEIN_SUCCESS)
+			fatalerror("skein_update() error!");
+		if (skein_final(&s, hashval) != SKEIN_SUCCESS)
+			fatalerror("skein_final() error!");
+		break;
+		case DATA_TYPE_TREE:
+			assert(0);
+			break;
+		}
+		printf("Result:\n");
+		show_bytes((hashbits + 7) / 8, hashval);
+		printf("--------------------------------\n");
+		}
+	}
+		}
+	if (!_quiet_)
+		fprintf(stderr, "kat_hash = %08X\n", kat_hash ^
0x150183D2);
+	}
+
+#if defined(SKEIN_XMM)
+	printf(" 32-XMM, ");
+#else
+	printf(" %2u-bit, ", (uint_t)(8 * sizeof(size_t)));
+#endif
+	printf("%s%s", COMPILER_ID, CVER);
+
+	/* do we need to show unroll amount? */
+#if defined(SKEIN_USE_ASM) && SKEIN_USE_ASM
+	printf(" [asm=");
+#define _SC_DO_LOOP_ (1)
+#elif defined(SKEIN_LOOP)
+	printf(" [ C =");
+#define _SC_DO_LOOP_ (1)
+#endif
+
+#ifdef _SC_DO_LOOP_
+	printf("%c", (skein_256_unroll_cnt()) ? '0' +
skein_256_unroll_cnt() : '.');
+	printf("%c", (skein_512_unroll_cnt()) ? '0' +
skein_512_unroll_cnt() : '.');
+	printf("%c", (skein_1024_unroll_cnt()) ? '0' +
skein_1024_unroll_cnt() : '.');
+	printf("]");
+#endif
+	}
+
+#ifdef compiler_version
+	char	 CVER[20];			  /* avoid ANSI
compiler warnings for sprintf()! :-(( */ +
+	n		  = compiler_version;
+	CVER[0]	= '_';
+	CVER[1]	= 'v';
+	CVER[2]	= (char)('0' + ((n / 100) % 10));
+	CVER[3]	= '.';
+	CVER[4]	= (char)('0' + ((n / 10) % 10));
+	CVER[5]	= (char)('0' + ((n / 1) % 10));
+	CVER[6]	= 0;
+#else
+#define CVER ""
+#endif
+	if (target && target[0]) {
+		target_size = atoi(target);
+		for (i = 0; target[i]; i++)
+			if (target[i] == '.') {
+				repcnt = atoi(target + i + 1);
+				break;
+				}
+			if (repcnt == 0)
+				repcnt = 1;
+		}
+
+	assert(sizeof(dt) / (3 * TIMER_SAMPLE_CNT *
sizeof(dt[0][0][0])) >=
+		   ARRAY_SIZE(MSG_BYTES) / ARRAY_SIZE(MSG_BYTES[0]));
+	if (OS_set_high_priority())
+		printf("Unable to set thread to high priority\n");
+	fflush(stdout);			 /* let things calm
down */
+	OS_sleep(200);			  /* let things settle
down for a bit */
+	memset(dt, 0, sizeof(dt));
+	rand_bytes(b, sizeof(b));	 /* use random data for
testing */
+	for (i = 0; i < 4 * TIMER_SAMPLE_CNT; i++) { /* calibrate the
overhead for measuring time */
+		t0 = high_res_time();
+		t1 = high_res_time();
+		if (dt_min > t1 - t0)	  /* keep only the
minimum time */
+			dt_min = t1 - t0;
+		}
+	for (r = 0; r < repcnt; r++) {
+		/* first take all the data and store it in dt, with no
printf() activity */
+		for (n = 0; n < ARRAY_SIZE(MSG_BYTES) /
sizeof(MSG_BYTES[0]); n++) {
+			msg_bytes = MSG_BYTES[n];		/*
pick the message size (in bits) */
+		if (msg_bytes > MAX_BUFFER || msg_bytes == 0)
+			break;
+		if (target_size && target_size != msg_bytes)
+			continue;
+		for (k = 0; k < 3; k++) {
+				/* cycle thru the different block
sizes */
+			blk_size = 256 << k;
+			t0 = high_res_time();
+			t1 = high_res_time();
+#define one_test(BITS)					   \
+		skein##BITS##_init(&s.u.ctx##BITS, BITS);	  \
+		skein##BITS##_update(&s.u.ctx##BITS, b, msg_bytes);\
+		skein##BITS##_final(&s.u.ctx##BITS, hashval);
+
+			OS_sleep(0);			/* yield
the time slice to OS */
+			for (i = 0; i < TIMER_SAMPLE_CNT; i++) {
+
high_res_time();			/* prime the pump */
+				switch (blk_size) {
+				case  256:
+
one_test(_256);		  /* prime the pump */
+					t0 = high_res_time();
+
one_test(_256);		  /* do it twice for some averaging */
+					one_test(_256);
+					t1 = high_res_time();
+					break;
+				case  512:
+					one_test(_512);
+					t0 = high_res_time();
+					one_test(_512);
+					one_test(_512);
+					t1 = high_res_time();
+					break;
+				case 1024:
+					one_test(1024);
+					t0 = high_res_time();
+					one_test(1024);
+					one_test(1024);
+					t1 = high_res_time();
+					break;
+				}
+				dt[n][k][i] = ((t1 - t0) - dt_min) /
2; /* adjust for high_res_time() overhead */
+				}
+			}
+	}
+		OS_set_normal_priority();
+
+	if (target_size == 0) {
+	printf("\nSkein performance, in clks per byte, dt_min = %4d
clks.\n", dt_min);
+	printf("		 [compiled %s,%s  by  '%s%s',
%u-bit]\n", __TIME__, __DATE__, COMPILER_ID, CVER, (uint_t)(8 *
sizeof(size_t)));
+	printf("
=================================================================\n");
+	printf("		 ||			   Skein
block size			   |\n");
+	printf("
||--------------------------------------------------------------|\n");
+	printf(" Message ||	   256 bits	 |	   512
bits	 |	  1024 bits	 |\n");
+	printf(" Length
||====================|====================|====================|\n");
+	printf(" (bytes) ||		min		median
|	 min	median  |	 min	 median  |\n");
+
printf("=========||====================|====================|====================|\n");
+	}
+
+		/* now display the results */
+	for (n = 0; n < ARRAY_SIZE(MSG_BYTES) / sizeof(MSG_BYTES[0]);
n++) {
+		msg_bytes = MSG_BYTES[n];	   /* pick the
message size (in bits) */
+	if (msg_bytes > MAX_BUFFER || msg_bytes == 0)
+		break;
+	if (target_size && target_size != msg_bytes)
+		continue;
+	printf("%7d_ ||", msg_bytes);
+	for (k = 0; k < 3; k++) {
+		/* cycle thru the different Skein block sizes */
+		/* here with dt[n][k][] full of time differences */
+		/* discard high/low, then show min/median of the rest,
in clks/byte */
+		qsort(dt[n][k], TIMER_SAMPLE_CNT, sizeof(dt[0][0][0]),
compare_uint_32t);
+		printf(" %8.2f %8.2f  |", dt[n][k][1] /
(double)msg_bytes, dt[n][k][TIMER_SAMPLE_CNT / 2] / (double)msg_bytes);
+		}
+	show_compiler(CVER);
+	printf("\n");
+	if (target_size == 0 && target && target[0] && repcnt == 1) {
+				/* show the details */
+		for (k = 0; k < 3; k++) {
+			printf("%4d: ", 256 << k);
+			for (i = 0; i < TIMER_SAMPLE_CNT; i++)
+			printf("%8d", dt[n][k][i]);
+			printf("\n");
+			}
+		}
+	}
+		}
+#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+	if (target_size == 0) {
+
printf("=========||====================|====================|====================|\n");
+		printf("Code
Size||
|					|
|\n");
+
printf("=========||====================|====================|====================|\n");
+		printf("	API  ||		%12d
bytes	   | 	%12d bytes		|
%12d bytes	 |",
+				(int)skein_256_API_codesize(),
+				(int)skein_512_API_codesize(),
+				(int)skein_1024_API_codesize());
+		show_compiler(CVER);
+		printf("\n");
+		printf("  Block  || %12d bytes | %12d bytes | %12d
bytes |",
+
(int)skein_256_process_block_codesize(),
+
(int)skein_512_process_block_codesize(),
+
(int)skein_1024_process_block_codesize());
+		show_compiler(CVER);
+		printf("\n");
+		}
+#endif
+	}
-- 
2.5.0



More information about the devel mailing list