1294328Sdes/* $OpenBSD: umac.c,v 1.11 2014/07/22 07:13:42 guenther Exp $ */ 2180744Sdes/* ----------------------------------------------------------------------- 3180744Sdes * 4180744Sdes * umac.c -- C Implementation UMAC Message Authentication 5180744Sdes * 6180744Sdes * Version 0.93b of rfc4418.txt -- 2006 July 18 7180744Sdes * 8180744Sdes * For a full description of UMAC message authentication see the UMAC 9180744Sdes * world-wide-web page at http://www.cs.ucdavis.edu/~rogaway/umac 10180744Sdes * Please report bugs and suggestions to the UMAC webpage. 11180744Sdes * 12180744Sdes * Copyright (c) 1999-2006 Ted Krovetz 13180744Sdes * 14180744Sdes * Permission to use, copy, modify, and distribute this software and 15180744Sdes * its documentation for any purpose and with or without fee, is hereby 16180744Sdes * granted provided that the above copyright notice appears in all copies 17180744Sdes * and in supporting documentation, and that the name of the copyright 18180744Sdes * holder not be used in advertising or publicity pertaining to 19180744Sdes * distribution of the software without specific, written prior permission. 20180744Sdes * 21180744Sdes * Comments should be directed to Ted Krovetz (tdk@acm.org) 22180744Sdes * 23180744Sdes * ---------------------------------------------------------------------- */ 24180744Sdes 25180744Sdes /* ////////////////////// IMPORTANT NOTES ///////////////////////////////// 26180744Sdes * 27180744Sdes * 1) This version does not work properly on messages larger than 16MB 28180744Sdes * 29180744Sdes * 2) If you set the switch to use SSE2, then all data must be 16-byte 30180744Sdes * aligned 31180744Sdes * 32180744Sdes * 3) When calling the function umac(), it is assumed that msg is in 33180744Sdes * a writable buffer of length divisible by 32 bytes. The message itself 34180744Sdes * does not have to fill the entire buffer, but bytes beyond msg may be 35180744Sdes * zeroed. 
36180744Sdes * 37180744Sdes * 4) Three free AES implementations are supported by this implementation of 38180744Sdes * UMAC. Paulo Barreto's version is in the public domain and can be found 39180744Sdes * at http://www.esat.kuleuven.ac.be/~rijmen/rijndael/ (search for 40180744Sdes * "Barreto"). The only two files needed are rijndael-alg-fst.c and 41180744Sdes * rijndael-alg-fst.h. Brian Gladman's version is distributed with the GNU 42180744Sdes * Public lisence at http://fp.gladman.plus.com/AES/index.htm. It 43180744Sdes * includes a fast IA-32 assembly version. The OpenSSL crypo library is 44180744Sdes * the third. 45180744Sdes * 46180744Sdes * 5) With FORCE_C_ONLY flags set to 0, incorrect results are sometimes 47180744Sdes * produced under gcc with optimizations set -O3 or higher. Dunno why. 48180744Sdes * 49180744Sdes /////////////////////////////////////////////////////////////////////// */ 50180744Sdes 51180744Sdes/* ---------------------------------------------------------------------- */ 52180744Sdes/* --- User Switches ---------------------------------------------------- */ 53180744Sdes/* ---------------------------------------------------------------------- */ 54180744Sdes 55248619Sdes#ifndef UMAC_OUTPUT_LEN 56180744Sdes#define UMAC_OUTPUT_LEN 8 /* Alowable: 4, 8, 12, 16 */ 57248619Sdes#endif 58248619Sdes 59248619Sdes#if UMAC_OUTPUT_LEN != 4 && UMAC_OUTPUT_LEN != 8 && \ 60248619Sdes UMAC_OUTPUT_LEN != 12 && UMAC_OUTPUT_LEN != 16 61248619Sdes# error UMAC_OUTPUT_LEN must be defined to 4, 8, 12 or 16 62248619Sdes#endif 63248619Sdes 64180744Sdes/* #define FORCE_C_ONLY 1 ANSI C and 64-bit integers req'd */ 65180744Sdes/* #define AES_IMPLEMENTAION 1 1 = OpenSSL, 2 = Barreto, 3 = Gladman */ 66180744Sdes/* #define SSE2 0 Is SSE2 is available? 
*/ 67180744Sdes/* #define RUN_TESTS 0 Run basic correctness/speed tests */ 68180744Sdes/* #define UMAC_AE_SUPPORT 0 Enable auhthenticated encrytion */ 69180744Sdes 70180744Sdes/* ---------------------------------------------------------------------- */ 71180744Sdes/* -- Global Includes --------------------------------------------------- */ 72180744Sdes/* ---------------------------------------------------------------------- */ 73180744Sdes 74180744Sdes#include "includes.h" 75180744Sdes#include <sys/types.h> 76180744Sdes#include <string.h> 77294328Sdes#include <stdio.h> 78180744Sdes#include <stdlib.h> 79180744Sdes#include <stddef.h> 80180744Sdes 81294328Sdes#include "xmalloc.h" 82294328Sdes#include "umac.h" 83294328Sdes#include "misc.h" 84294328Sdes 85180744Sdes/* ---------------------------------------------------------------------- */ 86180744Sdes/* --- Primitive Data Types --- */ 87180744Sdes/* ---------------------------------------------------------------------- */ 88180744Sdes 89180744Sdes/* The following assumptions may need change on your system */ 90180744Sdestypedef u_int8_t UINT8; /* 1 byte */ 91180744Sdestypedef u_int16_t UINT16; /* 2 byte */ 92180744Sdestypedef u_int32_t UINT32; /* 4 byte */ 93180744Sdestypedef u_int64_t UINT64; /* 8 bytes */ 94180744Sdestypedef unsigned int UWORD; /* Register */ 95180744Sdes 96180744Sdes/* ---------------------------------------------------------------------- */ 97180744Sdes/* --- Constants -------------------------------------------------------- */ 98180744Sdes/* ---------------------------------------------------------------------- */ 99180744Sdes 100180744Sdes#define UMAC_KEY_LEN 16 /* UMAC takes 16 bytes of external key */ 101180744Sdes 102180744Sdes/* Message "words" are read from memory in an endian-specific manner. */ 103180744Sdes/* For this implementation to behave correctly, __LITTLE_ENDIAN__ must */ 104180744Sdes/* be set true if the host computer is little-endian. 
*/ 105180744Sdes 106180744Sdes#if BYTE_ORDER == LITTLE_ENDIAN 107180744Sdes#define __LITTLE_ENDIAN__ 1 108180744Sdes#else 109180744Sdes#define __LITTLE_ENDIAN__ 0 110180744Sdes#endif 111180744Sdes 112180744Sdes/* ---------------------------------------------------------------------- */ 113180744Sdes/* ---------------------------------------------------------------------- */ 114180744Sdes/* ----- Architecture Specific ------------------------------------------ */ 115180744Sdes/* ---------------------------------------------------------------------- */ 116180744Sdes/* ---------------------------------------------------------------------- */ 117180744Sdes 118180744Sdes 119180744Sdes/* ---------------------------------------------------------------------- */ 120180744Sdes/* ---------------------------------------------------------------------- */ 121180744Sdes/* ----- Primitive Routines --------------------------------------------- */ 122180744Sdes/* ---------------------------------------------------------------------- */ 123180744Sdes/* ---------------------------------------------------------------------- */ 124180744Sdes 125180744Sdes 126180744Sdes/* ---------------------------------------------------------------------- */ 127180744Sdes/* --- 32-bit by 32-bit to 64-bit Multiplication ------------------------ */ 128180744Sdes/* ---------------------------------------------------------------------- */ 129180744Sdes 130180744Sdes#define MUL64(a,b) ((UINT64)((UINT64)(UINT32)(a) * (UINT64)(UINT32)(b))) 131180744Sdes 132180744Sdes/* ---------------------------------------------------------------------- */ 133180744Sdes/* --- Endian Conversion --- Forcing assembly on some platforms */ 134180744Sdes/* ---------------------------------------------------------------------- */ 135180744Sdes 136180744Sdes#if (__LITTLE_ENDIAN__) 137294328Sdes#define LOAD_UINT32_REVERSED(p) get_u32(p) 138294328Sdes#define STORE_UINT32_REVERSED(p,v) put_u32(p,v) 139180744Sdes#else 
140294328Sdes#define LOAD_UINT32_REVERSED(p) get_u32_le(p) 141294328Sdes#define STORE_UINT32_REVERSED(p,v) put_u32_le(p,v) 142180744Sdes#endif 143180744Sdes 144294328Sdes#define LOAD_UINT32_LITTLE(p) (get_u32_le(p)) 145294328Sdes#define STORE_UINT32_BIG(p,v) put_u32(p, v) 146294328Sdes 147180744Sdes/* ---------------------------------------------------------------------- */ 148180744Sdes/* ---------------------------------------------------------------------- */ 149180744Sdes/* ----- Begin KDF & PDF Section ---------------------------------------- */ 150180744Sdes/* ---------------------------------------------------------------------- */ 151180744Sdes/* ---------------------------------------------------------------------- */ 152180744Sdes 153180744Sdes/* UMAC uses AES with 16 byte block and key lengths */ 154180744Sdes#define AES_BLOCK_LEN 16 155180744Sdes 156180744Sdes/* OpenSSL's AES */ 157294328Sdes#ifdef WITH_OPENSSL 158180744Sdes#include "openbsd-compat/openssl-compat.h" 159180744Sdes#ifndef USE_BUILTIN_RIJNDAEL 160180744Sdes# include <openssl/aes.h> 161180744Sdes#endif 162180744Sdestypedef AES_KEY aes_int_key[1]; 163180744Sdes#define aes_encryption(in,out,int_key) \ 164180744Sdes AES_encrypt((u_char *)(in),(u_char *)(out),(AES_KEY *)int_key) 165180744Sdes#define aes_key_setup(key,int_key) \ 166255767Sdes AES_set_encrypt_key((const u_char *)(key),UMAC_KEY_LEN*8,int_key) 167294328Sdes#else 168294328Sdes#include "rijndael.h" 169294328Sdes#define AES_ROUNDS ((UMAC_KEY_LEN / 4) + 6) 170294328Sdestypedef UINT8 aes_int_key[AES_ROUNDS+1][4][4]; /* AES internal */ 171294328Sdes#define aes_encryption(in,out,int_key) \ 172294328Sdes rijndaelEncrypt((u32 *)(int_key), AES_ROUNDS, (u8 *)(in), (u8 *)(out)) 173294328Sdes#define aes_key_setup(key,int_key) \ 174294328Sdes rijndaelKeySetupEnc((u32 *)(int_key), (const unsigned char *)(key), \ 175294328Sdes UMAC_KEY_LEN*8) 176294328Sdes#endif 177180744Sdes 178180744Sdes/* The user-supplied UMAC key is stretched using AES in a 
counter 179180744Sdes * mode to supply all random bits needed by UMAC. The kdf function takes 180180744Sdes * an AES internal key representation 'key' and writes a stream of 181180750Sdes * 'nbytes' bytes to the memory pointed at by 'bufp'. Each distinct 182180744Sdes * 'ndx' causes a distinct byte stream. 183180744Sdes */ 184180750Sdesstatic void kdf(void *bufp, aes_int_key key, UINT8 ndx, int nbytes) 185180744Sdes{ 186180744Sdes UINT8 in_buf[AES_BLOCK_LEN] = {0}; 187180744Sdes UINT8 out_buf[AES_BLOCK_LEN]; 188180750Sdes UINT8 *dst_buf = (UINT8 *)bufp; 189180744Sdes int i; 190180744Sdes 191180744Sdes /* Setup the initial value */ 192180744Sdes in_buf[AES_BLOCK_LEN-9] = ndx; 193180744Sdes in_buf[AES_BLOCK_LEN-1] = i = 1; 194180744Sdes 195180744Sdes while (nbytes >= AES_BLOCK_LEN) { 196180744Sdes aes_encryption(in_buf, out_buf, key); 197180744Sdes memcpy(dst_buf,out_buf,AES_BLOCK_LEN); 198180744Sdes in_buf[AES_BLOCK_LEN-1] = ++i; 199180744Sdes nbytes -= AES_BLOCK_LEN; 200180744Sdes dst_buf += AES_BLOCK_LEN; 201180744Sdes } 202180744Sdes if (nbytes) { 203180744Sdes aes_encryption(in_buf, out_buf, key); 204180744Sdes memcpy(dst_buf,out_buf,nbytes); 205180744Sdes } 206180744Sdes} 207180744Sdes 208180744Sdes/* The final UHASH result is XOR'd with the output of a pseudorandom 209180744Sdes * function. Here, we use AES to generate random output and 210180744Sdes * xor the appropriate bytes depending on the last bits of nonce. 211180744Sdes * This scheme is optimized for sequential, increasing big-endian nonces. 
212180744Sdes */ 213180744Sdes 214180744Sdestypedef struct { 215180744Sdes UINT8 cache[AES_BLOCK_LEN]; /* Previous AES output is saved */ 216180744Sdes UINT8 nonce[AES_BLOCK_LEN]; /* The AES input making above cache */ 217180744Sdes aes_int_key prf_key; /* Expanded AES key for PDF */ 218180744Sdes} pdf_ctx; 219180744Sdes 220180744Sdesstatic void pdf_init(pdf_ctx *pc, aes_int_key prf_key) 221180744Sdes{ 222180744Sdes UINT8 buf[UMAC_KEY_LEN]; 223180744Sdes 224180744Sdes kdf(buf, prf_key, 0, UMAC_KEY_LEN); 225180744Sdes aes_key_setup(buf, pc->prf_key); 226180744Sdes 227180744Sdes /* Initialize pdf and cache */ 228180744Sdes memset(pc->nonce, 0, sizeof(pc->nonce)); 229180744Sdes aes_encryption(pc->nonce, pc->cache, pc->prf_key); 230180744Sdes} 231180744Sdes 232255767Sdesstatic void pdf_gen_xor(pdf_ctx *pc, const UINT8 nonce[8], UINT8 buf[8]) 233180744Sdes{ 234180744Sdes /* 'ndx' indicates that we'll be using the 0th or 1st eight bytes 235180744Sdes * of the AES output. If last time around we returned the ndx-1st 236180744Sdes * element, then we may have the result in the cache already. 
237180744Sdes */ 238180744Sdes 239180744Sdes#if (UMAC_OUTPUT_LEN == 4) 240180744Sdes#define LOW_BIT_MASK 3 241180744Sdes#elif (UMAC_OUTPUT_LEN == 8) 242180744Sdes#define LOW_BIT_MASK 1 243180744Sdes#elif (UMAC_OUTPUT_LEN > 8) 244180744Sdes#define LOW_BIT_MASK 0 245180744Sdes#endif 246255767Sdes union { 247255767Sdes UINT8 tmp_nonce_lo[4]; 248255767Sdes UINT32 align; 249255767Sdes } t; 250180744Sdes#if LOW_BIT_MASK != 0 251180744Sdes int ndx = nonce[7] & LOW_BIT_MASK; 252180744Sdes#endif 253255767Sdes *(UINT32 *)t.tmp_nonce_lo = ((const UINT32 *)nonce)[1]; 254255767Sdes t.tmp_nonce_lo[3] &= ~LOW_BIT_MASK; /* zero last bit */ 255180744Sdes 256255767Sdes if ( (((UINT32 *)t.tmp_nonce_lo)[0] != ((UINT32 *)pc->nonce)[1]) || 257255767Sdes (((const UINT32 *)nonce)[0] != ((UINT32 *)pc->nonce)[0]) ) 258180744Sdes { 259255767Sdes ((UINT32 *)pc->nonce)[0] = ((const UINT32 *)nonce)[0]; 260255767Sdes ((UINT32 *)pc->nonce)[1] = ((UINT32 *)t.tmp_nonce_lo)[0]; 261180744Sdes aes_encryption(pc->nonce, pc->cache, pc->prf_key); 262180744Sdes } 263180744Sdes 264180744Sdes#if (UMAC_OUTPUT_LEN == 4) 265180744Sdes *((UINT32 *)buf) ^= ((UINT32 *)pc->cache)[ndx]; 266180744Sdes#elif (UMAC_OUTPUT_LEN == 8) 267180744Sdes *((UINT64 *)buf) ^= ((UINT64 *)pc->cache)[ndx]; 268180744Sdes#elif (UMAC_OUTPUT_LEN == 12) 269180744Sdes ((UINT64 *)buf)[0] ^= ((UINT64 *)pc->cache)[0]; 270180744Sdes ((UINT32 *)buf)[2] ^= ((UINT32 *)pc->cache)[2]; 271180744Sdes#elif (UMAC_OUTPUT_LEN == 16) 272180744Sdes ((UINT64 *)buf)[0] ^= ((UINT64 *)pc->cache)[0]; 273180744Sdes ((UINT64 *)buf)[1] ^= ((UINT64 *)pc->cache)[1]; 274180744Sdes#endif 275180744Sdes} 276180744Sdes 277180744Sdes/* ---------------------------------------------------------------------- */ 278180744Sdes/* ---------------------------------------------------------------------- */ 279180744Sdes/* ----- Begin NH Hash Section ------------------------------------------ */ 280180744Sdes/* ---------------------------------------------------------------------- 
/* ---------------------------------------------------------------------- */

/* The NH-based hash functions used in UMAC are described in the UMAC paper
 * and specification, both of which can be found at the UMAC website.
 * The interface to this implementation has two
 * versions, one expects the entire message being hashed to be passed
 * in a single buffer and returns the hash result immediately. The second
 * allows the message to be passed in a sequence of buffers. In the
 * multiple-buffer interface, the client calls the routine nh_update() as
 * many times as necessary. When there is no more data to be fed to the
 * hash, the client calls nh_final() which calculates the hash output.
 * Before beginning another hash calculation the nh_reset() routine
 * must be called. The single-buffer routine, nh(), is equivalent to
 * the sequence of calls nh_update() and nh_final(); however it is
 * optimized and should be preferred whenever the multiple-buffer interface
 * is not necessary. When using either interface, it is the client's
 * responsibility to pass no more than L1_KEY_LEN bytes per hash result.
 *
 * The routine nh_init() initializes the nh_ctx data structure and
 * must be called once, before any other NH routine.
 */

 /* The "nh_aux" routines do the actual NH hashing work. They
  * expect buffers to be multiples of L1_PAD_BOUNDARY. These routines
  * produce output for all STREAMS NH iterations in one call,
  * allowing the parallel implementation of the streams.
  */
307180744Sdes */ 308180744Sdes 309180744Sdes#define STREAMS (UMAC_OUTPUT_LEN / 4) /* Number of times hash is applied */ 310180744Sdes#define L1_KEY_LEN 1024 /* Internal key bytes */ 311180744Sdes#define L1_KEY_SHIFT 16 /* Toeplitz key shift between streams */ 312180744Sdes#define L1_PAD_BOUNDARY 32 /* pad message to boundary multiple */ 313180744Sdes#define ALLOC_BOUNDARY 16 /* Keep buffers aligned to this */ 314180744Sdes#define HASH_BUF_BYTES 64 /* nh_aux_hb buffer multiple */ 315180744Sdes 316180744Sdestypedef struct { 317180744Sdes UINT8 nh_key [L1_KEY_LEN + L1_KEY_SHIFT * (STREAMS - 1)]; /* NH Key */ 318240075Sdes UINT8 data [HASH_BUF_BYTES]; /* Incoming data buffer */ 319180744Sdes int next_data_empty; /* Bookeeping variable for data buffer. */ 320180744Sdes int bytes_hashed; /* Bytes (out of L1_KEY_LEN) incorperated. */ 321180744Sdes UINT64 state[STREAMS]; /* on-line state */ 322180744Sdes} nh_ctx; 323180744Sdes 324180744Sdes 325180744Sdes#if (UMAC_OUTPUT_LEN == 4) 326180744Sdes 327255767Sdesstatic void nh_aux(void *kp, const void *dp, void *hp, UINT32 dlen) 328180744Sdes/* NH hashing primitive. Previous (partial) hash result is loaded and 329180744Sdes* then stored via hp pointer. The length of the data pointed at by "dp", 330180744Sdes* "dlen", is guaranteed to be divisible by L1_PAD_BOUNDARY (32). Key 331180744Sdes* is expected to be endian compensated in memory at key setup. 
332180744Sdes*/ 333180744Sdes{ 334180744Sdes UINT64 h; 335180744Sdes UWORD c = dlen / 32; 336180744Sdes UINT32 *k = (UINT32 *)kp; 337255767Sdes const UINT32 *d = (const UINT32 *)dp; 338180744Sdes UINT32 d0,d1,d2,d3,d4,d5,d6,d7; 339180744Sdes UINT32 k0,k1,k2,k3,k4,k5,k6,k7; 340180744Sdes 341180744Sdes h = *((UINT64 *)hp); 342180744Sdes do { 343180744Sdes d0 = LOAD_UINT32_LITTLE(d+0); d1 = LOAD_UINT32_LITTLE(d+1); 344180744Sdes d2 = LOAD_UINT32_LITTLE(d+2); d3 = LOAD_UINT32_LITTLE(d+3); 345180744Sdes d4 = LOAD_UINT32_LITTLE(d+4); d5 = LOAD_UINT32_LITTLE(d+5); 346180744Sdes d6 = LOAD_UINT32_LITTLE(d+6); d7 = LOAD_UINT32_LITTLE(d+7); 347180744Sdes k0 = *(k+0); k1 = *(k+1); k2 = *(k+2); k3 = *(k+3); 348180744Sdes k4 = *(k+4); k5 = *(k+5); k6 = *(k+6); k7 = *(k+7); 349180744Sdes h += MUL64((k0 + d0), (k4 + d4)); 350180744Sdes h += MUL64((k1 + d1), (k5 + d5)); 351180744Sdes h += MUL64((k2 + d2), (k6 + d6)); 352180744Sdes h += MUL64((k3 + d3), (k7 + d7)); 353180744Sdes 354180744Sdes d += 8; 355180744Sdes k += 8; 356180744Sdes } while (--c); 357180744Sdes *((UINT64 *)hp) = h; 358180744Sdes} 359180744Sdes 360180744Sdes#elif (UMAC_OUTPUT_LEN == 8) 361180744Sdes 362255767Sdesstatic void nh_aux(void *kp, const void *dp, void *hp, UINT32 dlen) 363180744Sdes/* Same as previous nh_aux, but two streams are handled in one pass, 364180744Sdes * reading and writing 16 bytes of hash-state per call. 
365180744Sdes */ 366180744Sdes{ 367180744Sdes UINT64 h1,h2; 368180744Sdes UWORD c = dlen / 32; 369180744Sdes UINT32 *k = (UINT32 *)kp; 370255767Sdes const UINT32 *d = (const UINT32 *)dp; 371180744Sdes UINT32 d0,d1,d2,d3,d4,d5,d6,d7; 372180744Sdes UINT32 k0,k1,k2,k3,k4,k5,k6,k7, 373180744Sdes k8,k9,k10,k11; 374180744Sdes 375180744Sdes h1 = *((UINT64 *)hp); 376180744Sdes h2 = *((UINT64 *)hp + 1); 377180744Sdes k0 = *(k+0); k1 = *(k+1); k2 = *(k+2); k3 = *(k+3); 378180744Sdes do { 379180744Sdes d0 = LOAD_UINT32_LITTLE(d+0); d1 = LOAD_UINT32_LITTLE(d+1); 380180744Sdes d2 = LOAD_UINT32_LITTLE(d+2); d3 = LOAD_UINT32_LITTLE(d+3); 381180744Sdes d4 = LOAD_UINT32_LITTLE(d+4); d5 = LOAD_UINT32_LITTLE(d+5); 382180744Sdes d6 = LOAD_UINT32_LITTLE(d+6); d7 = LOAD_UINT32_LITTLE(d+7); 383180744Sdes k4 = *(k+4); k5 = *(k+5); k6 = *(k+6); k7 = *(k+7); 384180744Sdes k8 = *(k+8); k9 = *(k+9); k10 = *(k+10); k11 = *(k+11); 385180744Sdes 386180744Sdes h1 += MUL64((k0 + d0), (k4 + d4)); 387180744Sdes h2 += MUL64((k4 + d0), (k8 + d4)); 388180744Sdes 389180744Sdes h1 += MUL64((k1 + d1), (k5 + d5)); 390180744Sdes h2 += MUL64((k5 + d1), (k9 + d5)); 391180744Sdes 392180744Sdes h1 += MUL64((k2 + d2), (k6 + d6)); 393180744Sdes h2 += MUL64((k6 + d2), (k10 + d6)); 394180744Sdes 395180744Sdes h1 += MUL64((k3 + d3), (k7 + d7)); 396180744Sdes h2 += MUL64((k7 + d3), (k11 + d7)); 397180744Sdes 398180744Sdes k0 = k8; k1 = k9; k2 = k10; k3 = k11; 399180744Sdes 400180744Sdes d += 8; 401180744Sdes k += 8; 402180744Sdes } while (--c); 403180744Sdes ((UINT64 *)hp)[0] = h1; 404180744Sdes ((UINT64 *)hp)[1] = h2; 405180744Sdes} 406180744Sdes 407180744Sdes#elif (UMAC_OUTPUT_LEN == 12) 408180744Sdes 409255767Sdesstatic void nh_aux(void *kp, const void *dp, void *hp, UINT32 dlen) 410180744Sdes/* Same as previous nh_aux, but two streams are handled in one pass, 411180744Sdes * reading and writing 24 bytes of hash-state per call. 
412180744Sdes*/ 413180744Sdes{ 414180744Sdes UINT64 h1,h2,h3; 415180744Sdes UWORD c = dlen / 32; 416180744Sdes UINT32 *k = (UINT32 *)kp; 417255767Sdes const UINT32 *d = (const UINT32 *)dp; 418180744Sdes UINT32 d0,d1,d2,d3,d4,d5,d6,d7; 419180744Sdes UINT32 k0,k1,k2,k3,k4,k5,k6,k7, 420180744Sdes k8,k9,k10,k11,k12,k13,k14,k15; 421180744Sdes 422180744Sdes h1 = *((UINT64 *)hp); 423180744Sdes h2 = *((UINT64 *)hp + 1); 424180744Sdes h3 = *((UINT64 *)hp + 2); 425180744Sdes k0 = *(k+0); k1 = *(k+1); k2 = *(k+2); k3 = *(k+3); 426180744Sdes k4 = *(k+4); k5 = *(k+5); k6 = *(k+6); k7 = *(k+7); 427180744Sdes do { 428180744Sdes d0 = LOAD_UINT32_LITTLE(d+0); d1 = LOAD_UINT32_LITTLE(d+1); 429180744Sdes d2 = LOAD_UINT32_LITTLE(d+2); d3 = LOAD_UINT32_LITTLE(d+3); 430180744Sdes d4 = LOAD_UINT32_LITTLE(d+4); d5 = LOAD_UINT32_LITTLE(d+5); 431180744Sdes d6 = LOAD_UINT32_LITTLE(d+6); d7 = LOAD_UINT32_LITTLE(d+7); 432180744Sdes k8 = *(k+8); k9 = *(k+9); k10 = *(k+10); k11 = *(k+11); 433180744Sdes k12 = *(k+12); k13 = *(k+13); k14 = *(k+14); k15 = *(k+15); 434180744Sdes 435180744Sdes h1 += MUL64((k0 + d0), (k4 + d4)); 436180744Sdes h2 += MUL64((k4 + d0), (k8 + d4)); 437180744Sdes h3 += MUL64((k8 + d0), (k12 + d4)); 438180744Sdes 439180744Sdes h1 += MUL64((k1 + d1), (k5 + d5)); 440180744Sdes h2 += MUL64((k5 + d1), (k9 + d5)); 441180744Sdes h3 += MUL64((k9 + d1), (k13 + d5)); 442180744Sdes 443180744Sdes h1 += MUL64((k2 + d2), (k6 + d6)); 444180744Sdes h2 += MUL64((k6 + d2), (k10 + d6)); 445180744Sdes h3 += MUL64((k10 + d2), (k14 + d6)); 446180744Sdes 447180744Sdes h1 += MUL64((k3 + d3), (k7 + d7)); 448180744Sdes h2 += MUL64((k7 + d3), (k11 + d7)); 449180744Sdes h3 += MUL64((k11 + d3), (k15 + d7)); 450180744Sdes 451180744Sdes k0 = k8; k1 = k9; k2 = k10; k3 = k11; 452180744Sdes k4 = k12; k5 = k13; k6 = k14; k7 = k15; 453180744Sdes 454180744Sdes d += 8; 455180744Sdes k += 8; 456180744Sdes } while (--c); 457180744Sdes ((UINT64 *)hp)[0] = h1; 458180744Sdes ((UINT64 *)hp)[1] = h2; 459180744Sdes 
((UINT64 *)hp)[2] = h3; 460180744Sdes} 461180744Sdes 462180744Sdes#elif (UMAC_OUTPUT_LEN == 16) 463180744Sdes 464255767Sdesstatic void nh_aux(void *kp, const void *dp, void *hp, UINT32 dlen) 465180744Sdes/* Same as previous nh_aux, but two streams are handled in one pass, 466180744Sdes * reading and writing 24 bytes of hash-state per call. 467180744Sdes*/ 468180744Sdes{ 469180744Sdes UINT64 h1,h2,h3,h4; 470180744Sdes UWORD c = dlen / 32; 471180744Sdes UINT32 *k = (UINT32 *)kp; 472255767Sdes const UINT32 *d = (const UINT32 *)dp; 473180744Sdes UINT32 d0,d1,d2,d3,d4,d5,d6,d7; 474180744Sdes UINT32 k0,k1,k2,k3,k4,k5,k6,k7, 475180744Sdes k8,k9,k10,k11,k12,k13,k14,k15, 476180744Sdes k16,k17,k18,k19; 477180744Sdes 478180744Sdes h1 = *((UINT64 *)hp); 479180744Sdes h2 = *((UINT64 *)hp + 1); 480180744Sdes h3 = *((UINT64 *)hp + 2); 481180744Sdes h4 = *((UINT64 *)hp + 3); 482180744Sdes k0 = *(k+0); k1 = *(k+1); k2 = *(k+2); k3 = *(k+3); 483180744Sdes k4 = *(k+4); k5 = *(k+5); k6 = *(k+6); k7 = *(k+7); 484180744Sdes do { 485180744Sdes d0 = LOAD_UINT32_LITTLE(d+0); d1 = LOAD_UINT32_LITTLE(d+1); 486180744Sdes d2 = LOAD_UINT32_LITTLE(d+2); d3 = LOAD_UINT32_LITTLE(d+3); 487180744Sdes d4 = LOAD_UINT32_LITTLE(d+4); d5 = LOAD_UINT32_LITTLE(d+5); 488180744Sdes d6 = LOAD_UINT32_LITTLE(d+6); d7 = LOAD_UINT32_LITTLE(d+7); 489180744Sdes k8 = *(k+8); k9 = *(k+9); k10 = *(k+10); k11 = *(k+11); 490180744Sdes k12 = *(k+12); k13 = *(k+13); k14 = *(k+14); k15 = *(k+15); 491180744Sdes k16 = *(k+16); k17 = *(k+17); k18 = *(k+18); k19 = *(k+19); 492180744Sdes 493180744Sdes h1 += MUL64((k0 + d0), (k4 + d4)); 494180744Sdes h2 += MUL64((k4 + d0), (k8 + d4)); 495180744Sdes h3 += MUL64((k8 + d0), (k12 + d4)); 496180744Sdes h4 += MUL64((k12 + d0), (k16 + d4)); 497180744Sdes 498180744Sdes h1 += MUL64((k1 + d1), (k5 + d5)); 499180744Sdes h2 += MUL64((k5 + d1), (k9 + d5)); 500180744Sdes h3 += MUL64((k9 + d1), (k13 + d5)); 501180744Sdes h4 += MUL64((k13 + d1), (k17 + d5)); 502180744Sdes 503180744Sdes h1 += 
MUL64((k2 + d2), (k6 + d6)); 504180744Sdes h2 += MUL64((k6 + d2), (k10 + d6)); 505180744Sdes h3 += MUL64((k10 + d2), (k14 + d6)); 506180744Sdes h4 += MUL64((k14 + d2), (k18 + d6)); 507180744Sdes 508180744Sdes h1 += MUL64((k3 + d3), (k7 + d7)); 509180744Sdes h2 += MUL64((k7 + d3), (k11 + d7)); 510180744Sdes h3 += MUL64((k11 + d3), (k15 + d7)); 511180744Sdes h4 += MUL64((k15 + d3), (k19 + d7)); 512180744Sdes 513180744Sdes k0 = k8; k1 = k9; k2 = k10; k3 = k11; 514180744Sdes k4 = k12; k5 = k13; k6 = k14; k7 = k15; 515180744Sdes k8 = k16; k9 = k17; k10 = k18; k11 = k19; 516180744Sdes 517180744Sdes d += 8; 518180744Sdes k += 8; 519180744Sdes } while (--c); 520180744Sdes ((UINT64 *)hp)[0] = h1; 521180744Sdes ((UINT64 *)hp)[1] = h2; 522180744Sdes ((UINT64 *)hp)[2] = h3; 523180744Sdes ((UINT64 *)hp)[3] = h4; 524180744Sdes} 525180744Sdes 526180744Sdes/* ---------------------------------------------------------------------- */ 527180744Sdes#endif /* UMAC_OUTPUT_LENGTH */ 528180744Sdes/* ---------------------------------------------------------------------- */ 529180744Sdes 530180744Sdes 531180744Sdes/* ---------------------------------------------------------------------- */ 532180744Sdes 533255767Sdesstatic void nh_transform(nh_ctx *hc, const UINT8 *buf, UINT32 nbytes) 534180744Sdes/* This function is a wrapper for the primitive NH hash functions. It takes 535180744Sdes * as argument "hc" the current hash context and a buffer which must be a 536180744Sdes * multiple of L1_PAD_BOUNDARY. The key passed to nh_aux is offset 537180744Sdes * appropriately according to how much message has been hashed already. 
538180744Sdes */ 539180744Sdes{ 540180744Sdes UINT8 *key; 541180744Sdes 542180744Sdes key = hc->nh_key + hc->bytes_hashed; 543180744Sdes nh_aux(key, buf, hc->state, nbytes); 544180744Sdes} 545180744Sdes 546180744Sdes/* ---------------------------------------------------------------------- */ 547180744Sdes 548180750Sdes#if (__LITTLE_ENDIAN__) 549180744Sdesstatic void endian_convert(void *buf, UWORD bpw, UINT32 num_bytes) 550180744Sdes/* We endian convert the keys on little-endian computers to */ 551180744Sdes/* compensate for the lack of big-endian memory reads during hashing. */ 552180744Sdes{ 553180744Sdes UWORD iters = num_bytes / bpw; 554180744Sdes if (bpw == 4) { 555180744Sdes UINT32 *p = (UINT32 *)buf; 556180744Sdes do { 557180744Sdes *p = LOAD_UINT32_REVERSED(p); 558180744Sdes p++; 559180744Sdes } while (--iters); 560180744Sdes } else if (bpw == 8) { 561180744Sdes UINT32 *p = (UINT32 *)buf; 562180744Sdes UINT32 t; 563180744Sdes do { 564180744Sdes t = LOAD_UINT32_REVERSED(p+1); 565180744Sdes p[1] = LOAD_UINT32_REVERSED(p); 566180744Sdes p[0] = t; 567180744Sdes p += 2; 568180744Sdes } while (--iters); 569180744Sdes } 570180744Sdes} 571180744Sdes#define endian_convert_if_le(x,y,z) endian_convert((x),(y),(z)) 572180744Sdes#else 573180744Sdes#define endian_convert_if_le(x,y,z) do{}while(0) /* Do nothing */ 574180744Sdes#endif 575180744Sdes 576180744Sdes/* ---------------------------------------------------------------------- */ 577180744Sdes 578180744Sdesstatic void nh_reset(nh_ctx *hc) 579180744Sdes/* Reset nh_ctx to ready for hashing of new data */ 580180744Sdes{ 581180744Sdes hc->bytes_hashed = 0; 582180744Sdes hc->next_data_empty = 0; 583180744Sdes hc->state[0] = 0; 584180744Sdes#if (UMAC_OUTPUT_LEN >= 8) 585180744Sdes hc->state[1] = 0; 586180744Sdes#endif 587180744Sdes#if (UMAC_OUTPUT_LEN >= 12) 588180744Sdes hc->state[2] = 0; 589180744Sdes#endif 590180744Sdes#if (UMAC_OUTPUT_LEN == 16) 591180744Sdes hc->state[3] = 0; 592180744Sdes#endif 593180744Sdes 
594180744Sdes} 595180744Sdes 596180744Sdes/* ---------------------------------------------------------------------- */ 597180744Sdes 598180744Sdesstatic void nh_init(nh_ctx *hc, aes_int_key prf_key) 599180744Sdes/* Generate nh_key, endian convert and reset to be ready for hashing. */ 600180744Sdes{ 601180744Sdes kdf(hc->nh_key, prf_key, 1, sizeof(hc->nh_key)); 602180744Sdes endian_convert_if_le(hc->nh_key, 4, sizeof(hc->nh_key)); 603180744Sdes nh_reset(hc); 604180744Sdes} 605180744Sdes 606180744Sdes/* ---------------------------------------------------------------------- */ 607180744Sdes 608255767Sdesstatic void nh_update(nh_ctx *hc, const UINT8 *buf, UINT32 nbytes) 609180744Sdes/* Incorporate nbytes of data into a nh_ctx, buffer whatever is not an */ 610180744Sdes/* even multiple of HASH_BUF_BYTES. */ 611180744Sdes{ 612180744Sdes UINT32 i,j; 613180744Sdes 614180744Sdes j = hc->next_data_empty; 615180744Sdes if ((j + nbytes) >= HASH_BUF_BYTES) { 616180744Sdes if (j) { 617180744Sdes i = HASH_BUF_BYTES - j; 618180744Sdes memcpy(hc->data+j, buf, i); 619180744Sdes nh_transform(hc,hc->data,HASH_BUF_BYTES); 620180744Sdes nbytes -= i; 621180744Sdes buf += i; 622180744Sdes hc->bytes_hashed += HASH_BUF_BYTES; 623180744Sdes } 624180744Sdes if (nbytes >= HASH_BUF_BYTES) { 625180744Sdes i = nbytes & ~(HASH_BUF_BYTES - 1); 626180744Sdes nh_transform(hc, buf, i); 627180744Sdes nbytes -= i; 628180744Sdes buf += i; 629180744Sdes hc->bytes_hashed += i; 630180744Sdes } 631180744Sdes j = 0; 632180744Sdes } 633180744Sdes memcpy(hc->data + j, buf, nbytes); 634180744Sdes hc->next_data_empty = j + nbytes; 635180744Sdes} 636180744Sdes 637180744Sdes/* ---------------------------------------------------------------------- */ 638180744Sdes 639180744Sdesstatic void zero_pad(UINT8 *p, int nbytes) 640180744Sdes{ 641180744Sdes/* Write "nbytes" of zeroes, beginning at "p" */ 642180744Sdes if (nbytes >= (int)sizeof(UWORD)) { 643180744Sdes while ((ptrdiff_t)p % sizeof(UWORD)) { 644180744Sdes *p = 
0; 645180744Sdes nbytes--; 646180744Sdes p++; 647180744Sdes } 648180744Sdes while (nbytes >= (int)sizeof(UWORD)) { 649180744Sdes *(UWORD *)p = 0; 650180744Sdes nbytes -= sizeof(UWORD); 651180744Sdes p += sizeof(UWORD); 652180744Sdes } 653180744Sdes } 654180744Sdes while (nbytes) { 655180744Sdes *p = 0; 656180744Sdes nbytes--; 657180744Sdes p++; 658180744Sdes } 659180744Sdes} 660180744Sdes 661180744Sdes/* ---------------------------------------------------------------------- */ 662180744Sdes 663180744Sdesstatic void nh_final(nh_ctx *hc, UINT8 *result) 664180744Sdes/* After passing some number of data buffers to nh_update() for integration 665180744Sdes * into an NH context, nh_final is called to produce a hash result. If any 666180744Sdes * bytes are in the buffer hc->data, incorporate them into the 667180744Sdes * NH context. Finally, add into the NH accumulation "state" the total number 668180744Sdes * of bits hashed. The resulting numbers are written to the buffer "result". 669180744Sdes * If nh_update was never called, L1_PAD_BOUNDARY zeroes are incorporated. 
670180744Sdes */ 671180744Sdes{ 672180744Sdes int nh_len, nbits; 673180744Sdes 674180744Sdes if (hc->next_data_empty != 0) { 675180744Sdes nh_len = ((hc->next_data_empty + (L1_PAD_BOUNDARY - 1)) & 676180744Sdes ~(L1_PAD_BOUNDARY - 1)); 677180744Sdes zero_pad(hc->data + hc->next_data_empty, 678180744Sdes nh_len - hc->next_data_empty); 679180744Sdes nh_transform(hc, hc->data, nh_len); 680180744Sdes hc->bytes_hashed += hc->next_data_empty; 681180744Sdes } else if (hc->bytes_hashed == 0) { 682180744Sdes nh_len = L1_PAD_BOUNDARY; 683180744Sdes zero_pad(hc->data, L1_PAD_BOUNDARY); 684180744Sdes nh_transform(hc, hc->data, nh_len); 685180744Sdes } 686180744Sdes 687180744Sdes nbits = (hc->bytes_hashed << 3); 688180744Sdes ((UINT64 *)result)[0] = ((UINT64 *)hc->state)[0] + nbits; 689180744Sdes#if (UMAC_OUTPUT_LEN >= 8) 690180744Sdes ((UINT64 *)result)[1] = ((UINT64 *)hc->state)[1] + nbits; 691180744Sdes#endif 692180744Sdes#if (UMAC_OUTPUT_LEN >= 12) 693180744Sdes ((UINT64 *)result)[2] = ((UINT64 *)hc->state)[2] + nbits; 694180744Sdes#endif 695180744Sdes#if (UMAC_OUTPUT_LEN == 16) 696180744Sdes ((UINT64 *)result)[3] = ((UINT64 *)hc->state)[3] + nbits; 697180744Sdes#endif 698180744Sdes nh_reset(hc); 699180744Sdes} 700180744Sdes 701180744Sdes/* ---------------------------------------------------------------------- */ 702180744Sdes 703255767Sdesstatic void nh(nh_ctx *hc, const UINT8 *buf, UINT32 padded_len, 704180744Sdes UINT32 unpadded_len, UINT8 *result) 705180744Sdes/* All-in-one nh_update() and nh_final() equivalent. 
706180744Sdes * Assumes that padded_len is divisible by L1_PAD_BOUNDARY and result is 707180744Sdes * well aligned 708180744Sdes */ 709180744Sdes{ 710180744Sdes UINT32 nbits; 711180744Sdes 712180744Sdes /* Initialize the hash state */ 713180744Sdes nbits = (unpadded_len << 3); 714180744Sdes 715180744Sdes ((UINT64 *)result)[0] = nbits; 716180744Sdes#if (UMAC_OUTPUT_LEN >= 8) 717180744Sdes ((UINT64 *)result)[1] = nbits; 718180744Sdes#endif 719180744Sdes#if (UMAC_OUTPUT_LEN >= 12) 720180744Sdes ((UINT64 *)result)[2] = nbits; 721180744Sdes#endif 722180744Sdes#if (UMAC_OUTPUT_LEN == 16) 723180744Sdes ((UINT64 *)result)[3] = nbits; 724180744Sdes#endif 725180744Sdes 726180744Sdes nh_aux(hc->nh_key, buf, result, padded_len); 727180744Sdes} 728180744Sdes 729180744Sdes/* ---------------------------------------------------------------------- */ 730180744Sdes/* ---------------------------------------------------------------------- */ 731180744Sdes/* ----- Begin UHASH Section -------------------------------------------- */ 732180744Sdes/* ---------------------------------------------------------------------- */ 733180744Sdes/* ---------------------------------------------------------------------- */ 734180744Sdes 735180744Sdes/* UHASH is a multi-layered algorithm. Data presented to UHASH is first 736180744Sdes * hashed by NH. The NH output is then hashed by a polynomial-hash layer 737180744Sdes * unless the initial data to be hashed is short. After the polynomial- 738180744Sdes * layer, an inner-product hash is used to produce the final UHASH output. 739180744Sdes * 740180744Sdes * UHASH provides two interfaces, one all-at-once and another where data 741180744Sdes * buffers are presented sequentially. In the sequential interface, the 742180744Sdes * UHASH client calls the routine uhash_update() as many times as necessary. 
743180744Sdes * When there is no more data to be fed to UHASH, the client calls 744180744Sdes * uhash_final() which 745180744Sdes * calculates the UHASH output. Before beginning another UHASH calculation 746180744Sdes * the uhash_reset() routine must be called. The all-at-once UHASH routine, 747180744Sdes * uhash(), is equivalent to the sequence of calls uhash_update() and 748180744Sdes * uhash_final(); however it is optimized and should be 749180744Sdes * used whenever the sequential interface is not necessary. 750180744Sdes * 751180744Sdes * The routine uhash_init() initializes the uhash_ctx data structure and 752180744Sdes * must be called once, before any other UHASH routine. 753180744Sdes */ 754180744Sdes 755180744Sdes/* ---------------------------------------------------------------------- */ 756180744Sdes/* ----- Constants and uhash_ctx ---------------------------------------- */ 757180744Sdes/* ---------------------------------------------------------------------- */ 758180744Sdes 759180744Sdes/* ---------------------------------------------------------------------- */ 760180744Sdes/* ----- Poly hash and Inner-Product hash Constants --------------------- */ 761180744Sdes/* ---------------------------------------------------------------------- */ 762180744Sdes 763180744Sdes/* Primes and masks */ 764180744Sdes#define p36 ((UINT64)0x0000000FFFFFFFFBull) /* 2^36 - 5 */ 765180744Sdes#define p64 ((UINT64)0xFFFFFFFFFFFFFFC5ull) /* 2^64 - 59 */ 766180744Sdes#define m36 ((UINT64)0x0000000FFFFFFFFFull) /* The low 36 of 64 bits */ 767180744Sdes 768180744Sdes 769180744Sdes/* ---------------------------------------------------------------------- */ 770180744Sdes 771180744Sdestypedef struct uhash_ctx { 772180744Sdes nh_ctx hash; /* Hash context for L1 NH hash */ 773180744Sdes UINT64 poly_key_8[STREAMS]; /* p64 poly keys */ 774180744Sdes UINT64 poly_accum[STREAMS]; /* poly hash result */ 775180744Sdes UINT64 ip_keys[STREAMS*4]; /* Inner-product keys */ 776180744Sdes 
UINT32 ip_trans[STREAMS]; /* Inner-product translation */ 777180744Sdes UINT32 msg_len; /* Total length of data passed */ 778180744Sdes /* to uhash */ 779180744Sdes} uhash_ctx; 780180744Sdestypedef struct uhash_ctx *uhash_ctx_t; 781180744Sdes 782180744Sdes/* ---------------------------------------------------------------------- */ 783180744Sdes 784180744Sdes 785180744Sdes/* The polynomial hashes use Horner's rule to evaluate a polynomial one 786180744Sdes * word at a time. As described in the specification, poly32 and poly64 787180744Sdes * require keys from special domains. The following implementations exploit 788180744Sdes * the special domains to avoid overflow. The results are not guaranteed to 789180744Sdes * be within Z_p32 and Z_p64, but the Inner-Product hash implementation 790180744Sdes * patches any errant values. 791180744Sdes */ 792180744Sdes 793180744Sdesstatic UINT64 poly64(UINT64 cur, UINT64 key, UINT64 data) 794180744Sdes{ 795180744Sdes UINT32 key_hi = (UINT32)(key >> 32), 796180744Sdes key_lo = (UINT32)key, 797180744Sdes cur_hi = (UINT32)(cur >> 32), 798180744Sdes cur_lo = (UINT32)cur, 799180744Sdes x_lo, 800180744Sdes x_hi; 801180744Sdes UINT64 X,T,res; 802180744Sdes 803180744Sdes X = MUL64(key_hi, cur_lo) + MUL64(cur_hi, key_lo); 804180744Sdes x_lo = (UINT32)X; 805180744Sdes x_hi = (UINT32)(X >> 32); 806180744Sdes 807180744Sdes res = (MUL64(key_hi, cur_hi) + x_hi) * 59 + MUL64(key_lo, cur_lo); 808180744Sdes 809180744Sdes T = ((UINT64)x_lo << 32); 810180744Sdes res += T; 811180744Sdes if (res < T) 812180744Sdes res += 59; 813180744Sdes 814180744Sdes res += data; 815180744Sdes if (res < data) 816180744Sdes res += 59; 817180744Sdes 818180744Sdes return res; 819180744Sdes} 820180744Sdes 821180744Sdes 822180744Sdes/* Although UMAC is specified to use a ramped polynomial hash scheme, this 823180744Sdes * implementation does not handle all ramp levels. 
Because we don't handle 824180744Sdes * the ramp up to p128 modulus in this implementation, we are limited to 825180744Sdes * 2^14 poly_hash() invocations per stream (for a total capacity of 2^24 826180744Sdes * bytes input to UMAC per tag, ie. 16MB). 827180744Sdes */ 828180744Sdesstatic void poly_hash(uhash_ctx_t hc, UINT32 data_in[]) 829180744Sdes{ 830180744Sdes int i; 831180744Sdes UINT64 *data=(UINT64*)data_in; 832180744Sdes 833180744Sdes for (i = 0; i < STREAMS; i++) { 834180744Sdes if ((UINT32)(data[i] >> 32) == 0xfffffffful) { 835180744Sdes hc->poly_accum[i] = poly64(hc->poly_accum[i], 836180744Sdes hc->poly_key_8[i], p64 - 1); 837180744Sdes hc->poly_accum[i] = poly64(hc->poly_accum[i], 838180744Sdes hc->poly_key_8[i], (data[i] - 59)); 839180744Sdes } else { 840180744Sdes hc->poly_accum[i] = poly64(hc->poly_accum[i], 841180744Sdes hc->poly_key_8[i], data[i]); 842180744Sdes } 843180744Sdes } 844180744Sdes} 845180744Sdes 846180744Sdes 847180744Sdes/* ---------------------------------------------------------------------- */ 848180744Sdes 849180744Sdes 850180744Sdes/* The final step in UHASH is an inner-product hash. The poly hash 851180744Sdes * produces a result not neccesarily WORD_LEN bytes long. The inner- 852180744Sdes * product hash breaks the polyhash output into 16-bit chunks and 853180744Sdes * multiplies each with a 36 bit key. 
854180744Sdes */ 855180744Sdes 856180744Sdesstatic UINT64 ip_aux(UINT64 t, UINT64 *ipkp, UINT64 data) 857180744Sdes{ 858180744Sdes t = t + ipkp[0] * (UINT64)(UINT16)(data >> 48); 859180744Sdes t = t + ipkp[1] * (UINT64)(UINT16)(data >> 32); 860180744Sdes t = t + ipkp[2] * (UINT64)(UINT16)(data >> 16); 861180744Sdes t = t + ipkp[3] * (UINT64)(UINT16)(data); 862180744Sdes 863180744Sdes return t; 864180744Sdes} 865180744Sdes 866180744Sdesstatic UINT32 ip_reduce_p36(UINT64 t) 867180744Sdes{ 868180744Sdes/* Divisionless modular reduction */ 869180744Sdes UINT64 ret; 870180744Sdes 871180744Sdes ret = (t & m36) + 5 * (t >> 36); 872180744Sdes if (ret >= p36) 873180744Sdes ret -= p36; 874180744Sdes 875180744Sdes /* return least significant 32 bits */ 876180744Sdes return (UINT32)(ret); 877180744Sdes} 878180744Sdes 879180744Sdes 880180744Sdes/* If the data being hashed by UHASH is no longer than L1_KEY_LEN, then 881180744Sdes * the polyhash stage is skipped and ip_short is applied directly to the 882180744Sdes * NH output. 
883180744Sdes */ 884180744Sdesstatic void ip_short(uhash_ctx_t ahc, UINT8 *nh_res, u_char *res) 885180744Sdes{ 886180744Sdes UINT64 t; 887180744Sdes UINT64 *nhp = (UINT64 *)nh_res; 888180744Sdes 889180744Sdes t = ip_aux(0,ahc->ip_keys, nhp[0]); 890180744Sdes STORE_UINT32_BIG((UINT32 *)res+0, ip_reduce_p36(t) ^ ahc->ip_trans[0]); 891180744Sdes#if (UMAC_OUTPUT_LEN >= 8) 892180744Sdes t = ip_aux(0,ahc->ip_keys+4, nhp[1]); 893180744Sdes STORE_UINT32_BIG((UINT32 *)res+1, ip_reduce_p36(t) ^ ahc->ip_trans[1]); 894180744Sdes#endif 895180744Sdes#if (UMAC_OUTPUT_LEN >= 12) 896180744Sdes t = ip_aux(0,ahc->ip_keys+8, nhp[2]); 897180744Sdes STORE_UINT32_BIG((UINT32 *)res+2, ip_reduce_p36(t) ^ ahc->ip_trans[2]); 898180744Sdes#endif 899180744Sdes#if (UMAC_OUTPUT_LEN == 16) 900180744Sdes t = ip_aux(0,ahc->ip_keys+12, nhp[3]); 901180744Sdes STORE_UINT32_BIG((UINT32 *)res+3, ip_reduce_p36(t) ^ ahc->ip_trans[3]); 902180744Sdes#endif 903180744Sdes} 904180744Sdes 905180744Sdes/* If the data being hashed by UHASH is longer than L1_KEY_LEN, then 906180744Sdes * the polyhash stage is not skipped and ip_long is applied to the 907180744Sdes * polyhash output. 
908180744Sdes */ 909180744Sdesstatic void ip_long(uhash_ctx_t ahc, u_char *res) 910180744Sdes{ 911180744Sdes int i; 912180744Sdes UINT64 t; 913180744Sdes 914180744Sdes for (i = 0; i < STREAMS; i++) { 915180744Sdes /* fix polyhash output not in Z_p64 */ 916180744Sdes if (ahc->poly_accum[i] >= p64) 917180744Sdes ahc->poly_accum[i] -= p64; 918180744Sdes t = ip_aux(0,ahc->ip_keys+(i*4), ahc->poly_accum[i]); 919180744Sdes STORE_UINT32_BIG((UINT32 *)res+i, 920180744Sdes ip_reduce_p36(t) ^ ahc->ip_trans[i]); 921180744Sdes } 922180744Sdes} 923180744Sdes 924180744Sdes 925180744Sdes/* ---------------------------------------------------------------------- */ 926180744Sdes 927180744Sdes/* ---------------------------------------------------------------------- */ 928180744Sdes 929180744Sdes/* Reset uhash context for next hash session */ 930180744Sdesstatic int uhash_reset(uhash_ctx_t pc) 931180744Sdes{ 932180744Sdes nh_reset(&pc->hash); 933180744Sdes pc->msg_len = 0; 934180744Sdes pc->poly_accum[0] = 1; 935180744Sdes#if (UMAC_OUTPUT_LEN >= 8) 936180744Sdes pc->poly_accum[1] = 1; 937180744Sdes#endif 938180744Sdes#if (UMAC_OUTPUT_LEN >= 12) 939180744Sdes pc->poly_accum[2] = 1; 940180744Sdes#endif 941180744Sdes#if (UMAC_OUTPUT_LEN == 16) 942180744Sdes pc->poly_accum[3] = 1; 943180744Sdes#endif 944180744Sdes return 1; 945180744Sdes} 946180744Sdes 947180744Sdes/* ---------------------------------------------------------------------- */ 948180744Sdes 949180744Sdes/* Given a pointer to the internal key needed by kdf() and a uhash context, 950180744Sdes * initialize the NH context and generate keys needed for poly and inner- 951180744Sdes * product hashing. All keys are endian adjusted in memory so that native 952180744Sdes * loads cause correct keys to be in registers during calculation. 
953180744Sdes */ 954180744Sdesstatic void uhash_init(uhash_ctx_t ahc, aes_int_key prf_key) 955180744Sdes{ 956180744Sdes int i; 957180744Sdes UINT8 buf[(8*STREAMS+4)*sizeof(UINT64)]; 958180744Sdes 959180744Sdes /* Zero the entire uhash context */ 960180744Sdes memset(ahc, 0, sizeof(uhash_ctx)); 961180744Sdes 962180744Sdes /* Initialize the L1 hash */ 963180744Sdes nh_init(&ahc->hash, prf_key); 964180744Sdes 965180744Sdes /* Setup L2 hash variables */ 966180744Sdes kdf(buf, prf_key, 2, sizeof(buf)); /* Fill buffer with index 1 key */ 967180744Sdes for (i = 0; i < STREAMS; i++) { 968180744Sdes /* Fill keys from the buffer, skipping bytes in the buffer not 969180744Sdes * used by this implementation. Endian reverse the keys if on a 970180744Sdes * little-endian computer. 971180744Sdes */ 972180744Sdes memcpy(ahc->poly_key_8+i, buf+24*i, 8); 973180744Sdes endian_convert_if_le(ahc->poly_key_8+i, 8, 8); 974180744Sdes /* Mask the 64-bit keys to their special domain */ 975180744Sdes ahc->poly_key_8[i] &= ((UINT64)0x01ffffffu << 32) + 0x01ffffffu; 976180744Sdes ahc->poly_accum[i] = 1; /* Our polyhash prepends a non-zero word */ 977180744Sdes } 978180744Sdes 979180744Sdes /* Setup L3-1 hash variables */ 980180744Sdes kdf(buf, prf_key, 3, sizeof(buf)); /* Fill buffer with index 2 key */ 981180744Sdes for (i = 0; i < STREAMS; i++) 982180744Sdes memcpy(ahc->ip_keys+4*i, buf+(8*i+4)*sizeof(UINT64), 983180744Sdes 4*sizeof(UINT64)); 984180744Sdes endian_convert_if_le(ahc->ip_keys, sizeof(UINT64), 985180744Sdes sizeof(ahc->ip_keys)); 986180744Sdes for (i = 0; i < STREAMS*4; i++) 987180744Sdes ahc->ip_keys[i] %= p36; /* Bring into Z_p36 */ 988180744Sdes 989180744Sdes /* Setup L3-2 hash variables */ 990180744Sdes /* Fill buffer with index 4 key */ 991180744Sdes kdf(ahc->ip_trans, prf_key, 4, STREAMS * sizeof(UINT32)); 992180744Sdes endian_convert_if_le(ahc->ip_trans, sizeof(UINT32), 993180744Sdes STREAMS * sizeof(UINT32)); 994180744Sdes} 995180744Sdes 996180744Sdes/* 
---------------------------------------------------------------------- */

#if 0
static uhash_ctx_t uhash_alloc(u_char key[])
{
/* Allocate memory and force to a 16-byte boundary. */
    uhash_ctx_t ctx;
    u_char bytes_to_add;
    aes_int_key prf_key;

    ctx = (uhash_ctx_t)malloc(sizeof(uhash_ctx)+ALLOC_BOUNDARY);
    if (ctx) {
        if (ALLOC_BOUNDARY) {
            bytes_to_add = ALLOC_BOUNDARY -
                              ((ptrdiff_t)ctx & (ALLOC_BOUNDARY -1));
            ctx = (uhash_ctx_t)((u_char *)ctx + bytes_to_add);
            *((u_char *)ctx - 1) = bytes_to_add;
        }
        aes_key_setup(key,prf_key);
        uhash_init(ctx, prf_key);
    }
    return (ctx);
}
#endif

/* ---------------------------------------------------------------------- */

#if 0
static int uhash_free(uhash_ctx_t ctx)
{
/* Free memory allocated by uhash_alloc */
    u_char bytes_to_sub;

    if (ctx) {
        if (ALLOC_BOUNDARY) {
            bytes_to_sub = *((u_char *)ctx - 1);
            ctx = (uhash_ctx_t)((u_char *)ctx - bytes_to_sub);
        }
        free(ctx);
    }
    return (1);
}
#endif
/* ---------------------------------------------------------------------- */

static int uhash_update(uhash_ctx_t ctx, const u_char *input, long len)
/* Absorb len bytes of input. While the running total stays within
 * L1_KEY_LEN the bytes are only handed to the NH layer; past that point
 * the data is cut into L1_KEY_LEN chunks, each hashed with NH and its
 * output fed to the polyhash. Always returns 1.
 */
{
    UINT64 nh_out[STREAMS];
    UINT8 *nh_bytes = (UINT8 *)&nh_out;
    UWORD buffered, fill;

    /* Still a short message: keep accumulating in the NH context */
    if (ctx->msg_len + len <= L1_KEY_LEN) {
        nh_update(&ctx->hash, (const UINT8 *)input, len);
        ctx->msg_len += len;
        return (1);
    }

    /* Bytes of the current L1 block already handed to NH. Exactly one
     * complete block counts as pending because it has not been passed to
     * the polyhash yet.
     */
    buffered = ctx->msg_len % L1_KEY_LEN;
    if (ctx->msg_len == L1_KEY_LEN)
        buffered = L1_KEY_LEN;

    if (buffered + len >= L1_KEY_LEN) {

        /* Top up and flush the partially filled block, if there is one */
        if (buffered) {
            fill = (L1_KEY_LEN - buffered);
            nh_update(&ctx->hash, (const UINT8 *)input, fill);
            nh_final(&ctx->hash, nh_bytes);
            ctx->msg_len += fill;
            poly_hash(ctx, (UINT32 *)nh_bytes);
            len -= fill;
            input += fill;
        }

        /* Whole blocks are hashed straight from the caller's buffer */
        for (; len >= L1_KEY_LEN; len -= L1_KEY_LEN, input += L1_KEY_LEN) {
            nh(&ctx->hash, (const UINT8 *)input, L1_KEY_LEN,
               L1_KEY_LEN, nh_bytes);
            ctx->msg_len += L1_KEY_LEN;
            poly_hash(ctx, (UINT32 *)nh_bytes);
        }
    }

    /* pass remaining < L1_KEY_LEN bytes of input data to NH */
    if (len) {
        nh_update(&ctx->hash, (const UINT8 *)input, len);
        ctx->msg_len += len;
    }

    return (1);
}
1094180744Sdes 1095180744Sdes/* ---------------------------------------------------------------------- */ 1096180744Sdes 1097180744Sdesstatic int uhash_final(uhash_ctx_t ctx, u_char *res) 1098180744Sdes/* Incorporate any pending data, pad, and generate tag */ 1099180744Sdes{ 1100180750Sdes UINT64 result_buf[STREAMS]; 1101180750Sdes UINT8 *nh_result = (UINT8 *)&result_buf; 1102180744Sdes 1103180744Sdes if (ctx->msg_len > L1_KEY_LEN) { 1104180744Sdes if (ctx->msg_len % L1_KEY_LEN) { 1105180744Sdes nh_final(&ctx->hash, nh_result); 1106180744Sdes poly_hash(ctx,(UINT32 *)nh_result); 1107180744Sdes } 1108180744Sdes ip_long(ctx, res); 1109180744Sdes } else { 1110180744Sdes nh_final(&ctx->hash, nh_result); 1111180744Sdes ip_short(ctx,nh_result, res); 1112180744Sdes } 1113180744Sdes uhash_reset(ctx); 1114180744Sdes return (1); 1115180744Sdes} 1116180744Sdes 1117180744Sdes/* ---------------------------------------------------------------------- */ 1118180744Sdes 1119180744Sdes#if 0 1120180744Sdesstatic int uhash(uhash_ctx_t ahc, u_char *msg, long len, u_char *res) 1121180744Sdes/* assumes that msg is in a writable buffer of length divisible by */ 1122180744Sdes/* L1_PAD_BOUNDARY. Bytes beyond msg[len] may be zeroed. */ 1123180744Sdes{ 1124180744Sdes UINT8 nh_result[STREAMS*sizeof(UINT64)]; 1125180744Sdes UINT32 nh_len; 1126180744Sdes int extra_zeroes_needed; 1127180744Sdes 1128180744Sdes /* If the message to be hashed is no longer than L1_HASH_LEN, we skip 1129180744Sdes * the polyhash. 
1130180744Sdes */ 1131180744Sdes if (len <= L1_KEY_LEN) { 1132180744Sdes if (len == 0) /* If zero length messages will not */ 1133180744Sdes nh_len = L1_PAD_BOUNDARY; /* be seen, comment out this case */ 1134180744Sdes else 1135180744Sdes nh_len = ((len + (L1_PAD_BOUNDARY - 1)) & ~(L1_PAD_BOUNDARY - 1)); 1136180744Sdes extra_zeroes_needed = nh_len - len; 1137180744Sdes zero_pad((UINT8 *)msg + len, extra_zeroes_needed); 1138180744Sdes nh(&ahc->hash, (UINT8 *)msg, nh_len, len, nh_result); 1139180744Sdes ip_short(ahc,nh_result, res); 1140180744Sdes } else { 1141180744Sdes /* Otherwise, we hash each L1_KEY_LEN chunk with NH, passing the NH 1142180744Sdes * output to poly_hash(). 1143180744Sdes */ 1144180744Sdes do { 1145180744Sdes nh(&ahc->hash, (UINT8 *)msg, L1_KEY_LEN, L1_KEY_LEN, nh_result); 1146180744Sdes poly_hash(ahc,(UINT32 *)nh_result); 1147180744Sdes len -= L1_KEY_LEN; 1148180744Sdes msg += L1_KEY_LEN; 1149180744Sdes } while (len >= L1_KEY_LEN); 1150180744Sdes if (len) { 1151180744Sdes nh_len = ((len + (L1_PAD_BOUNDARY - 1)) & ~(L1_PAD_BOUNDARY - 1)); 1152180744Sdes extra_zeroes_needed = nh_len - len; 1153180744Sdes zero_pad((UINT8 *)msg + len, extra_zeroes_needed); 1154180744Sdes nh(&ahc->hash, (UINT8 *)msg, nh_len, len, nh_result); 1155180744Sdes poly_hash(ahc,(UINT32 *)nh_result); 1156180744Sdes } 1157180744Sdes 1158180744Sdes ip_long(ahc, res); 1159180744Sdes } 1160180744Sdes 1161180744Sdes uhash_reset(ahc); 1162180744Sdes return 1; 1163180744Sdes} 1164180744Sdes#endif 1165180744Sdes 1166180744Sdes/* ---------------------------------------------------------------------- */ 1167180744Sdes/* ---------------------------------------------------------------------- */ 1168180744Sdes/* ----- Begin UMAC Section --------------------------------------------- */ 1169180744Sdes/* ---------------------------------------------------------------------- */ 1170180744Sdes/* ---------------------------------------------------------------------- */ 1171180744Sdes 
1172180744Sdes/* The UMAC interface has two interfaces, an all-at-once interface where 1173180744Sdes * the entire message to be authenticated is passed to UMAC in one buffer, 1174180744Sdes * and a sequential interface where the message is presented a little at a 1175180744Sdes * time. The all-at-once is more optimaized than the sequential version and 1176180744Sdes * should be preferred when the sequential interface is not required. 1177180744Sdes */ 1178180744Sdesstruct umac_ctx { 1179180744Sdes uhash_ctx hash; /* Hash function for message compression */ 1180180744Sdes pdf_ctx pdf; /* PDF for hashed output */ 1181180744Sdes void *free_ptr; /* Address to free this struct via */ 1182180744Sdes} umac_ctx; 1183180744Sdes 1184180744Sdes/* ---------------------------------------------------------------------- */ 1185180744Sdes 1186180744Sdes#if 0 1187180744Sdesint umac_reset(struct umac_ctx *ctx) 1188180744Sdes/* Reset the hash function to begin a new authentication. */ 1189180744Sdes{ 1190180744Sdes uhash_reset(&ctx->hash); 1191180744Sdes return (1); 1192180744Sdes} 1193180744Sdes#endif 1194180744Sdes 1195180744Sdes/* ---------------------------------------------------------------------- */ 1196180744Sdes 1197180744Sdesint umac_delete(struct umac_ctx *ctx) 1198180744Sdes/* Deallocate the ctx structure */ 1199180744Sdes{ 1200180744Sdes if (ctx) { 1201180744Sdes if (ALLOC_BOUNDARY) 1202180744Sdes ctx = (struct umac_ctx *)ctx->free_ptr; 1203255767Sdes free(ctx); 1204180744Sdes } 1205180744Sdes return (1); 1206180744Sdes} 1207180744Sdes 1208180744Sdes/* ---------------------------------------------------------------------- */ 1209180744Sdes 1210255767Sdesstruct umac_ctx *umac_new(const u_char key[]) 1211180744Sdes/* Dynamically allocate a umac_ctx struct, initialize variables, 1212180744Sdes * generate subkeys from key. Align to 16-byte boundary. 
1213180744Sdes */ 1214180744Sdes{ 1215180744Sdes struct umac_ctx *ctx, *octx; 1216180744Sdes size_t bytes_to_add; 1217180744Sdes aes_int_key prf_key; 1218180744Sdes 1219257954Sdelphij octx = ctx = xcalloc(1, sizeof(*ctx) + ALLOC_BOUNDARY); 1220180744Sdes if (ctx) { 1221180744Sdes if (ALLOC_BOUNDARY) { 1222180744Sdes bytes_to_add = ALLOC_BOUNDARY - 1223180744Sdes ((ptrdiff_t)ctx & (ALLOC_BOUNDARY - 1)); 1224180744Sdes ctx = (struct umac_ctx *)((u_char *)ctx + bytes_to_add); 1225180744Sdes } 1226180744Sdes ctx->free_ptr = octx; 1227255767Sdes aes_key_setup(key, prf_key); 1228180744Sdes pdf_init(&ctx->pdf, prf_key); 1229180744Sdes uhash_init(&ctx->hash, prf_key); 1230180744Sdes } 1231180744Sdes 1232180744Sdes return (ctx); 1233180744Sdes} 1234180744Sdes 1235180744Sdes/* ---------------------------------------------------------------------- */ 1236180744Sdes 1237255767Sdesint umac_final(struct umac_ctx *ctx, u_char tag[], const u_char nonce[8]) 1238180744Sdes/* Incorporate any pending data, pad, and generate tag */ 1239180744Sdes{ 1240180744Sdes uhash_final(&ctx->hash, (u_char *)tag); 1241255767Sdes pdf_gen_xor(&ctx->pdf, (const UINT8 *)nonce, (UINT8 *)tag); 1242180744Sdes 1243180744Sdes return (1); 1244180744Sdes} 1245180744Sdes 1246180744Sdes/* ---------------------------------------------------------------------- */ 1247180744Sdes 1248255767Sdesint umac_update(struct umac_ctx *ctx, const u_char *input, long len) 1249180744Sdes/* Given len bytes of data, we parse it into L1_KEY_LEN chunks and */ 1250180744Sdes/* hash each one, calling the PDF on the hashed output whenever the hash- */ 1251180744Sdes/* output buffer is full. 
*/ 1252180744Sdes{ 1253180744Sdes uhash_update(&ctx->hash, input, len); 1254180744Sdes return (1); 1255180744Sdes} 1256180744Sdes 1257180744Sdes/* ---------------------------------------------------------------------- */ 1258180744Sdes 1259180744Sdes#if 0 1260180744Sdesint umac(struct umac_ctx *ctx, u_char *input, 1261180744Sdes long len, u_char tag[], 1262180744Sdes u_char nonce[8]) 1263180744Sdes/* All-in-one version simply calls umac_update() and umac_final(). */ 1264180744Sdes{ 1265180744Sdes uhash(&ctx->hash, input, len, (u_char *)tag); 1266180744Sdes pdf_gen_xor(&ctx->pdf, (UINT8 *)nonce, (UINT8 *)tag); 1267180744Sdes 1268180744Sdes return (1); 1269180744Sdes} 1270180744Sdes#endif 1271180744Sdes 1272180744Sdes/* ---------------------------------------------------------------------- */ 1273180744Sdes/* ---------------------------------------------------------------------- */ 1274180744Sdes/* ----- End UMAC Section ----------------------------------------------- */ 1275180744Sdes/* ---------------------------------------------------------------------- */ 1276180744Sdes/* ---------------------------------------------------------------------- */ 1277