1/* 2 BLAKE2 reference source code package - optimized C implementations 3 4 Written in 2012 by Samuel Neves <sneves@dei.uc.pt> 5 6 To the extent possible under law, the author(s) have dedicated all copyright 7 and related and neighboring rights to this software to the public domain 8 worldwide. This software is distributed without any warranty. 9 10 You should have received a copy of the CC0 Public Domain Dedication along with 11 this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>. 12*/ 13 14#include <stdint.h> 15#include <string.h> 16#include <stdio.h> 17 18#include "blake2.h" 19#include "blake2-impl.h" 20 21#include "blake2-config.h" 22 23#if defined(_MSC_VER) 24#include <intrin.h> 25#endif 26 27#if defined(HAVE_SSE2) 28#include <emmintrin.h> 29// MSVC only defines _mm_set_epi64x for x86_64... 30#if defined(_MSC_VER) && !defined(_M_X64) 31static inline __m128i _mm_set_epi64x( const uint64_t u1, const uint64_t u0 ) 32{ 33 return _mm_set_epi32( u1 >> 32, u1, u0 >> 32, u0 ); 34} 35#endif 36#endif 37 38#if defined(HAVE_SSSE3) 39#include <tmmintrin.h> 40#endif 41#if defined(HAVE_SSE4_1) 42#include <smmintrin.h> 43#endif 44#if defined(HAVE_AVX) 45#include <immintrin.h> 46#endif 47#if defined(HAVE_XOP) && !defined(_MSC_VER) 48#include <x86intrin.h> 49#endif 50 51 52 53#include "blake2b-round.h" 54 55static const uint64_t blake2b_IV[8] = 56{ 57 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, 58 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, 59 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, 60 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL 61}; 62 63static const uint8_t blake2b_sigma[12][16] = 64{ 65 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , 66 { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , 67 { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , 68 { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , 69 { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , 70 { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , 71 { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , 72 { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , 73 { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , 74 { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , 75 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , 76 { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } 77}; 78 79 80/* Some helper functions, not necessarily useful */ 81static inline int blake2b_set_lastnode( blake2b_state *S ) 82{ 83 S->f[1] = ~0ULL; 84 return 0; 85} 86 87static inline int blake2b_clear_lastnode( blake2b_state *S ) 88{ 89 S->f[1] = 0ULL; 90 return 0; 91} 92 93static inline int blake2b_set_lastblock( blake2b_state *S ) 94{ 95 if( S->last_node ) blake2b_set_lastnode( S ); 96 97 S->f[0] = ~0ULL; 98 return 0; 99} 100 101static inline int blake2b_clear_lastblock( blake2b_state *S ) 102{ 103 if( S->last_node ) blake2b_clear_lastnode( S ); 104 105 S->f[0] = 0ULL; 106 return 0; 107} 108 109 110static inline int blake2b_increment_counter( blake2b_state *S, const uint64_t inc ) 111{ 112#if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__)) 113 // ADD/ADC chain 114 __uint128_t t = ( ( __uint128_t )S->t[1] << 64 ) | S->t[0]; 115 t += inc; 116 S->t[0] = ( uint64_t )( t >> 0 ); 117 S->t[1] = ( uint64_t )( t >> 64 ); 118#else 119 S->t[0] += inc; 120 S->t[1] += ( S->t[0] < inc ); 121#endif 122 return 0; 123} 124 125 126// Parameter-related functions 127static inline int blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length ) 128{ 129 P->digest_length = digest_length; 130 return 0; 131} 132 133static inline int blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout ) 134{ 135 P->fanout = fanout; 136 return 0; 137} 138 139static inline int blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth ) 140{ 141 P->depth = depth; 142 return 0; 143} 144 145static inline int blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length ) 146{ 147 P->leaf_length = leaf_length; 148 return 0; 149} 150 151static inline int blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset ) 152{ 153 P->node_offset = node_offset; 154 return 0; 155} 156 157static inline int blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth ) 158{ 159 P->node_depth = node_depth; 160 return 0; 161} 162 163static inline int blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length ) 164{ 165 P->inner_length = inner_length; 166 return 0; 167} 168 169static inline int blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] ) 170{ 171 memcpy( P->salt, salt, BLAKE2B_SALTBYTES ); 172 return 0; 173} 174 175static inline int blake2b_param_set_personal( blake2b_param *P, const uint8_t personal[BLAKE2B_PERSONALBYTES] ) 176{ 177 memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES ); 178 return 0; 179} 180 181static inline int blake2b_init0( blake2b_state *S ) 182{ 183 memset( S, 0, sizeof( blake2b_state ) ); 184 185 for( int i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i]; 186 187 return 0; 188} 189 190 191 192#define blake2b_init BLAKE2_IMPL_NAME(blake2b_init) 193#define blake2b_init_param BLAKE2_IMPL_NAME(blake2b_init_param) 194#define blake2b_init_key BLAKE2_IMPL_NAME(blake2b_init_key) 195#define blake2b_update BLAKE2_IMPL_NAME(blake2b_update) 196#define blake2b_final BLAKE2_IMPL_NAME(blake2b_final) 197#define blake2b BLAKE2_IMPL_NAME(blake2b) 198 199#if defined(__cplusplus) 200extern "C" { 201#endif 202 int blake2b_init( blake2b_state *S, size_t outlen ); 203 int blake2b_init_param( blake2b_state *S, const blake2b_param *P ); 204 int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); 205 int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen ); 206 int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen ); 207 int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); 208#if defined(__cplusplus) 209} 210#endif 211 212/* init xors IV with input parameter block */ 213int blake2b_init_param( blake2b_state *S, const blake2b_param *P ) 214{ 215 uint8_t *p, *h, *v; 216 //blake2b_init0( S ); 217 v = ( uint8_t * )( blake2b_IV ); 218 h = ( uint8_t * )( S->h ); 219 p = ( uint8_t * )( P ); 220 /* IV XOR ParamBlock */ 221 memset( S, 0, sizeof( blake2b_state ) ); 222 223 for( int i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i]; 224 225 S->outlen = P->digest_length; 226 return 0; 227} 228 229 230/* Some sort of default parameter block initialization, for sequential blake2b */ 231 232int blake2b_init( blake2b_state *S, size_t outlen ) 233{ 234 if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; 235 236 const blake2b_param P = 237 { 238 ( uint8_t ) outlen, 239 0, 240 1, 241 1, 242 0, 243 0, 244 0, 245 0, 246 {0}, 247 {0}, 248 {0} 249 }; 250 return blake2b_init_param( S, &P ); 251} 252 253int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ) 254{ 255 if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; 256 257 if ( ( !keylen ) || keylen > BLAKE2B_KEYBYTES ) return -1; 258 259 const blake2b_param P = 260 { 261 ( uint8_t ) outlen, 262 ( uint8_t ) keylen, 263 1, 264 1, 265 0, 266 0, 267 0, 268 0, 269 {0}, 270 {0}, 271 {0} 272 }; 273 274 if( blake2b_init_param( S, &P ) < 0 ) 275 return 0; 276 277 { 278 uint8_t block[BLAKE2B_BLOCKBYTES]; 279 memset( block, 0, BLAKE2B_BLOCKBYTES ); 280 memcpy( block, key, keylen ); 281 blake2b_update( S, block, BLAKE2B_BLOCKBYTES ); 282 secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */ 283 } 284 return 0; 285} 286 287static inline int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] ) 288{ 289 __m128i row1l, row1h; 290 __m128i row2l, row2h; 291 __m128i row3l, row3h; 292 __m128i row4l, row4h; 293 __m128i b0, b1; 294 __m128i t0, t1; 295#if defined(HAVE_SSSE3) && !defined(HAVE_XOP) 296 const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 ); 297 const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 ); 298#endif 299#if defined(HAVE_SSE4_1) 300 const __m128i m0 = LOADU( block + 00 ); 301 const __m128i m1 = LOADU( block + 16 ); 302 const __m128i m2 = LOADU( block + 32 ); 303 const __m128i m3 = LOADU( block + 48 ); 304 const __m128i m4 = LOADU( block + 64 ); 305 const __m128i m5 = LOADU( block + 80 ); 306 const __m128i m6 = LOADU( block + 96 ); 307 const __m128i m7 = LOADU( block + 112 ); 308#else 309 const uint64_t m0 = ( ( uint64_t * )block )[ 0]; 310 const uint64_t m1 = ( ( uint64_t * )block )[ 1]; 311 const uint64_t m2 = ( ( uint64_t * )block )[ 2]; 312 const uint64_t m3 = ( ( uint64_t * )block )[ 3]; 313 const uint64_t m4 = ( ( uint64_t * )block )[ 4]; 314 const uint64_t m5 = ( ( uint64_t * )block )[ 5]; 315 const uint64_t m6 = ( ( uint64_t * )block )[ 6]; 316 const uint64_t m7 = ( ( uint64_t * )block )[ 7]; 317 const uint64_t m8 = ( ( uint64_t * )block )[ 8]; 318 const uint64_t m9 = ( ( uint64_t * )block )[ 9]; 319 const uint64_t m10 = ( ( uint64_t * )block )[10]; 320 const uint64_t m11 = ( ( uint64_t * )block )[11]; 321 const uint64_t m12 = ( ( uint64_t * )block )[12]; 322 const uint64_t m13 = ( ( uint64_t * )block )[13]; 323 const uint64_t m14 = ( ( uint64_t * )block )[14]; 324 const uint64_t m15 = ( ( uint64_t * )block )[15]; 325#endif 326 row1l = LOADU( &S->h[0] ); 327 row1h = LOADU( &S->h[2] ); 328 row2l = LOADU( &S->h[4] ); 329 row2h = LOADU( &S->h[6] ); 330 row3l = LOADU( &blake2b_IV[0] ); 331 row3h = LOADU( &blake2b_IV[2] ); 332 row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( &S->t[0] ) ); 333 row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &S->f[0] ) ); 334 ROUND( 0 ); 335 ROUND( 1 ); 336 ROUND( 2 ); 337 ROUND( 3 ); 338 ROUND( 4 ); 339 ROUND( 5 ); 340 ROUND( 6 ); 341 ROUND( 7 ); 342 ROUND( 8 ); 343 ROUND( 9 ); 344 ROUND( 10 ); 345 ROUND( 11 ); 346 row1l = _mm_xor_si128( row3l, row1l ); 347 row1h = _mm_xor_si128( row3h, row1h ); 348 STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) ); 349 STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) ); 350 row2l = _mm_xor_si128( row4l, row2l ); 351 row2h = _mm_xor_si128( row4h, row2h ); 352 STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) ); 353 STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) ); 354 return 0; 355} 356 357 358int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen ) 359{ 360 while( inlen > 0 ) 361 { 362 uint32_t left = S->buflen; 363 uint32_t fill = 2 * BLAKE2B_BLOCKBYTES - left; 364 365 if( inlen > fill ) 366 { 367 memcpy( S->buf + left, in, fill ); // Fill buffer 368 S->buflen += fill; 369 blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); 370 blake2b_compress( S, S->buf ); // Compress 371 memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); // Shift buffer left 372 S->buflen -= BLAKE2B_BLOCKBYTES; 373 in += fill; 374 inlen -= fill; 375 } 376 else // inlen <= fill 377 { 378 memcpy( S->buf + left, in, inlen ); 379 S->buflen += ( uint32_t ) inlen; // Be lazy, do not compress 380 in += inlen; 381 inlen -= inlen; 382 } 383 } 384 385 return 0; 386} 387 388 389int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen ) 390{ 391 if(S->outlen != outlen) return -1; 392 393 if( S->buflen > BLAKE2B_BLOCKBYTES ) 394 { 395 blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); 396 blake2b_compress( S, S->buf ); 397 S->buflen -= BLAKE2B_BLOCKBYTES; 398 memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen ); 399 } 400 401 blake2b_increment_counter( S, S->buflen ); 402 blake2b_set_lastblock( S ); 403 memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */ 404 blake2b_compress( S, S->buf ); 405 memcpy( out, &S->h[0], outlen ); 406 return 0; 407} 408 409 410int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) 411{ 412 blake2b_state S[1]; 413 414 /* Verify parameters */ 415 if ( NULL == in && inlen > 0 ) return -1; 416 417 if ( NULL == out ) return -1; 418 419 if( NULL == key && keylen > 0 ) return -1; 420 421 if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1; 422 423 if( keylen > BLAKE2B_KEYBYTES ) return -1; 424 425 if( keylen ) 426 { 427 if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1; 428 } 429 else 430 { 431 if( blake2b_init( S, outlen ) < 0 ) return -1; 432 } 433 434 if( blake2b_update( S, ( uint8_t * )in, inlen ) < 0) return -1; 435 return blake2b_final( S, out, outlen ); 436} 437 438#if defined(SUPERCOP) 439int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) 440{ 441 return blake2b( out, in, NULL, BLAKE2B_OUTBYTES, inlen, 0 ); 442} 443#endif 444