Crypto++ 8.6
Free C++ class library of cryptographic schemes
sha.cpp
// sha.cpp - modified by Wei Dai from Steve Reid's public domain sha1.c

// Steve Reid implemented SHA-1. Wei Dai implemented SHA-2. Jeffrey
// Walton implemented Intel SHA extensions based on Intel articles and code
// by Sean Gulley. Jeffrey Walton implemented ARM SHA-1 and SHA-256 based
// on ARM code and code from Johannes Schneiders, Skip Hovsmith and
// Barry O'Rourke. Jeffrey Walton and Bill Schmidt implemented Power8
// SHA-256 and SHA-512. All code is in the public domain.

// In August 2017 JW reworked the internals to align all the
// implementations. Formerly all hashes were software based, IterHashBase
// handled endian conversions, and IterHashBase dispatched a single
// block to SHA{N}::Transform. SHA{N}::Transform then performed the single
// block hashing. It was repeated for multiple blocks.
//
// The rework added SHA{N}::HashMultipleBlocks (class) and
// SHA{N}_HashMultipleBlocks (free standing). There are also hardware
// accelerated variations. Callers enter SHA{N}::HashMultipleBlocks (class)
// and the function calls SHA{N}_HashMultipleBlocks (free standing) or
// SHA{N}_HashBlock (free standing) as a fallback.
//
// An added wrinkle is that hardware is little endian, C++ is big endian,
// and callers use big endian, so SHA{N}_HashMultipleBlocks accepts a
// ByteOrder for the incoming data arrangement. Hardware based
// SHA{N}_HashMultipleBlocks can often perform the endian swap much more
// easily by setting an EPI mask. Endian swap incurs no penalty on Intel
// SHA, and a 4-instruction penalty on ARM SHA. Under C++ the full software
// based swap penalty is incurred due to use of ReverseBytes().
//
// In May 2019 JW added Cryptogams ARMv7 and NEON implementations for SHA1,
// SHA256 and SHA512. The Cryptogams code closed a performance gap on modern
// 32-bit ARM devices. Cryptogams is Andy Polyakov's project used to create
// high speed crypto algorithms and share them with other developers. Andy's
// code runs 30% to 50% faster than C/C++ code. The Cryptogams code can be
// disabled in config_asm.h. An example of integrating Andy's code is at
// https://wiki.openssl.org/index.php/Cryptogams_SHA.

// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM sha.cpp" to generate MASM code
39
40#include "pch.h"
41#include "config.h"
42
43#if CRYPTOPP_MSC_VERSION
44# pragma warning(disable: 4100 4731)
45#endif
46
47#ifndef CRYPTOPP_IMPORTS
48#ifndef CRYPTOPP_GENERATE_X64_MASM
49
50#include "secblock.h"
51#include "sha.h"
52#include "misc.h"
53#include "cpu.h"
54
55#if defined(CRYPTOPP_DISABLE_SHA_ASM)
56# undef CRYPTOPP_X86_ASM_AVAILABLE
57# undef CRYPTOPP_X32_ASM_AVAILABLE
58# undef CRYPTOPP_X64_ASM_AVAILABLE
59# undef CRYPTOPP_SSE2_ASM_AVAILABLE
60#endif
61
62NAMESPACE_BEGIN(CryptoPP)
63
64#if CRYPTOPP_SHANI_AVAILABLE
65extern void SHA1_HashMultipleBlocks_SHANI(word32 *state, const word32 *data, size_t length, ByteOrder order);
66extern void SHA256_HashMultipleBlocks_SHANI(word32 *state, const word32 *data, size_t length, ByteOrder order);
67#endif
68
69#if CRYPTOGAMS_ARM_SHA1
70extern "C" void cryptogams_sha1_block_data_order(word32* state, const word32 *data, size_t blocks);
71extern "C" void cryptogams_sha1_block_data_order_neon(word32* state, const word32 *data, size_t blocks);
72#endif
73
74#if CRYPTOPP_ARM_SHA1_AVAILABLE
75extern void SHA1_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order);
76#endif
77
78#if CRYPTOPP_ARM_SHA2_AVAILABLE
79extern void SHA256_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order);
80#endif
81
82#if CRYPTOGAMS_ARM_SHA256
83extern "C" void cryptogams_sha256_block_data_order(word32* state, const word32 *data, size_t blocks);
84extern "C" void cryptogams_sha256_block_data_order_neon(word32* state, const word32 *data, size_t blocks);
85#endif
86
87#if CRYPTOPP_ARM_SHA512_AVAILABLE
88extern void SHA512_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order);
89#endif
90
91#if CRYPTOPP_POWER8_SHA_AVAILABLE
92extern void SHA256_HashMultipleBlocks_POWER8(word32 *state, const word32 *data, size_t length, ByteOrder order);
93extern void SHA512_HashMultipleBlocks_POWER8(word64 *state, const word64 *data, size_t length, ByteOrder order);
94#endif
95
96#if CRYPTOGAMS_ARM_SHA512
97extern "C" void cryptogams_sha512_block_data_order(word64* state, const word64 *data, size_t blocks);
98extern "C" void cryptogams_sha512_block_data_order_neon(word64* state, const word64 *data, size_t blocks);
99#endif
100
101// We add extern to export table to sha_simd.cpp, but it
102// cleared http://github.com/weidai11/cryptopp/issues/502
103extern const word32 SHA256_K[64];
104extern const word64 SHA512_K[80];
105
106CRYPTOPP_ALIGN_DATA(16)
107const word64 SHA512_K[80] = {
108 W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd),
109 W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc),
110 W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019),
111 W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118),
112 W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe),
113 W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2),
114 W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1),
115 W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694),
116 W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3),
117 W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65),
118 W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483),
119 W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5),
120 W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210),
121 W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4),
122 W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725),
123 W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70),
124 W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926),
125 W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df),
126 W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8),
127 W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b),
128 W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001),
129 W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30),
130 W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910),
131 W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8),
132 W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53),
133 W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8),
134 W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb),
135 W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3),
136 W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60),
137 W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec),
138 W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9),
139 W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b),
140 W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207),
141 W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178),
142 W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6),
143 W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b),
144 W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493),
145 W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c),
146 W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a),
147 W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817)
148};
149
150CRYPTOPP_ALIGN_DATA(16)
151const word32 SHA256_K[64] = {
152
153 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
154 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
155 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
156 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
157 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
158 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
159 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
160 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
161 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
162 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
163 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
164 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
165 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
166 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
167 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
168 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
169};
170
171////////////////////////////////
172// start of Steve Reid's code //
173////////////////////////////////
174
175ANONYMOUS_NAMESPACE_BEGIN
176
177#define blk0(i) (W[i] = data[i])
178#define blk1(i) (W[i&15] = rotlConstant<1>(W[(i+13)&15]^W[(i+8)&15]^W[(i+2)&15]^W[i&15]))
179
180#define f1(x,y,z) (z^(x&(y^z)))
181#define f2(x,y,z) (x^y^z)
182#define f3(x,y,z) ((x&y)|(z&(x|y)))
183#define f4(x,y,z) (x^y^z)
184
185/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
186#define R0(v,w,x,y,z,i) z+=f1(w,x,y)+blk0(i)+0x5A827999+rotlConstant<5>(v);w=rotlConstant<30>(w);
187#define R1(v,w,x,y,z,i) z+=f1(w,x,y)+blk1(i)+0x5A827999+rotlConstant<5>(v);w=rotlConstant<30>(w);
188#define R2(v,w,x,y,z,i) z+=f2(w,x,y)+blk1(i)+0x6ED9EBA1+rotlConstant<5>(v);w=rotlConstant<30>(w);
189#define R3(v,w,x,y,z,i) z+=f3(w,x,y)+blk1(i)+0x8F1BBCDC+rotlConstant<5>(v);w=rotlConstant<30>(w);
190#define R4(v,w,x,y,z,i) z+=f4(w,x,y)+blk1(i)+0xCA62C1D6+rotlConstant<5>(v);w=rotlConstant<30>(w);
191
192void SHA1_HashBlock_CXX(word32 *state, const word32 *data)
193{
194 CRYPTOPP_ASSERT(state);
195 CRYPTOPP_ASSERT(data);
196
197 word32 W[16];
198 /* Copy context->state[] to working vars */
199 word32 a = state[0];
200 word32 b = state[1];
201 word32 c = state[2];
202 word32 d = state[3];
203 word32 e = state[4];
204 /* 4 rounds of 20 operations each. Loop unrolled. */
205 R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
206 R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
207 R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
208 R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
209 R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
210 R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
211 R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
212 R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
213 R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
214 R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
215 R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
216 R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
217 R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
218 R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
219 R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
220 R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
221 R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
222 R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
223 R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
224 R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
225 /* Add the working vars back into context.state[] */
226 state[0] += a;
227 state[1] += b;
228 state[2] += c;
229 state[3] += d;
230 state[4] += e;
231}
232
233#undef blk0
234#undef blk1
235#undef f1
236#undef f2
237#undef f3
238#undef f4
239#undef R1
240#undef R2
241#undef R3
242#undef R4
243
244ANONYMOUS_NAMESPACE_END
245
246//////////////////////////////
247// end of Steve Reid's code //
248//////////////////////////////
249
250std::string SHA1::AlgorithmProvider() const
251{
252#if CRYPTOPP_SHANI_AVAILABLE
253 if (HasSHA())
254 return "SHANI";
255#endif
256#if CRYPTOPP_SSE2_ASM_AVAILABLE
257 if (HasSSE2())
258 return "SSE2";
259#endif
260#if CRYPTOGAMS_ARM_SHA1
261 if (HasNEON())
262 return "NEON";
263 if (HasARMv7())
264 return "ARMv7";
265#endif
266#if CRYPTOPP_ARM_SHA1_AVAILABLE
267 if (HasSHA1())
268 return "ARMv8";
269#endif
270 return "C++";
271}
272
273void SHA1::InitState(HashWordType *state)
274{
275 state[0] = 0x67452301;
276 state[1] = 0xEFCDAB89;
277 state[2] = 0x98BADCFE;
278 state[3] = 0x10325476;
279 state[4] = 0xC3D2E1F0;
280}
281
282void SHA1::Transform(word32 *state, const word32 *data)
283{
284 CRYPTOPP_ASSERT(state);
285 CRYPTOPP_ASSERT(data);
286
287#if CRYPTOPP_SHANI_AVAILABLE
288 if (HasSHA())
289 {
290 SHA1_HashMultipleBlocks_SHANI(state, data, SHA1::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
291 return;
292 }
293#endif
294#if CRYPTOGAMS_ARM_SHA1 && 0
295 if (HasNEON())
296 {
297# if defined(CRYPTOPP_LITTLE_ENDIAN)
298 word32 dataBuf[16];
299 ByteReverse(dataBuf, data, SHA1::BLOCKSIZE);
300 cryptogams_sha1_block_data_order_neon(state, data, 1);
301# else
302 cryptogams_sha1_block_data_order_neon(state, data, 1);
303# endif
304 return;
305 }
306 if (HasARMv7())
307 {
308# if defined(CRYPTOPP_LITTLE_ENDIAN)
309 word32 dataBuf[16];
310 ByteReverse(dataBuf, data, SHA1::BLOCKSIZE);
311 cryptogams_sha1_block_data_order(state, data, 1);
312# else
313 cryptogams_sha1_block_data_order(state, data, 1);
314# endif
315 return;
316 }
317#endif
318#if CRYPTOPP_ARM_SHA1_AVAILABLE
319 if (HasSHA1())
320 {
321 SHA1_HashMultipleBlocks_ARMV8(state, data, SHA1::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
322 return;
323 }
324#endif
325
326 SHA1_HashBlock_CXX(state, data);
327}
328
329size_t SHA1::HashMultipleBlocks(const word32 *input, size_t length)
330{
331 CRYPTOPP_ASSERT(input);
332 CRYPTOPP_ASSERT(length >= SHA1::BLOCKSIZE);
333
334#if CRYPTOPP_SHANI_AVAILABLE
335 if (HasSHA())
336 {
337 SHA1_HashMultipleBlocks_SHANI(m_state, input, length, BIG_ENDIAN_ORDER);
338 return length & (SHA1::BLOCKSIZE - 1);
339 }
340#endif
341#if CRYPTOGAMS_ARM_SHA1
342 if (HasNEON())
343 {
344 cryptogams_sha1_block_data_order_neon(m_state, input, length / SHA1::BLOCKSIZE);
345 return length & (SHA1::BLOCKSIZE - 1);
346 }
347 if (HasARMv7())
348 {
349 cryptogams_sha1_block_data_order(m_state, input, length / SHA1::BLOCKSIZE);
350 return length & (SHA1::BLOCKSIZE - 1);
351 }
352#endif
353#if CRYPTOPP_ARM_SHA1_AVAILABLE
354 if (HasSHA1())
355 {
356 SHA1_HashMultipleBlocks_ARMV8(m_state, input, length, BIG_ENDIAN_ORDER);
357 return length & (SHA1::BLOCKSIZE - 1);
358 }
359#endif
360
361 const bool noReverse = NativeByteOrderIs(this->GetByteOrder());
362 word32 *dataBuf = this->DataBuf();
363 do
364 {
365 if (noReverse)
366 {
367 SHA1_HashBlock_CXX(m_state, input);
368 }
369 else
370 {
371 ByteReverse(dataBuf, input, SHA1::BLOCKSIZE);
372 SHA1_HashBlock_CXX(m_state, dataBuf);
373 }
374
375 input += SHA1::BLOCKSIZE/sizeof(word32);
376 length -= SHA1::BLOCKSIZE;
377 }
378 while (length >= SHA1::BLOCKSIZE);
379 return length;
380}
381
382// *************************************************************
383
384ANONYMOUS_NAMESPACE_BEGIN
385
386#define a(i) T[(0-i)&7]
387#define b(i) T[(1-i)&7]
388#define c(i) T[(2-i)&7]
389#define d(i) T[(3-i)&7]
390#define e(i) T[(4-i)&7]
391#define f(i) T[(5-i)&7]
392#define g(i) T[(6-i)&7]
393#define h(i) T[(7-i)&7]
394
395#define blk0(i) (W[i] = data[i])
396#define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))
397
398#define Ch(x,y,z) (z^(x&(y^z)))
399#define Maj(x,y,z) (y^((x^y)&(y^z)))
400
401#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA256_K[i+j]+(j?blk2(i):blk0(i));\
402 d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))
403
404// for SHA256
405#define s0(x) (rotrConstant<7>(x)^rotrConstant<18>(x)^(x>>3))
406#define s1(x) (rotrConstant<17>(x)^rotrConstant<19>(x)^(x>>10))
407#define S0(x) (rotrConstant<2>(x)^rotrConstant<13>(x)^rotrConstant<22>(x))
408#define S1(x) (rotrConstant<6>(x)^rotrConstant<11>(x)^rotrConstant<25>(x))
409
410void SHA256_HashBlock_CXX(word32 *state, const word32 *data)
411{
412 word32 W[16]={0}, T[8];
413 /* Copy context->state[] to working vars */
414 memcpy(T, state, sizeof(T));
415 /* 64 operations, partially loop unrolled */
416 for (unsigned int j=0; j<64; j+=16)
417 {
418 R( 0); R( 1); R( 2); R( 3);
419 R( 4); R( 5); R( 6); R( 7);
420 R( 8); R( 9); R(10); R(11);
421 R(12); R(13); R(14); R(15);
422 }
423 /* Add the working vars back into context.state[] */
424 state[0] += a(0);
425 state[1] += b(0);
426 state[2] += c(0);
427 state[3] += d(0);
428 state[4] += e(0);
429 state[5] += f(0);
430 state[6] += g(0);
431 state[7] += h(0);
432}
433
434#undef Ch
435#undef Maj
436#undef s0
437#undef s1
438#undef S0
439#undef S1
440#undef blk0
441#undef blk1
442#undef blk2
443#undef R
444
445#undef a
446#undef b
447#undef c
448#undef d
449#undef e
450#undef f
451#undef g
452#undef h
453
454ANONYMOUS_NAMESPACE_END
455
// Returns a short label for the SHA-256 implementation selected on this
// machine; shared by SHA224 and SHA256. The checks run in the order
// written and the first compiled-in feature whose runtime probe succeeds
// wins; "C++" is the fallback.
std::string SHA256_AlgorithmProvider()
{
#if CRYPTOPP_SHANI_AVAILABLE
    if (HasSHA()) {
        return "SHANI";
    }
#endif
#if CRYPTOPP_SSE2_ASM_AVAILABLE
    if (HasSSE2()) {
        return "SSE2";
    }
#endif
#if CRYPTOGAMS_ARM_SHA256
    if (HasNEON()) {
        return "NEON";
    }
    if (HasARMv7()) {
        return "ARMv7";
    }
#endif
#if CRYPTOPP_ARM_SHA2_AVAILABLE
    if (HasSHA2()) {
        return "ARMv8";
    }
#endif
#if (CRYPTOPP_POWER8_SHA_AVAILABLE)
    if (HasSHA256()) {
        return "Power8";
    }
#endif
    return "C++";
}
482
483std::string SHA224::AlgorithmProvider() const
484{
485 return SHA256_AlgorithmProvider();
486}
487
488void SHA224::InitState(HashWordType *state)
489{
490 static const word32 s[8] = {
491 0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939,
492 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4};
493 memcpy(state, s, sizeof(s));
494}
495
496void SHA256::InitState(HashWordType *state)
497{
498 static const word32 s[8] = {
499 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
500 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
501 memcpy(state, s, sizeof(s));
502}
503#endif // Not CRYPTOPP_GENERATE_X64_MASM
504
#if defined(CRYPTOPP_X86_ASM_AVAILABLE)

ANONYMOUS_NAMESPACE_BEGIN

// SHA-256 over consecutive 64-byte blocks, written once in assembler
// macros and built as GNU inline assembly, MSVC inline assembly, or (with
// CRYPTOPP_GENERATE_X64_MASM) as a stand-alone MASM procedure.
//   state - eight chaining words, updated in place
//   data  - input bytes; each word is byte-swapped while it is copied into
//           the local schedule
//   len   - byte count; data+len is stored as the end-of-input marker. On
//           32-bit x86 the low bit of len additionally selects the code
//           path labelled "non-SSE2".
// Locals sit at BASE: eight working words, sixteen schedule words, then
// four pointer-sized slots (K_END, STATE_SAVE, DATA_SAVE, DATA_END).
void CRYPTOPP_FASTCALL SHA256_HashMultipleBlocks_SSE2(word32 *state, const word32 *data, size_t len)
{
    #define LOCALS_SIZE 8*4 + 16*4 + 4*WORD_SZ
    #define H(i) [BASE+ASM_MOD(1024+7-(i),8)*4]
    #define G(i) H(i+1)
    #define F(i) H(i+2)
    #define E(i) H(i+3)
    #define D(i) H(i+4)
    #define C(i) H(i+5)
    #define B(i) H(i+6)
    #define A(i) H(i+7)
    #define Wt(i) BASE+8*4+ASM_MOD(1024+15-(i),16)*4
    #define Wt_2(i) Wt((i)-2)
    #define Wt_15(i) Wt((i)-15)
    #define Wt_7(i) Wt((i)-7)
    #define K_END [BASE+8*4+16*4+0*WORD_SZ]
    #define STATE_SAVE [BASE+8*4+16*4+1*WORD_SZ]
    #define DATA_SAVE [BASE+8*4+16*4+2*WORD_SZ]
    #define DATA_END [BASE+8*4+16*4+3*WORD_SZ]
    #define Kt(i) WORD_REG(si)+(i)*4
#if CRYPTOPP_BOOL_X86
    #define BASE esp+4
#elif defined(__GNUC__)
    #define BASE r8
#else
    #define BASE rsp
#endif

#define RA0(i, edx, edi) \
    AS2( add edx, [Kt(i)] )\
    AS2( add edx, [Wt(i)] )\
    AS2( add edx, H(i) )\

#define RA1(i, edx, edi)

#define RB0(i, edx, edi)

#define RB1(i, edx, edi) \
    AS2( mov AS_REG_7d, [Wt_2(i)] )\
    AS2( mov edi, [Wt_15(i)])\
    AS2( mov ebx, AS_REG_7d )\
    AS2( shr AS_REG_7d, 10 )\
    AS2( ror ebx, 17 )\
    AS2( xor AS_REG_7d, ebx )\
    AS2( ror ebx, 2 )\
    AS2( xor ebx, AS_REG_7d )/* s1(W_t-2) */\
    AS2( add ebx, [Wt_7(i)])\
    AS2( mov AS_REG_7d, edi )\
    AS2( shr AS_REG_7d, 3 )\
    AS2( ror edi, 7 )\
    AS2( add ebx, [Wt(i)])/* s1(W_t-2) + W_t-7 + W_t-16 */\
    AS2( xor AS_REG_7d, edi )\
    AS2( add edx, [Kt(i)])\
    AS2( ror edi, 11 )\
    AS2( add edx, H(i) )\
    AS2( xor AS_REG_7d, edi )/* s0(W_t-15) */\
    AS2( add AS_REG_7d, ebx )/* W_t = s1(W_t-2) + W_t-7 + s0(W_t-15) W_t-16*/\
    AS2( mov [Wt(i)], AS_REG_7d)\
    AS2( add edx, AS_REG_7d )\

#define ROUND(i, r, eax, ecx, edi, edx)\
    /* in: edi = E */\
    /* unused: eax, ecx, temp: ebx, AS_REG_7d, out: edx = T1 */\
    AS2( mov edx, F(i) )\
    AS2( xor edx, G(i) )\
    AS2( and edx, edi )\
    AS2( xor edx, G(i) )/* Ch(E,F,G) = (G^(E&(F^G))) */\
    AS2( mov AS_REG_7d, edi )\
    AS2( ror edi, 6 )\
    AS2( ror AS_REG_7d, 25 )\
    RA##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\
    AS2( xor AS_REG_7d, edi )\
    AS2( ror edi, 5 )\
    AS2( xor AS_REG_7d, edi )/* S1(E) */\
    AS2( add edx, AS_REG_7d )/* T1 = S1(E) + Ch(E,F,G) + H + Wt + Kt */\
    RB##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\
    /* in: ecx = A, eax = B^C, edx = T1 */\
    /* unused: edx, temp: ebx, AS_REG_7d, out: eax = A, ecx = B^C, edx = E */\
    AS2( mov ebx, ecx )\
    AS2( xor ecx, B(i) )/* A^B */\
    AS2( and eax, ecx )\
    AS2( xor eax, B(i) )/* Maj(A,B,C) = B^((A^B)&(B^C) */\
    AS2( mov AS_REG_7d, ebx )\
    AS2( ror ebx, 2 )\
    AS2( add eax, edx )/* T1 + Maj(A,B,C) */\
    AS2( add edx, D(i) )\
    AS2( mov D(i), edx )\
    AS2( ror AS_REG_7d, 22 )\
    AS2( xor AS_REG_7d, ebx )\
    AS2( ror ebx, 11 )\
    AS2( xor AS_REG_7d, ebx )\
    AS2( add eax, AS_REG_7d )/* T1 + S0(A) + Maj(A,B,C) */\
    AS2( mov H(i), eax )\

// Unroll the use of CRYPTOPP_BOOL_X64 in assembler math. The GAS assembler on X32 (version 2.25)
// complains "Error: invalid operands (*ABS* and *UND* sections) for `*` and `-`"
#if CRYPTOPP_BOOL_X64
#define SWAP_COPY(i) \
    AS2( mov WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\
    AS1( bswap WORD_REG(bx))\
    AS2( mov [Wt(i*2+1)], WORD_REG(bx))
#else // X86 and X32
#define SWAP_COPY(i) \
    AS2( mov WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\
    AS1( bswap WORD_REG(bx))\
    AS2( mov [Wt(i)], WORD_REG(bx))
#endif

#if defined(__GNUC__)
    #if CRYPTOPP_BOOL_X64
        // Scratch area for the locals; passed to the asm as operand %4.
        FixedSizeAlignedSecBlock<byte, LOCALS_SIZE> workspace;
    #endif
    __asm__ __volatile__
    (
    #if CRYPTOPP_BOOL_X64
        "lea %4, %%r8;"
    #endif
    INTEL_NOPREFIX
#elif defined(CRYPTOPP_GENERATE_X64_MASM)
        ALIGN 8
    SHA256_HashMultipleBlocks_SSE2 PROC FRAME
        rex_push_reg rsi
        push_reg rdi
        push_reg rbx
        push_reg rbp
        alloc_stack(LOCALS_SIZE+8)
        .endprolog
        mov rdi, r8
        lea rsi, [?SHA256_K@CryptoPP@@3QBIB + 48*4]
#endif

#if CRYPTOPP_BOOL_X86
    #ifndef __GNUC__
        AS2( mov edi, [len])
        AS2( lea WORD_REG(si), [SHA256_K+48*4])
    #endif
    #if !defined(_MSC_VER) || (_MSC_VER < 1400)
        AS_PUSH_IF86(bx)
    #endif

    AS_PUSH_IF86(bp)
    AS2( mov ebx, esp)
    AS2( and esp, -16)
    AS2( sub WORD_REG(sp), LOCALS_SIZE)
    AS_PUSH_IF86(bx)
#endif
    AS2( mov STATE_SAVE, WORD_REG(cx))
    AS2( mov DATA_SAVE, WORD_REG(dx))
    AS2( lea WORD_REG(ax), [WORD_REG(di) + WORD_REG(dx)])
    AS2( mov DATA_END, WORD_REG(ax))
    AS2( mov K_END, WORD_REG(si))

#if CRYPTOPP_SSE2_ASM_AVAILABLE
#if CRYPTOPP_BOOL_X86
    AS2( test edi, 1)
    ASJ( jnz, 2, f)
    AS1( dec DWORD PTR K_END)
#endif
    AS2( movdqu xmm0, XMMWORD_PTR [WORD_REG(cx)+0*16])
    AS2( movdqu xmm1, XMMWORD_PTR [WORD_REG(cx)+1*16])
#endif

#if CRYPTOPP_BOOL_X86
#if CRYPTOPP_SSE2_ASM_AVAILABLE
    ASJ( jmp, 0, f)
#endif
    ASL(2) // non-SSE2
    AS2( mov esi, ecx)
    AS2( lea edi, A(0))
    AS2( mov ecx, 8)
ATT_NOPREFIX
    AS1( rep movsd)
INTEL_NOPREFIX
    AS2( mov esi, K_END)
    ASJ( jmp, 3, f)
#endif

#if CRYPTOPP_SSE2_ASM_AVAILABLE
    ASL(0)
    AS2( movdqu E(0), xmm1)
    AS2( movdqu A(0), xmm0)
#endif
#if CRYPTOPP_BOOL_X86
    ASL(3)
#endif
    AS2( sub WORD_REG(si), 48*4)
    SWAP_COPY(0) SWAP_COPY(1) SWAP_COPY(2) SWAP_COPY(3)
    SWAP_COPY(4) SWAP_COPY(5) SWAP_COPY(6) SWAP_COPY(7)
#if CRYPTOPP_BOOL_X86
    SWAP_COPY(8) SWAP_COPY(9) SWAP_COPY(10) SWAP_COPY(11)
    SWAP_COPY(12) SWAP_COPY(13) SWAP_COPY(14) SWAP_COPY(15)
#endif
    AS2( mov edi, E(0)) // E
    AS2( mov eax, B(0)) // B
    AS2( xor eax, C(0)) // B^C
    AS2( mov ecx, A(0)) // A

    ROUND(0, 0, eax, ecx, edi, edx)
    ROUND(1, 0, ecx, eax, edx, edi)
    ROUND(2, 0, eax, ecx, edi, edx)
    ROUND(3, 0, ecx, eax, edx, edi)
    ROUND(4, 0, eax, ecx, edi, edx)
    ROUND(5, 0, ecx, eax, edx, edi)
    ROUND(6, 0, eax, ecx, edi, edx)
    ROUND(7, 0, ecx, eax, edx, edi)
    ROUND(8, 0, eax, ecx, edi, edx)
    ROUND(9, 0, ecx, eax, edx, edi)
    ROUND(10, 0, eax, ecx, edi, edx)
    ROUND(11, 0, ecx, eax, edx, edi)
    ROUND(12, 0, eax, ecx, edi, edx)
    ROUND(13, 0, ecx, eax, edx, edi)
    ROUND(14, 0, eax, ecx, edi, edx)
    ROUND(15, 0, ecx, eax, edx, edi)

    ASL(1)
    AS2(add WORD_REG(si), 4*16)
    ROUND(0, 1, eax, ecx, edi, edx)
    ROUND(1, 1, ecx, eax, edx, edi)
    ROUND(2, 1, eax, ecx, edi, edx)
    ROUND(3, 1, ecx, eax, edx, edi)
    ROUND(4, 1, eax, ecx, edi, edx)
    ROUND(5, 1, ecx, eax, edx, edi)
    ROUND(6, 1, eax, ecx, edi, edx)
    ROUND(7, 1, ecx, eax, edx, edi)
    ROUND(8, 1, eax, ecx, edi, edx)
    ROUND(9, 1, ecx, eax, edx, edi)
    ROUND(10, 1, eax, ecx, edi, edx)
    ROUND(11, 1, ecx, eax, edx, edi)
    ROUND(12, 1, eax, ecx, edi, edx)
    ROUND(13, 1, ecx, eax, edx, edi)
    ROUND(14, 1, eax, ecx, edi, edx)
    ROUND(15, 1, ecx, eax, edx, edi)
    AS2( cmp WORD_REG(si), K_END)
    ATT_NOPREFIX
    ASJ( jb, 1, b)
    INTEL_NOPREFIX

    AS2( mov WORD_REG(dx), DATA_SAVE)
    AS2( add WORD_REG(dx), 64)
    AS2( mov AS_REG_7, STATE_SAVE)
    AS2( mov DATA_SAVE, WORD_REG(dx))

#if CRYPTOPP_SSE2_ASM_AVAILABLE
#if CRYPTOPP_BOOL_X86
    AS2( test DWORD PTR K_END, 1)
    ASJ( jz, 4, f)
#endif
    AS2( movdqu xmm1, XMMWORD_PTR [AS_REG_7+1*16])
    AS2( movdqu xmm0, XMMWORD_PTR [AS_REG_7+0*16])
    AS2( paddd xmm1, E(0))
    AS2( paddd xmm0, A(0))
    AS2( movdqu [AS_REG_7+1*16], xmm1)
    AS2( movdqu [AS_REG_7+0*16], xmm0)
    AS2( cmp WORD_REG(dx), DATA_END)
    ATT_NOPREFIX
    ASJ( jb, 0, b)
    INTEL_NOPREFIX
#endif

#if CRYPTOPP_BOOL_X86
#if CRYPTOPP_SSE2_ASM_AVAILABLE
    ASJ( jmp, 5, f)
    ASL(4) // non-SSE2
#endif
    AS2( add [AS_REG_7+0*4], ecx) // A
    AS2( add [AS_REG_7+4*4], edi) // E
    AS2( mov eax, B(0))
    AS2( mov ebx, C(0))
    AS2( mov ecx, D(0))
    AS2( add [AS_REG_7+1*4], eax)
    AS2( add [AS_REG_7+2*4], ebx)
    AS2( add [AS_REG_7+3*4], ecx)
    AS2( mov eax, F(0))
    AS2( mov ebx, G(0))
    AS2( mov ecx, H(0))
    AS2( add [AS_REG_7+5*4], eax)
    AS2( add [AS_REG_7+6*4], ebx)
    AS2( add [AS_REG_7+7*4], ecx)
    AS2( mov ecx, AS_REG_7d)
    AS2( cmp WORD_REG(dx), DATA_END)
    ASJ( jb, 2, b)
#if CRYPTOPP_SSE2_ASM_AVAILABLE
    ASL(5)
#endif
#endif

    AS_POP_IF86(sp)
    AS_POP_IF86(bp)
    #if !defined(_MSC_VER) || (_MSC_VER < 1400)
        AS_POP_IF86(bx)
    #endif

#ifdef CRYPTOPP_GENERATE_X64_MASM
    add rsp, LOCALS_SIZE+8
    pop rbp
    pop rbx
    pop rdi
    pop rsi
    ret
    SHA256_HashMultipleBlocks_SSE2 ENDP
#endif

#ifdef __GNUC__
    ATT_PREFIX
    :
    : "c" (state), "d" (data), "S" (SHA256_K+48), "D" (len)
    #if CRYPTOPP_BOOL_X64
        , "m" (workspace[0])
    #endif
    : "memory", "cc", "%eax"
    #if CRYPTOPP_BOOL_X64
        , "%rbx", "%r8", "%r10"
    #endif
    );
#endif
}

ANONYMOUS_NAMESPACE_END

#endif // CRYPTOPP_X86_ASM_AVAILABLE
829
830#ifndef CRYPTOPP_GENERATE_X64_MASM
831
// When the MASM build of the SHA-256 routine is available, declare it
// here with C linkage so the class methods below can call it.
#ifdef CRYPTOPP_X64_MASM_AVAILABLE
extern "C" void CRYPTOPP_FASTCALL SHA256_HashMultipleBlocks_SSE2(
    word32 *state, const word32 *data, size_t len);
#endif
837
838std::string SHA256::AlgorithmProvider() const
839{
840 return SHA256_AlgorithmProvider();
841}
842
843void SHA256::Transform(word32 *state, const word32 *data)
844{
845 CRYPTOPP_ASSERT(state);
846 CRYPTOPP_ASSERT(data);
847
848#if CRYPTOPP_SHANI_AVAILABLE
849 if (HasSHA())
850 {
851 SHA256_HashMultipleBlocks_SHANI(state, data, SHA256::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
852 return;
853 }
854#endif
855#if CRYPTOGAMS_ARM_SHA256 && 0
856 if (HasNEON())
857 {
858# if defined(CRYPTOPP_LITTLE_ENDIAN)
859 word32 dataBuf[16];
860 ByteReverse(dataBuf, data, SHA256::BLOCKSIZE);
861 cryptogams_sha256_block_data_order_neon(state, data, 1);
862# else
863 cryptogams_sha256_block_data_order_neon(state, data, 1);
864# endif
865 return;
866 }
867 if (HasARMv7())
868 {
869# if defined(CRYPTOPP_LITTLE_ENDIAN)
870 word32 dataBuf[16];
871 ByteReverse(dataBuf, data, SHA256::BLOCKSIZE);
872 cryptogams_sha256_block_data_order(state, data, 1);
873# else
874 cryptogams_sha256_block_data_order(state, data, 1);
875# endif
876 return;
877 }
878#endif
879#if CRYPTOPP_ARM_SHA2_AVAILABLE
880 if (HasSHA2())
881 {
882 SHA256_HashMultipleBlocks_ARMV8(state, data, SHA256::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
883 return;
884 }
885#endif
886#if CRYPTOPP_POWER8_SHA_AVAILABLE
887 if (HasSHA256())
888 {
889 SHA256_HashMultipleBlocks_POWER8(state, data, SHA256::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
890 return;
891 }
892#endif
893
894 SHA256_HashBlock_CXX(state, data);
895}
896
897size_t SHA256::HashMultipleBlocks(const word32 *input, size_t length)
898{
899 CRYPTOPP_ASSERT(input);
900 CRYPTOPP_ASSERT(length >= SHA256::BLOCKSIZE);
901
902#if CRYPTOPP_SHANI_AVAILABLE
903 if (HasSHA())
904 {
905 SHA256_HashMultipleBlocks_SHANI(m_state, input, length, BIG_ENDIAN_ORDER);
906 return length & (SHA256::BLOCKSIZE - 1);
907 }
908#endif
909#if CRYPTOPP_SSE2_ASM_AVAILABLE || CRYPTOPP_X64_MASM_AVAILABLE
910 if (HasSSE2())
911 {
912 const size_t res = length & (SHA256::BLOCKSIZE - 1);
913 SHA256_HashMultipleBlocks_SSE2(m_state, input, length-res);
914 return res;
915 }
916#endif
917#if CRYPTOGAMS_ARM_SHA256
918 if (HasNEON())
919 {
920 cryptogams_sha256_block_data_order_neon(m_state, input, length / SHA256::BLOCKSIZE);
921 return length & (SHA256::BLOCKSIZE - 1);
922 }
923 if (HasARMv7())
924 {
925 cryptogams_sha256_block_data_order(m_state, input, length / SHA256::BLOCKSIZE);
926 return length & (SHA256::BLOCKSIZE - 1);
927 }
928#endif
929#if CRYPTOPP_ARM_SHA2_AVAILABLE
930 if (HasSHA2())
931 {
932 SHA256_HashMultipleBlocks_ARMV8(m_state, input, length, BIG_ENDIAN_ORDER);
933 return length & (SHA256::BLOCKSIZE - 1);
934 }
935#endif
936#if CRYPTOPP_POWER8_SHA_AVAILABLE
937 if (HasSHA256())
938 {
939 SHA256_HashMultipleBlocks_POWER8(m_state, input, length, BIG_ENDIAN_ORDER);
940 return length & (SHA256::BLOCKSIZE - 1);
941 }
942#endif
943
944 const bool noReverse = NativeByteOrderIs(this->GetByteOrder());
945 word32 *dataBuf = this->DataBuf();
946 do
947 {
948 if (noReverse)
949 {
950 SHA256_HashBlock_CXX(m_state, input);
951 }
952 else
953 {
954 ByteReverse(dataBuf, input, SHA256::BLOCKSIZE);
955 SHA256_HashBlock_CXX(m_state, dataBuf);
956 }
957
958 input += SHA256::BLOCKSIZE/sizeof(word32);
959 length -= SHA256::BLOCKSIZE;
960 }
961 while (length >= SHA256::BLOCKSIZE);
962 return length;
963}
964
965size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length)
966{
967 CRYPTOPP_ASSERT(input);
968 CRYPTOPP_ASSERT(length >= SHA256::BLOCKSIZE);
969
970#if CRYPTOPP_SHANI_AVAILABLE
971 if (HasSHA())
972 {
973 SHA256_HashMultipleBlocks_SHANI(m_state, input, length, BIG_ENDIAN_ORDER);
974 return length & (SHA256::BLOCKSIZE - 1);
975 }
976#endif
977#if CRYPTOPP_SSE2_ASM_AVAILABLE || CRYPTOPP_X64_MASM_AVAILABLE
978 if (HasSSE2())
979 {
980 const size_t res = length & (SHA256::BLOCKSIZE - 1);
981 SHA256_HashMultipleBlocks_SSE2(m_state, input, length-res);
982 return res;
983 }
984#endif
985#if CRYPTOGAMS_ARM_SHA256
986 if (HasNEON())
987 {
988 cryptogams_sha256_block_data_order_neon(m_state, input, length / SHA256::BLOCKSIZE);
989 return length & (SHA256::BLOCKSIZE - 1);
990 }
991 if (HasARMv7())
992 {
993 cryptogams_sha256_block_data_order(m_state, input, length / SHA256::BLOCKSIZE);
994 return length & (SHA256::BLOCKSIZE - 1);
995 }
996#endif
997#if CRYPTOPP_ARM_SHA2_AVAILABLE
998 if (HasSHA2())
999 {
1000 SHA256_HashMultipleBlocks_ARMV8(m_state, input, length, BIG_ENDIAN_ORDER);
1001 return length & (SHA256::BLOCKSIZE - 1);
1002 }
1003#endif
1004#if CRYPTOPP_POWER8_SHA_AVAILABLE
1005 if (HasSHA256())
1006 {
1007 SHA256_HashMultipleBlocks_POWER8(m_state, input, length, BIG_ENDIAN_ORDER);
1008 return length & (SHA256::BLOCKSIZE - 1);
1009 }
1010#endif
1011
1012 const bool noReverse = NativeByteOrderIs(this->GetByteOrder());
1013 word32 *dataBuf = this->DataBuf();
1014 do
1015 {
1016 if (noReverse)
1017 {
1018 SHA256_HashBlock_CXX(m_state, input);
1019 }
1020 else
1021 {
1022 ByteReverse(dataBuf, input, SHA256::BLOCKSIZE);
1023 SHA256_HashBlock_CXX(m_state, dataBuf);
1024 }
1025
1026 input += SHA256::BLOCKSIZE/sizeof(word32);
1027 length -= SHA256::BLOCKSIZE;
1028 }
1029 while (length >= SHA256::BLOCKSIZE);
1030 return length;
1031}
1032
1033// *************************************************************
1034
// Returns a short label naming the SHA-512 implementation selected by the
// compile-time feature macros and the runtime CPU probes below. The probes
// are tried in order; "C++" is reported when none of them applies.
std::string SHA512_AlgorithmProvider()
{
    // The SSE2 block function in this file is only compiled, and only
    // dispatched to by SHA512::Transform, when CRYPTOPP_BOOL_X86 is also set.
    // Use the same guard here so the reported provider matches the code that
    // actually runs.
#if CRYPTOPP_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86)
    if (HasSSE2())
        return "SSE2";
#endif
#if CRYPTOGAMS_ARM_SHA512
    if (HasNEON())
        return "NEON";
    if (HasARMv7())
        return "ARMv7";
#endif
#if (CRYPTOPP_POWER8_SHA_AVAILABLE)
    if (HasSHA512())
        return "Power8";
#endif
    return "C++";
}
1053
1054std::string SHA384::AlgorithmProvider() const
1055{
1056 return SHA512_AlgorithmProvider();
1057}
1058
1059std::string SHA512::AlgorithmProvider() const
1060{
1061 return SHA512_AlgorithmProvider();
1062}
1063
1064void SHA384::InitState(HashWordType *state)
1065{
1066 const word64 s[8] = {
1067 W64LIT(0xcbbb9d5dc1059ed8), W64LIT(0x629a292a367cd507),
1068 W64LIT(0x9159015a3070dd17), W64LIT(0x152fecd8f70e5939),
1069 W64LIT(0x67332667ffc00b31), W64LIT(0x8eb44a8768581511),
1070 W64LIT(0xdb0c2e0d64f98fa7), W64LIT(0x47b5481dbefa4fa4)};
1071 memcpy(state, s, sizeof(s));
1072}
1073
1074void SHA512::InitState(HashWordType *state)
1075{
1076 const word64 s[8] = {
1077 W64LIT(0x6a09e667f3bcc908), W64LIT(0xbb67ae8584caa73b),
1078 W64LIT(0x3c6ef372fe94f82b), W64LIT(0xa54ff53a5f1d36f1),
1079 W64LIT(0x510e527fade682d1), W64LIT(0x9b05688c2b3e6c1f),
1080 W64LIT(0x1f83d9abfb41bd6b), W64LIT(0x5be0cd19137e2179)};
1081 memcpy(state, s, sizeof(s));
1082}
1083
1084#if CRYPTOPP_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86)
1085
1086ANONYMOUS_NAMESPACE_BEGIN
1087
1088// No inlining due to https://github.com/weidai11/cryptopp/issues/684
1089// g++ -DNDEBUG -g2 -O3 -pthread -pipe -c sha.cpp
1090// sha.cpp: Assembler messages:
1091// sha.cpp:1155: Error: symbol `SHA512_Round' is already defined
1092// sha.cpp:1155: Error: symbol `SHA512_Round' is already defined
1093
// Processes one SHA-512 block using MMX/SSE2 inline assembly (32-bit x86 only,
// per the enclosing #if). The 64-byte state at `state` is read, 80 rounds are
// run over the 16 message words at `data`, and the result is added back into
// `state` in place.
//
// Register roles visible in the code below:
//   ebx  - address of the SHA512_K round-constant table
//   ecx  - state pointer, edx - data pointer
//   eax  - round counter (0..80) once the prologue has finished
//   edi  - cursor into the on-stack working-state buffer
//   esi  - cursor into the on-stack expanded-message buffer
//   mm4/mm5 - current `a` and `e` working variables
1094CRYPTOPP_NOINLINE CRYPTOPP_NAKED
1095void CRYPTOPP_FASTCALL SHA512_HashBlock_SSE2(word64 *state, const word64 *data)
1096{
 // GCC-style path: the operand constraints at the end of the asm statement put
 // SHA512_K in eax, state in ecx and data in edx; the table pointer is then
 // moved to ebx because eax is reused below.
1097#ifdef __GNUC__
1098 __asm__ __volatile__
1099 (
1100 INTEL_NOPREFIX
1101 AS_PUSH_IF86( bx)
1102 AS2( mov ebx, eax)
1103#else
 // Other compilers: save the registers used below and load the table address
 // directly. NOTE(review): state/data presumably arrive in ecx/edx through
 // CRYPTOPP_FASTCALL - confirm against the macro definition.
1104 AS1( push ebx)
1105 AS1( push esi)
1106 AS1( push edi)
1107 AS2( lea ebx, SHA512_K)
1108#endif
1109
 // Keep the incoming stack pointer in eax, round esp down to a 16-byte boundary
 // and reserve the scratch area. NOTE(review): AS_PUSH_IF86(ax) presumably pushes
 // the saved stack pointer so AS_POP_IF86(sp) can restore it at the end - confirm.
 // eax is then cleared so it can serve as the round counter.
1110 AS2( mov eax, esp)
1111 AS2( and esp, 0xfffffff0)
1112 AS2( sub esp, 27*16) // 17*16 for expanded data, 20*8 for state
1113 AS_PUSH_IF86( ax)
1114 AS2( xor eax, eax)
1115
1116 AS2( lea edi, [esp+4+8*8]) // start at middle of state buffer. will decrement pointer each round to avoid copying
1117 AS2( lea esi, [esp+4+20*8+8]) // 16-byte alignment, then add 8
1118
 // Copy the eight state words from [ecx] into the working buffer at [edi];
 // word 0 (a) is also kept in mm4 and word 4 (e) in mm5.
1119 AS2( movdqu xmm0, [ecx+0*16])
1120 AS2( movdq2q mm4, xmm0)
1121 AS2( movdqu [edi+0*16], xmm0)
1122 AS2( movdqu xmm0, [ecx+1*16])
1123 AS2( movdqu [edi+1*16], xmm0)
1124 AS2( movdqu xmm0, [ecx+2*16])
1125 AS2( movdq2q mm5, xmm0)
1126 AS2( movdqu [edi+2*16], xmm0)
1127 AS2( movdqu xmm0, [ecx+3*16])
1128 AS2( movdqu [edi+3*16], xmm0)
 // Skip over the SHA512_Round subroutine body to the first-16-rounds loop.
1129 ASJ( jmp, 0, f)
1130
 // SSE2_S0_S1(r, a, b, c): replaces MMX register r with
 // rotr(r,a) ^ rotr(r,b) ^ rotr(r,c), built from shifts; mm6 and mm7 are scratch.
1131#define SSE2_S0_S1(r, a, b, c) \
1132 AS2( movq mm6, r)\
1133 AS2( psrlq r, a)\
1134 AS2( movq mm7, r)\
1135 AS2( psllq mm6, 64-c)\
1136 AS2( pxor mm7, mm6)\
1137 AS2( psrlq r, b-a)\
1138 AS2( pxor mm7, r)\
1139 AS2( psllq mm6, c-b)\
1140 AS2( pxor mm7, mm6)\
1141 AS2( psrlq r, c-b)\
1142 AS2( pxor r, mm7)\
1143 AS2( psllq mm6, b-a)\
1144 AS2( pxor r, mm6)
1145
 // SSE2_s0(r, a, b, c): replaces each 64-bit lane of XMM register r with
 // rotr(r,a) ^ (r >> b) ^ rotr(r,c); xmm6 and xmm7 are scratch.
1146#define SSE2_s0(r, a, b, c) \
1147 AS2( movdqu xmm6, r)\
1148 AS2( psrlq r, a)\
1149 AS2( movdqu xmm7, r)\
1150 AS2( psllq xmm6, 64-c)\
1151 AS2( pxor xmm7, xmm6)\
1152 AS2( psrlq r, b-a)\
1153 AS2( pxor xmm7, r)\
1154 AS2( psrlq r, c-b)\
1155 AS2( pxor r, xmm7)\
1156 AS2( psllq xmm6, c-a)\
1157 AS2( pxor r, xmm6)
1158
 // SSE2_s1(r, a, b, c): replaces each 64-bit lane of XMM register r with
 // (r >> a) ^ rotr(r,b) ^ rotr(r,c); xmm6 and xmm7 are scratch.
1159#define SSE2_s1(r, a, b, c) \
1160 AS2( movdqu xmm6, r)\
1161 AS2( psrlq r, a)\
1162 AS2( movdqu xmm7, r)\
1163 AS2( psllq xmm6, 64-c)\
1164 AS2( pxor xmm7, xmm6)\
1165 AS2( psrlq r, b-a)\
1166 AS2( pxor xmm7, r)\
1167 AS2( psllq xmm6, c-b)\
1168 AS2( pxor xmm7, xmm6)\
1169 AS2( psrlq r, c-b)\
1170 AS2( pxor r, xmm7)
 // Local subroutine performing one round. It is entered with `call` from the
 // two loops below and returns with `ret`.
1171 ASL(SHA512_Round)
1172
1173 // k + w is in mm0, a is in mm4, e is in mm5
1174 AS2( paddq mm0, [edi+7*8]) // h
1175 AS2( movq mm2, [edi+5*8]) // f
1176 AS2( movq mm3, [edi+6*8]) // g
1177 AS2( pxor mm2, mm3)
1178 AS2( pand mm2, mm5)
1179 SSE2_S0_S1(mm5,14,18,41)
1180 AS2( pxor mm2, mm3)
1181 AS2( paddq mm0, mm2) // h += Ch(e,f,g)
1182 AS2( paddq mm5, mm0) // h += S1(e)
1183 AS2( movq mm2, [edi+1*8]) // b
1184 AS2( movq mm1, mm2)
1185 AS2( por mm2, mm4)
1186 AS2( pand mm2, [edi+2*8]) // c
1187 AS2( pand mm1, mm4)
1188 AS2( por mm1, mm2)
1189 AS2( paddq mm1, mm5) // temp = h + Maj(a,b,c)
1190 AS2( paddq mm5, [edi+3*8]) // e = d + h
1191 AS2( movq [edi+3*8], mm5)
1192 AS2( movq [edi+11*8], mm5)
1193 SSE2_S0_S1(mm4,28,34,39) // S0(a)
1194 AS2( paddq mm4, mm1) // a = temp + S0(a)
1195 AS2( movq [edi-8], mm4)
1196 AS2( movq [edi+7*8], mm4)
1197 AS1( ret)
1198
1199 // first 16 rounds
 // Each message word is loaded from [edx], stored twice in the expanded-data
 // buffer (at index eax and eax+16), and added to the round constant from [ebx].
1200 ASL(0)
1201 AS2( movq mm0, [edx+eax*8])
1202 AS2( movq [esi+eax*8], mm0)
1203 AS2( movq [esi+eax*8+16*8], mm0)
1204 AS2( paddq mm0, [ebx+eax*8])
1205 ASC( call, SHA512_Round)
1206
1207 AS1( inc eax)
1208 AS2( sub edi, 8)
1209 AS2( test eax, 7)
1210 ASJ( jnz, 0, b)
 // Every 8 rounds the state cursor is moved back up by 8 words.
1211 AS2( add edi, 8*8)
1212 AS2( cmp eax, 16)
1213 ASJ( jne, 0, b)
1214
1215 // rest of the rounds
1216 AS2( movdqu xmm0, [esi+(16-2)*8])
1217 ASL(1)
1218 // data expansion, W[i-2] already in xmm0
1219 AS2( movdqu xmm3, [esi])
1220 AS2( paddq xmm3, [esi+(16-7)*8])
1221 AS2( movdqu xmm2, [esi+(16-15)*8])
1222 SSE2_s1(xmm0, 6, 19, 61)
1223 AS2( paddq xmm0, xmm3)
1224 SSE2_s0(xmm2, 1, 7, 8)
1225 AS2( paddq xmm0, xmm2)
1226 AS2( movdq2q mm0, xmm0)
1227 AS2( movhlps xmm1, xmm0)
1228 AS2( paddq mm0, [ebx+eax*8])
1229 AS2( movlps [esi], xmm0)
1230 AS2( movlps [esi+8], xmm1)
1231 AS2( movlps [esi+8*16], xmm0)
1232 AS2( movlps [esi+8*17], xmm1)
1233 // 2 rounds
1234 ASC( call, SHA512_Round)
1235 AS2( sub edi, 8)
1236 AS2( movdq2q mm0, xmm1)
1237 AS2( paddq mm0, [ebx+eax*8+8])
1238 ASC( call, SHA512_Round)
1239 // update indices and loop
1240 AS2( add esi, 16)
1241 AS2( add eax, 2)
1242 AS2( sub edi, 8)
1243 AS2( test eax, 7)
1244 ASJ( jnz, 1, b)
1245 // do housekeeping every 8 rounds
1246 AS2( mov esi, 0xf)
1247 AS2( and esi, eax)
1248 AS2( lea esi, [esp+4+20*8+8+esi*8])
1249 AS2( add edi, 8*8)
1250 AS2( cmp eax, 80)
1251 ASJ( jne, 1, b)
1252
 // SSE2_CombineState(i): adds 16 bytes (two words) of the working state at
 // [edi] into the caller's state at [ecx] and stores the sum back to [ecx].
1253#define SSE2_CombineState(i) \
1254 AS2( movdqu xmm0, [edi+i*16])\
1255 AS2( paddq xmm0, [ecx+i*16])\
1256 AS2( movdqu [ecx+i*16], xmm0)
1257
1258 SSE2_CombineState(0)
1259 SSE2_CombineState(1)
1260 SSE2_CombineState(2)
1261 SSE2_CombineState(3)
1262
 // Epilogue: restore the stack pointer saved in the prologue and issue emms
 // to empty the MMX state, since MMX registers were used above.
1263 AS_POP_IF86( sp)
1264 AS1( emms)
1265
1266#if defined(__GNUC__)
1267 AS_POP_IF86( bx)
1268 ATT_PREFIX
1269 :
1270 : "a" (SHA512_K), "c" (state), "d" (data)
1271 : "%esi", "%edi", "memory", "cc"
1272 );
1273#else
1274 AS1( pop edi)
1275 AS1( pop esi)
1276 AS1( pop ebx)
1277 AS1( ret)
1278#endif
1279}
1280
1281ANONYMOUS_NAMESPACE_END
1282
1283#endif // CRYPTOPP_SSE2_ASM_AVAILABLE
1284
1285ANONYMOUS_NAMESPACE_BEGIN
1286
1287#define a(i) T[(0-i)&7]
1288#define b(i) T[(1-i)&7]
1289#define c(i) T[(2-i)&7]
1290#define d(i) T[(3-i)&7]
1291#define e(i) T[(4-i)&7]
1292#define f(i) T[(5-i)&7]
1293#define g(i) T[(6-i)&7]
1294#define h(i) T[(7-i)&7]
1295
1296#define blk0(i) (W[i]=data[i])
1297#define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))
1298
1299#define Ch(x,y,z) (z^(x&(y^z)))
1300#define Maj(x,y,z) (y^((x^y)&(y^z)))
1301
1302#define s0(x) (rotrConstant<1>(x)^rotrConstant<8>(x)^(x>>7))
1303#define s1(x) (rotrConstant<19>(x)^rotrConstant<61>(x)^(x>>6))
1304#define S0(x) (rotrConstant<28>(x)^rotrConstant<34>(x)^rotrConstant<39>(x))
1305#define S1(x) (rotrConstant<14>(x)^rotrConstant<18>(x)^rotrConstant<41>(x))
1306
1307#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA512_K[i+j]+\
1308 (j?blk2(i):blk0(i));d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i));
1309
1310void SHA512_HashBlock_CXX(word64 *state, const word64 *data)
1311{
1312 CRYPTOPP_ASSERT(state);
1313 CRYPTOPP_ASSERT(data);
1314
1315 word64 W[16]={0}, T[8];
1316
1317 /* Copy context->state[] to working vars */
1318 std::memcpy(T, state, sizeof(T));
1319
1320 /* 80 operations, partially loop unrolled */
1321 for (unsigned int j=0; j<80; j+=16)
1322 {
1323 R( 0); R( 1); R( 2); R( 3);
1324 R( 4); R( 5); R( 6); R( 7);
1325 R( 8); R( 9); R(10); R(11);
1326 R(12); R(13); R(14); R(15);
1327 }
1328
1329 state[0] += a(0);
1330 state[1] += b(0);
1331 state[2] += c(0);
1332 state[3] += d(0);
1333 state[4] += e(0);
1334 state[5] += f(0);
1335 state[6] += g(0);
1336 state[7] += h(0);
1337}
1338
1339ANONYMOUS_NAMESPACE_END
1340
1341void SHA512::Transform(word64 *state, const word64 *data)
1342{
1343 CRYPTOPP_ASSERT(state);
1344 CRYPTOPP_ASSERT(data);
1345
1346#if CRYPTOPP_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86)
1347 if (HasSSE2())
1348 {
1349 SHA512_HashBlock_SSE2(state, data);
1350 return;
1351 }
1352#endif
1353#if CRYPTOGAMS_ARM_SHA512
1354 if (HasNEON())
1355 {
1356# if (CRYPTOPP_LITTLE_ENDIAN)
1357 word64 dataBuf[16];
1358 ByteReverse(dataBuf, data, SHA512::BLOCKSIZE);
1359 cryptogams_sha512_block_data_order_neon(state, dataBuf, 1);
1360# else
1361 cryptogams_sha512_block_data_order_neon(state, data, 1);
1362# endif
1363 return;
1364 }
1365 if (HasARMv7())
1366 {
1367# if (CRYPTOPP_LITTLE_ENDIAN)
1368 word64 dataBuf[16];
1369 ByteReverse(dataBuf, data, SHA512::BLOCKSIZE);
1370 cryptogams_sha512_block_data_order(state, dataBuf, 1);
1371# else
1372 cryptogams_sha512_block_data_order(state, data, 1);
1373# endif
1374 return;
1375 }
1376#endif
1377#if CRYPTOPP_POWER8_SHA_AVAILABLE
1378 if (HasSHA512())
1379 {
1380 SHA512_HashMultipleBlocks_POWER8(state, data, SHA512::BLOCKSIZE, BIG_ENDIAN_ORDER);
1381 return;
1382 }
1383#endif
1384
1385 SHA512_HashBlock_CXX(state, data);
1386}
1387
// Remove the helper macros used by the block functions above so the short
// names (a..h, R, Ch, ...) do not leak into the rest of the translation unit.
// blk1 has no #define in this part of the file; #undef of a name that is not
// currently defined is permitted and has no effect.
1388#undef Ch
1389#undef Maj
1390
1391#undef s0
1392#undef s1
1393#undef S0
1394#undef S1
1395
1396#undef blk0
1397#undef blk1
1398#undef blk2
1399
1400#undef R
1401
1402#undef a
1403#undef b
1404#undef c
1405#undef d
1406#undef e
1407#undef f
1408#undef g
1409#undef h
1410
1411NAMESPACE_END
1412
1413#endif // Not CRYPTOPP_GENERATE_X64_MASM
1414#endif // Not CRYPTOPP_IMPORTS
Fixed size stack-based SecBlock with 16-byte alignment.
Definition secblock.h:1259
static void Transform(HashWordType *digest, const HashWordType *data)
Operate the hash.
static void InitState(HashWordType *state)
Initialize state array.
std::string AlgorithmProvider() const
Retrieve the provider of this algorithm.
static void InitState(HashWordType *state)
Initialize state array.
std::string AlgorithmProvider() const
Retrieve the provider of this algorithm.
static void InitState(HashWordType *state)
Initialize state array.
static void Transform(HashWordType *digest, const HashWordType *data)
Operate the hash.
std::string AlgorithmProvider() const
Retrieve the provider of this algorithm.
std::string AlgorithmProvider() const
Retrieve the provider of this algorithm.
static void InitState(HashWordType *state)
Initialize state array.
static void Transform(HashWordType *digest, const HashWordType *data)
Operate the hash.
std::string AlgorithmProvider() const
Retrieve the provider of this algorithm.
static void InitState(HashWordType *state)
Initialize state array.
Library configuration file.
#define CRYPTOPP_BOOL_X86
32-bit x86 platform
Definition config_cpu.h:52
#define CRYPTOPP_BOOL_X64
64-bit x86 platform
Definition config_cpu.h:48
#define W64LIT(x)
Declare an unsigned word64.
Definition config_int.h:119
unsigned int word32
32-bit unsigned datatype
Definition config_int.h:62
unsigned long long word64
64-bit unsigned datatype
Definition config_int.h:91
Functions for CPU features and intrinsics.
ByteOrder
Provides the byte ordering.
Definition cryptlib.h:143
@ LITTLE_ENDIAN_ORDER
byte order is little-endian
Definition cryptlib.h:145
@ BIG_ENDIAN_ORDER
byte order is big-endian
Definition cryptlib.h:147
Utility functions for the Crypto++ library.
byte ByteReverse(byte value)
Reverses bytes in an 8-bit value.
Definition misc.h:2021
bool NativeByteOrderIs(ByteOrder order)
Determines whether order follows native byte ordering.
Definition misc.h:1271
Crypto++ library namespace.
Precompiled header file.
Classes and functions for secure memory allocations.
Classes for SHA-1 and SHA-2 family of message digests.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition trap.h:68