Crypto++ 8.6
Free C++ class library of cryptographic schemes
xts.cpp
1// xts.cpp - written and placed in the public domain by Jeffrey Walton
2
3// Aarch32, Aarch64, Altivec and X86_64 include SIMD as part of the
4// base architecture. We can use the SIMD code below without an
5// architecture option. No runtime tests are required. Unfortunately,
6// we can't use it on Altivec because an architecture switch is required.
7// The updated XorBuffer gains 0.3 to 1.5 cpb on the architectures for
8// 16-byte block sizes.
9
10#include "pch.h"
11
12#include "xts.h"
13#include "misc.h"
14#include "modes.h"
15#include "cpu.h"
16
17#if defined(CRYPTOPP_DEBUG)
18# include "aes.h"
19# include "threefish.h"
20#endif
21
22// 0.3 to 0.4 cpb profit
23#if defined(__SSE2__) || defined(_M_X64)
24# include <emmintrin.h>
25#endif
26
27#if defined(__aarch32__) || defined(__aarch64__) || defined(_M_ARM64)
28# if (CRYPTOPP_ARM_NEON_HEADER)
29# include <arm_neon.h>
30# endif
31#endif
32
33#if defined(__ALTIVEC__)
34# include "ppc_simd.h"
35#endif
36
37ANONYMOUS_NAMESPACE_BEGIN
38
39using namespace CryptoPP;
40
#if defined(CRYPTOPP_DEBUG) && !defined(CRYPTOPP_DOXYGEN_PROCESSING)

using CryptoPP::AES;
using CryptoPP::XTS_Mode;
using CryptoPP::Threefish512;

// Debug-build helper: declaring these objects forces the compiler to
// instantiate the XTS templates so template errors show up when this file
// is compiled. No caller is visible in this file.
void Modes_TestInstantiations()
{
    typedef XTS_Mode<AES>::Encryption AesXtsEncryption;
    typedef XTS_Mode<AES>::Decryption AesXtsDecryption;

    // Two encryptor/decryptor pairs over a 16-byte block cipher.
    AesXtsEncryption aesEnc0;
    AesXtsDecryption aesDec0;
    AesXtsEncryption aesEnc1;
    AesXtsDecryption aesDec1;

#if CRYPTOPP_XTS_WIDE_BLOCK_CIPHERS
    // Wide-block instantiations, only when wide block support is enabled.
    typedef XTS_Mode<Threefish512>::Encryption WideXtsEncryption;
    typedef XTS_Mode<Threefish512>::Decryption WideXtsDecryption;

    WideXtsEncryption wideEnc;
    WideXtsDecryption wideDec;
#endif
}
#endif  // CRYPTOPP_DEBUG
60
61inline void XorBuffer(byte *output, const byte *input, const byte *mask, size_t count)
62{
63 CRYPTOPP_ASSERT(count >= 16 && (count % 16 == 0));
64
65#if defined(CRYPTOPP_DISABLE_ASM)
66 xorbuf(output, input, mask, count);
67
68#elif defined(__SSE2__) || defined(_M_X64)
69 for (size_t i=0; i<count; i+=16)
70 _mm_storeu_si128(M128_CAST(output+i),
71 _mm_xor_si128(
72 _mm_loadu_si128(CONST_M128_CAST(input+i)),
73 _mm_loadu_si128(CONST_M128_CAST(mask+i))));
74
75#elif defined(__aarch32__) || defined(__aarch64__) || defined(_M_ARM64)
76 for (size_t i=0; i<count; i+=16)
77 vst1q_u8(output+i, veorq_u8(vld1q_u8(input+i), vld1q_u8(mask+i)));
78
79#elif defined(__ALTIVEC__)
80 for (size_t i=0; i<count; i+=16)
81 VecStore(VecXor(VecLoad(input+i), VecLoad(mask+i)), output+i);
82
83#else
84 xorbuf(output, input, mask, count);
85#endif
86}
87
88inline void XorBuffer(byte *buf, const byte *mask, size_t count)
89{
90 XorBuffer(buf, buf, mask, count);
91}
92
93// Borrowed from CMAC, but little-endian representation
94inline void GF_Double(byte *out, const byte* in, unsigned int len)
95{
96#if defined(_M_X64) || defined(_M_ARM64) || defined(_LP64) || defined(__LP64__)
97 word64 carry = 0, x;
98 for (size_t i=0, idx=0; i<len/8; ++i, idx+=8)
99 {
100 x = GetWord<word64>(false, LITTLE_ENDIAN_ORDER, in+idx);
101 word64 y = (x >> 63); x = (x << 1) + carry;
102 PutWord<word64>(false, LITTLE_ENDIAN_ORDER, out+idx, x);
103 carry = y;
104 }
105#else
106 word32 carry = 0, x;
107 for (size_t i=0, idx=0; i<len/4; ++i, idx+=4)
108 {
109 x = GetWord<word32>(false, LITTLE_ENDIAN_ORDER, in+idx);
110 word32 y = (x >> 31); x = (x << 1) + carry;
111 PutWord<word32>(false, LITTLE_ENDIAN_ORDER, out+idx, x);
112 carry = y;
113 }
114#endif
115
116#if CRYPTOPP_XTS_WIDE_BLOCK_CIPHERS
117
119 CRYPTOPP_ASSERT(len >= 16);
120 CRYPTOPP_ASSERT(len <= 128);
121
122 byte* k = out;
123 if (carry)
124 {
125 switch (len)
126 {
127 case 16:
128 {
129 const size_t LEIDX = 16-1;
130 k[LEIDX-15] ^= 0x87;
131 break;
132 }
133 case 32:
134 {
135 // https://crypto.stackexchange.com/q/9815/10496
136 // Polynomial x^256 + x^10 + x^5 + x^2 + 1
137 const size_t LEIDX = 32-1;
138 k[LEIDX-30] ^= 4;
139 k[LEIDX-31] ^= 0x25;
140 break;
141 }
142 case 64:
143 {
144 // https://crypto.stackexchange.com/q/9815/10496
145 // Polynomial x^512 + x^8 + x^5 + x^2 + 1
146 const size_t LEIDX = 64-1;
147 k[LEIDX-62] ^= 1;
148 k[LEIDX-63] ^= 0x25;
149 break;
150 }
151 case 128:
152 {
153 // https://crypto.stackexchange.com/q/9815/10496
154 // Polynomial x^1024 + x^19 + x^6 + x + 1
155 const size_t LEIDX = 128-1;
156 k[LEIDX-125] ^= 8;
157 k[LEIDX-126] ^= 0x00;
158 k[LEIDX-127] ^= 0x43;
159 break;
160 }
161 default:
163 }
164 }
165#else
166 CRYPTOPP_ASSERT(len == 16);
167
168 byte* k = out;
169 if (carry)
170 {
171 k[0] ^= 0x87;
172 return;
173 }
174#endif // CRYPTOPP_XTS_WIDE_BLOCK_CIPHERS
175}
176
177inline void GF_Double(byte *inout, unsigned int len)
178{
179 GF_Double(inout, inout, len);
180}
181
182ANONYMOUS_NAMESPACE_END
183
184NAMESPACE_BEGIN(CryptoPP)
185
186void XTS_ModeBase::ThrowIfInvalidBlockSize(size_t length)
187{
188#if CRYPTOPP_XTS_WIDE_BLOCK_CIPHERS
189 CRYPTOPP_ASSERT(length >= 16 && length <= 128 && IsPowerOf2(length));
190 if (length < 16 || length > 128 || !IsPowerOf2(length))
191 throw InvalidArgument(AlgorithmName() + ": block size of underlying block cipher is not valid");
192#else
193 CRYPTOPP_ASSERT(length == 16);
194 if (length != 16)
195 throw InvalidArgument(AlgorithmName() + ": block size of underlying block cipher is not 16");
196#endif
197}
198
200{
201 CRYPTOPP_ASSERT(length % 2 == 0);
202 if (!GetBlockCipher().IsValidKeyLength((length+1)/2))
203 throw InvalidKeyLength(AlgorithmName(), length);
204}
205
206void XTS_ModeBase::SetKey(const byte *key, size_t length, const NameValuePairs &params)
207{
210
211 const size_t klen = length/2;
212 AccessBlockCipher().SetKey(key+0, klen, params);
213 AccessTweakCipher().SetKey(key+klen, klen, params);
214
215 ResizeBuffers();
216
217 size_t ivLength;
218 const byte *iv = GetIVAndThrowIfInvalid(params, ivLength);
219 Resynchronize(iv, (int)ivLength);
220}
221
222void XTS_ModeBase::Resynchronize(const byte *iv, int ivLength)
223{
225 std::memcpy(m_xregister, m_register, ivLength);
226 GetTweakCipher().ProcessBlock(m_xregister);
227}
228
230{
231 SecByteBlock iv(GetTweakCipher().BlockSize());
232 PutWord<word64>(false, order, iv, sector);
233 std::memset(iv+8, 0x00, iv.size()-8);
234
236 std::memcpy(m_xregister, iv, iv.size());
237 GetTweakCipher().ProcessBlock(m_xregister);
238}
239
240void XTS_ModeBase::ResizeBuffers()
241{
242 BlockOrientedCipherModeBase::ResizeBuffers();
243 m_xworkspace.New(GetBlockCipher().BlockSize()*ParallelBlocks);
244 m_xregister.New(GetBlockCipher().BlockSize()*ParallelBlocks);
245}
246
247// ProcessData runs either 12-4-1 blocks, 8-2-1 or 4-1 blocks. Which is
248// selected depends on ParallelBlocks in the header file. 12-4-1 or 8-2-1
249// can be used on Aarch64 and PowerPC. Intel should use 4-1 due to lack
250// of registers. The unneeded code paths should be removed by optimizer.
251// The extra gyrations save us 1.8 cpb on Aarch64 and 2.1 cpb on PowerPC.
252void XTS_ModeBase::ProcessData(byte *outString, const byte *inString, size_t length)
253{
254 // data unit is multiple of 16 bytes
255 CRYPTOPP_ASSERT(length % BlockSize() == 0);
256
257 enum { lastParallelBlock = ParallelBlocks-1 };
258 const unsigned int blockSize = GetBlockCipher().BlockSize();
259 const size_t parallelSize = blockSize*ParallelBlocks;
260
261 // encrypt the data unit, optimal size at a time
262 while (length >= parallelSize)
263 {
264 // m_xregister[0] always points to the next tweak.
265 GF_Double(m_xregister+1*blockSize, m_xregister+0*blockSize, blockSize);
266 GF_Double(m_xregister+2*blockSize, m_xregister+1*blockSize, blockSize);
267 GF_Double(m_xregister+3*blockSize, m_xregister+2*blockSize, blockSize);
268
269 if (ParallelBlocks > 4)
270 {
271 GF_Double(m_xregister+4*blockSize, m_xregister+3*blockSize, blockSize);
272 GF_Double(m_xregister+5*blockSize, m_xregister+4*blockSize, blockSize);
273 GF_Double(m_xregister+6*blockSize, m_xregister+5*blockSize, blockSize);
274 GF_Double(m_xregister+7*blockSize, m_xregister+6*blockSize, blockSize);
275 }
276 if (ParallelBlocks > 8)
277 {
278 GF_Double(m_xregister+8*blockSize, m_xregister+7*blockSize, blockSize);
279 GF_Double(m_xregister+9*blockSize, m_xregister+8*blockSize, blockSize);
280 GF_Double(m_xregister+10*blockSize, m_xregister+9*blockSize, blockSize);
281 GF_Double(m_xregister+11*blockSize, m_xregister+10*blockSize, blockSize);
282 }
283
284 // merge the tweak into the input block
285 XorBuffer(m_xworkspace, inString, m_xregister, parallelSize);
286
287 // encrypt one block, merge the tweak into the output block
288 GetBlockCipher().AdvancedProcessBlocks(m_xworkspace, m_xregister,
289 outString, parallelSize, BlockTransformation::BT_AllowParallel);
290
291 // m_xregister[0] always points to the next tweak.
292 GF_Double(m_xregister+0, m_xregister+lastParallelBlock*blockSize, blockSize);
293
294 inString += parallelSize;
295 outString += parallelSize;
296 length -= parallelSize;
297 }
298
299 // encrypt the data unit, 4 blocks at a time
300 while (ParallelBlocks == 12 && length >= blockSize*4)
301 {
302 // m_xregister[0] always points to the next tweak.
303 GF_Double(m_xregister+1*blockSize, m_xregister+0*blockSize, blockSize);
304 GF_Double(m_xregister+2*blockSize, m_xregister+1*blockSize, blockSize);
305 GF_Double(m_xregister+3*blockSize, m_xregister+2*blockSize, blockSize);
306
307 // merge the tweak into the input block
308 XorBuffer(m_xworkspace, inString, m_xregister, blockSize*4);
309
310 // encrypt one block, merge the tweak into the output block
311 GetBlockCipher().AdvancedProcessBlocks(m_xworkspace, m_xregister,
312 outString, blockSize*4, BlockTransformation::BT_AllowParallel);
313
314 // m_xregister[0] always points to the next tweak.
315 GF_Double(m_xregister+0, m_xregister+3*blockSize, blockSize);
316
317 inString += blockSize*4;
318 outString += blockSize*4;
319 length -= blockSize*4;
320 }
321
322 // encrypt the data unit, 2 blocks at a time
323 while (ParallelBlocks == 8 && length >= blockSize*2)
324 {
325 // m_xregister[0] always points to the next tweak.
326 GF_Double(m_xregister+1*blockSize, m_xregister+0*blockSize, blockSize);
327
328 // merge the tweak into the input block
329 XorBuffer(m_xworkspace, inString, m_xregister, blockSize*2);
330
331 // encrypt one block, merge the tweak into the output block
332 GetBlockCipher().AdvancedProcessBlocks(m_xworkspace, m_xregister,
333 outString, blockSize*2, BlockTransformation::BT_AllowParallel);
334
335 // m_xregister[0] always points to the next tweak.
336 GF_Double(m_xregister+0, m_xregister+1*blockSize, blockSize);
337
338 inString += blockSize*2;
339 outString += blockSize*2;
340 length -= blockSize*2;
341 }
342
343 // encrypt the data unit, blocksize at a time
344 while (length)
345 {
346 // merge the tweak into the input block
347 XorBuffer(m_xworkspace, inString, m_xregister, blockSize);
348
349 // encrypt one block
350 GetBlockCipher().ProcessBlock(m_xworkspace);
351
352 // merge the tweak into the output block
353 XorBuffer(outString, m_xworkspace, m_xregister, blockSize);
354
355 // Multiply T by alpha
356 GF_Double(m_xregister, blockSize);
357
358 inString += blockSize;
359 outString += blockSize;
360 length -= blockSize;
361 }
362}
363
364size_t XTS_ModeBase::ProcessLastBlock(byte *outString, size_t outLength, const byte *inString, size_t inLength)
365{
366 // need at least a full AES block
367 CRYPTOPP_ASSERT(inLength >= BlockSize());
368
369 if (inLength < BlockSize())
370 throw InvalidArgument("XTS: message is too short for ciphertext stealing");
371
373 return ProcessLastPlainBlock(outString, outLength, inString, inLength);
374 else
375 return ProcessLastCipherBlock(outString, outLength, inString, inLength);
376}
377
378size_t XTS_ModeBase::ProcessLastPlainBlock(byte *outString, size_t outLength, const byte *inString, size_t inLength)
379{
380 // ensure output buffer is large enough
381 CRYPTOPP_ASSERT(outLength >= inLength);
382
383 const unsigned int blockSize = GetBlockCipher().BlockSize();
384 const size_t blocks = inLength / blockSize;
385 const size_t tail = inLength % blockSize;
386 outLength = inLength;
387
388 if (tail == 0)
389 {
390 // Allow ProcessData to handle all the full blocks
391 ProcessData(outString, inString, inLength);
392 return inLength;
393 }
394 else if (blocks > 1)
395 {
396 // Allow ProcessData to handle full blocks except one
397 const size_t head = (blocks-1)*blockSize;
398 ProcessData(outString, inString, inLength-head);
399
400 outString += head;
401 inString += head; inLength -= head;
402 }
403
404 ///// handle the full block /////
405
406 // merge the tweak into the input block
407 XorBuffer(m_xworkspace, inString, m_xregister, blockSize);
408
409 // encrypt one block
410 GetBlockCipher().ProcessBlock(m_xworkspace);
411
412 // merge the tweak into the output block
413 XorBuffer(outString, m_xworkspace, m_xregister, blockSize);
414
415 // Multiply T by alpha
416 GF_Double(m_xregister, blockSize);
417
418 ///// handle final partial block /////
419
420 inString += blockSize;
421 outString += blockSize;
422 const size_t len = inLength-blockSize;
423
424 // copy in the final plaintext bytes
425 std::memcpy(m_xworkspace, inString, len);
426 // and copy out the final ciphertext bytes
427 std::memcpy(outString, outString-blockSize, len);
428 // "steal" ciphertext to complete the block
429 std::memcpy(m_xworkspace+len, outString-blockSize+len, blockSize-len);
430
431 // merge the tweak into the input block
432 XorBuffer(m_xworkspace, m_xregister, blockSize);
433
434 // encrypt one block
435 GetBlockCipher().ProcessBlock(m_xworkspace);
436
437 // merge the tweak into the previous output block
438 XorBuffer(outString-blockSize, m_xworkspace, m_xregister, blockSize);
439
440 return outLength;
441}
442
443size_t XTS_ModeBase::ProcessLastCipherBlock(byte *outString, size_t outLength, const byte *inString, size_t inLength)
444{
445 // ensure output buffer is large enough
446 CRYPTOPP_ASSERT(outLength >= inLength);
447
448 const unsigned int blockSize = GetBlockCipher().BlockSize();
449 const size_t blocks = inLength / blockSize;
450 const size_t tail = inLength % blockSize;
451 outLength = inLength;
452
453 if (tail == 0)
454 {
455 // Allow ProcessData to handle all the full blocks
456 ProcessData(outString, inString, inLength);
457 return inLength;
458 }
459 else if (blocks > 1)
460 {
461 // Allow ProcessData to handle full blocks except one
462 const size_t head = (blocks-1)*blockSize;
463 ProcessData(outString, inString, inLength-head);
464
465 outString += head;
466 inString += head; inLength -= head;
467 }
468
469 #define poly1 (m_xregister+0*blockSize)
470 #define poly2 (m_xregister+1*blockSize)
471 GF_Double(poly2, poly1, blockSize);
472
473 ///// handle final partial block /////
474
475 inString += blockSize;
476 outString += blockSize;
477 const size_t len = inLength-blockSize;
478
479 // merge the tweak into the input block
480 XorBuffer(m_xworkspace, inString-blockSize, poly2, blockSize);
481
482 // encrypt one block
483 GetBlockCipher().ProcessBlock(m_xworkspace);
484
485 // merge the tweak into the output block
486 XorBuffer(m_xworkspace, poly2, blockSize);
487
488 // copy in the final plaintext bytes
489 std::memcpy(outString-blockSize, inString, len);
490 // and copy out the final ciphertext bytes
491 std::memcpy(outString, m_xworkspace, len);
492 // "steal" ciphertext to complete the block
493 std::memcpy(outString-blockSize+len, m_xworkspace+len, blockSize-len);
494
495 ///// handle the full previous block /////
496
497 inString -= blockSize;
498 outString -= blockSize;
499
500 // merge the tweak into the input block
501 XorBuffer(m_xworkspace, outString, poly1, blockSize);
502
503 // encrypt one block
504 GetBlockCipher().ProcessBlock(m_xworkspace);
505
506 // merge the tweak into the output block
507 XorBuffer(outString, m_xworkspace, poly1, blockSize);
508
509 return outLength;
510}
511
512NAMESPACE_END
#define M128_CAST(x)
Clang workaround.
Definition adv_simd.h:609
#define CONST_M128_CAST(x)
Clang workaround.
Definition adv_simd.h:614
Class file for the AES cipher (Rijndael)
bool IsForwardTransformation() const
Determines if the cipher is being operated in its forward direction.
Definition modes.h:258
void Resynchronize(const byte *iv, int length=-1)
Resynchronize with an IV.
Definition modes.h:260
@ BT_AllowParallel
Allow parallel transformations.
Definition cryptlib.h:925
An invalid argument was detected.
Definition cryptlib.h:203
Exception thrown when an invalid key length is encountered.
Definition simple.h:56
Interface for retrieving values given their names.
Definition cryptlib.h:322
void New(size_type newSize)
Change size without preserving contents.
Definition secblock.h:1126
size_type size() const
Provides the count of elements in the SecBlock.
Definition secblock.h:867
SecBlock<byte> typedef.
Definition secblock.h:1226
virtual void SetKey(const byte *key, size_t length, const NameValuePairs &params=g_nullNameValuePairs)
Sets or reset the key of this object.
XTS block cipher mode of operation default implementation.
Definition xts.h:50
bool IsValidKeyLength(size_t keylength) const
Returns whether keylength is a valid key length.
Definition xts.h:74
void SetKey(const byte *key, size_t length, const NameValuePairs &params=g_nullNameValuePairs)
Sets or reset the key of this object.
Definition xts.cpp:206
void ProcessData(byte *outString, const byte *inString, size_t length)
Encrypt or decrypt an array of bytes.
Definition xts.cpp:252
void Resynchronize(const byte *iv, int ivLength=-1)
Resynchronize with an IV.
Definition xts.cpp:222
size_t ProcessLastBlock(byte *outString, size_t outLength, const byte *inString, size_t inLength)
Encrypt or decrypt the last block of data.
Definition xts.cpp:364
unsigned int BlockSize() const
Provides the block size of the cipher.
Definition xts.h:84
void ThrowIfInvalidKeyLength(size_t length)
Validates the key length.
Definition xts.cpp:199
void ThrowIfInvalidBlockSize(size_t length)
Validates the block size.
Definition xts.cpp:186
std::string AlgorithmName() const
Provides the name of this algorithm.
Definition xts.h:61
unsigned int word32
32-bit unsigned datatype
Definition config_int.h:62
unsigned long long word64
64-bit unsigned datatype
Definition config_int.h:91
Functions for CPU features and intrinsics.
ByteOrder
Provides the byte ordering.
Definition cryptlib.h:143
@ LITTLE_ENDIAN_ORDER
byte order is little-endian
Definition cryptlib.h:145
Utility functions for the Crypto++ library.
T GetWord(bool assumeAligned, ByteOrder order, const byte *block)
Access a block of memory.
Definition misc.h:2460
bool IsPowerOf2(const T &value)
Tests whether a value is a power of 2.
Definition misc.h:1010
void PutWord(bool assumeAligned, ByteOrder order, byte *block, T value, const byte *xorBlock=NULL)
Access a block of memory.
Definition misc.h:2502
CRYPTOPP_DLL void xorbuf(byte *buf, const byte *mask, size_t count)
Performs an XOR of a buffer with a mask.
Classes for block cipher modes of operation.
Crypto++ library namespace.
Precompiled header file.
Support functions for PowerPC and vector operations.
T1 VecXor(const T1 vec1, const T2 vec2)
XOR two vectors.
Definition ppc_simd.h:1414
void VecStore(const T data, byte dest[16])
Stores a vector to a byte array.
Definition ppc_simd.h:895
uint32x4_p VecLoad(const byte src[16])
Loads a vector from a byte array.
Definition ppc_simd.h:369
Classes for the Threefish block cipher.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition trap.h:68
Classes for XTS block cipher mode of operation.