Line data Source code
1 : /*------------------------------------------------------------------------
2 : / OCB Version 3 Reference Code (Optimized C) Last modified 08-SEP-2012
3 : /-------------------------------------------------------------------------
4 : / Copyright (c) 2012 Ted Krovetz.
5 : /
6 : / Permission to use, copy, modify, and/or distribute this software for any
7 : / purpose with or without fee is hereby granted, provided that the above
8 : / copyright notice and this permission notice appear in all copies.
9 : /
10 : / THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 : / WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 : / MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 : / ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 : / WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 : / ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 : / OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 : /
18 : / Phillip Rogaway holds patents relevant to OCB. See the following for
19 : / his patent grant: http://www.cs.ucdavis.edu/~rogaway/ocb/grant.htm
20 : /
21 : / Special thanks to Keegan McAllister for suggesting several good improvements
22 : /
23 : / Comments are welcome: Ted Krovetz <ted@krovetz.net> - Dedicated to Laurel K
24 : /------------------------------------------------------------------------- */
25 :
26 : /* ----------------------------------------------------------------------- */
27 : /* Usage notes */
28 : /* ----------------------------------------------------------------------- */
29 :
30 : /* - When AE_PENDING is passed as the 'final' parameter of any function,
31 : / the length parameters must be a multiple of (BPI*16).
32 : / - When available, SSE or AltiVec registers are used to manipulate data.
33 : / So, when on machines with these facilities, all pointers passed to
34 : / any function should be 16-byte aligned.
35 : / - Plaintext and ciphertext pointers may be equal (ie, plaintext gets
36 : / encrypted in-place), but no other pair of pointers may be equal.
37 : / - This code assumes all x86 processors have SSE2 and SSSE3 instructions
38 : / when compiling under MSVC. If untrue, alter the #define.
39 : / - This code is tested for C99 and recent versions of GCC and MSVC. */
40 :
41 : /* ----------------------------------------------------------------------- */
42 : /* User configuration options */
43 : /* ----------------------------------------------------------------------- */
44 :
45 : /* Set the AES key length to use and length of authentication tag to produce.
46 : / Setting either to 0 requires the value be set at runtime via ae_init().
47 : / Some optimizations occur for each when set to a fixed value. */
48 : #define OCB_KEY_LEN 16 /* 0, 16, 24 or 32. 0 means set in ae_init */
49 : #define OCB_TAG_LEN 16 /* 0 to 16. 0 means set in ae_init */
50 :
51 : /* This implementation has built-in support for multiple AES APIs. Set any
52 : / one of the following to non-zero to specify which to use. */
53 : #if 0
54 : #define USE_APPLE_COMMON_CRYPTO_AES 0
55 : #define USE_NETTLE_AES 0
56 : #define USE_OPENSSL_AES 1 /* http://openssl.org */
57 : #define USE_REFERENCE_AES 0 /* Internet search: rijndael-alg-fst.c */
58 : #define USE_AES_NI 0 /* Uses compiler's intrinsics */
59 : #endif
60 :
61 : /* During encryption and decryption, various "L values" are required.
62 : / The L values can be precomputed during initialization (requiring extra
63 : / space in ae_ctx), generated as needed (slightly slowing encryption and
64 : / decryption), or some combination of the two. L_TABLE_SZ specifies how many
65 : / L values to precompute. L_TABLE_SZ must be at least 3. L_TABLE_SZ*16 bytes
66 : / are used for L values in ae_ctx. Plaintext and ciphertexts shorter than
67 : / 2^L_TABLE_SZ blocks need no L values calculated dynamically. */
68 : #define L_TABLE_SZ 16
69 :
70 : /* Set L_TABLE_SZ_IS_ENOUGH non-zero iff you know that all plaintexts
71 : / will be shorter than 2^(L_TABLE_SZ+4) bytes in length. This results
72 : / in better performance. */
73 : #define L_TABLE_SZ_IS_ENOUGH 1
74 :
75 : /* ----------------------------------------------------------------------- */
76 : /* Includes and compiler specific definitions */
77 : /* ----------------------------------------------------------------------- */
78 :
79 : #include "config.h"
80 : #include "ae.h"
81 : #include <stdlib.h>
82 : #include <string.h>
83 : #if defined(HAVE_STRINGS_H)
84 : #include <strings.h>
85 : #endif
86 : #if defined(HAVE_ENDIAN_H)
87 : #include <endian.h>
88 : #elif defined(HAVE_SYS_ENDIAN_H)
89 : #include <sys/types.h>
90 : #include <sys/endian.h>
91 : #endif
92 :
93 : /* Define standard sized integers */
94 : #if defined(_MSC_VER) && (_MSC_VER < 1600)
95 : typedef unsigned __int8 uint8_t;
96 : typedef unsigned __int32 uint32_t;
97 : typedef unsigned __int64 uint64_t;
98 : typedef __int64 int64_t;
99 : #else
100 : #include <stdint.h>
101 : #endif
102 :
103 : /* Compiler-specific intrinsics and fixes: bswap64, ntz */
104 : #if _MSC_VER
105 : #define inline __inline /* MSVC doesn't recognize "inline" in C */
106 : #define restrict __restrict /* MSVC doesn't recognize "restrict" in C */
107 : #define __SSE2__ (_M_IX86 || _M_AMD64 || _M_X64) /* Assume SSE2 */
108 : #define __SSSE3__ (_M_IX86 || _M_AMD64 || _M_X64) /* Assume SSSE3 */
109 : #include <intrin.h>
110 : #pragma intrinsic(_byteswap_uint64, _BitScanForward, memcpy)
111 : #elif __GNUC__
112 : #ifndef inline
113 : #define inline __inline__ /* No "inline" in GCC ansi C mode */
114 : #endif
115 : #ifndef restrict
116 : #define restrict __restrict__ /* No "restrict" in GCC ansi C mode */
117 : #endif
118 : #endif
119 :
120 : #if _MSC_VER
121 : #define bswap64(x) _byteswap_uint64(x)
122 : #elif HAVE_DECL_BSWAP64
123 : /* nothing */
124 : #elif HAVE_DECL___BUILTIN_BSWAP64
125 : #define bswap64(x) __builtin_bswap64(x) /* GCC 4.3+ */
126 : #else
#define bswap32(x) \
   ((((x) & 0xff000000u) >> 24) | (((x) & 0x00ff0000u) >> 8) | \
    (((x) & 0x0000ff00u) << 8) | (((x) & 0x000000ffu) << 24))

/* Byte-reverse a 64-bit value: swap the 32-bit halves, then byte-reverse
   each half.  Used only when no compiler builtin/intrinsic is available. */
static inline uint64_t bswap64(uint64_t x) {
    uint32_t hi = (uint32_t)(x >> 32);
    uint32_t lo = (uint32_t)x;
    return ((uint64_t)bswap32(lo) << 32) | (uint64_t)bswap32(hi);
}
138 : #endif
139 :
140 : #if _MSC_VER
/* Count trailing zeros with the MSVC intrinsic.  _BitScanForward requires an
   unsigned long* output parameter, so use a correctly-typed temporary rather
   than aliasing the unsigned argument (the original passed &x, an unsigned*,
   which is a type mismatch).  x must be nonzero: the intrinsic's output is
   undefined for x == 0, and all callers pass nonzero block indices. */
static inline unsigned ntz(unsigned x) {unsigned long i;_BitScanForward(&i,x);return (unsigned)i;}
142 : #elif HAVE_DECL___BUILTIN_CTZ
143 : #define ntz(x) __builtin_ctz((unsigned)(x)) /* GCC 3.4+ */
144 : #elif HAVE_DECL_FFS
145 : #define ntz(x) (ffs(x) - 1)
146 : #else
147 : #if (L_TABLE_SZ <= 9) && (L_TABLE_SZ_IS_ENOUGH) /* < 2^13 byte texts */
/* Table-driven trailing-zero count for the short-message configuration.
   NOTE(review): indexing by x/4 assumes x is always a nonzero multiple of 4
   (block indices stepped by BPI = 4) and small enough to stay inside the
   128-entry table (L_TABLE_SZ <= 9 per the guarding #if) — confirm against
   the encrypt/decrypt callers. */
static inline unsigned ntz(unsigned x) {
    static const unsigned char tz_table[] = {0,
    2,3,2,4,2,3,2,5,2,3,2,4,2,3,2,6,2,3,2,4,2,3,2,5,2,3,2,4,2,3,2,7,
    2,3,2,4,2,3,2,5,2,3,2,4,2,3,2,6,2,3,2,4,2,3,2,5,2,3,2,4,2,3,2,8,
    2,3,2,4,2,3,2,5,2,3,2,4,2,3,2,6,2,3,2,4,2,3,2,5,2,3,2,4,2,3,2,7,
    2,3,2,4,2,3,2,5,2,3,2,4,2,3,2,6,2,3,2,4,2,3,2,5,2,3,2,4,2,3,2};
    return tz_table[x/4];
}
156 : #else /* From http://supertech.csail.mit.edu/papers/debruijn.pdf */
/* Trailing-zero count via the de Bruijn multiplication trick: isolate the
   lowest set bit, multiply by a de Bruijn constant, and use the top five
   bits of the product to index a position table.
   (http://supertech.csail.mit.edu/papers/debruijn.pdf) */
static inline unsigned ntz(unsigned x) {
    static const unsigned char debruijn_pos[32] =
        { 0,  1, 28,  2, 29, 14, 24,  3, 30, 22, 20, 15, 25, 17,  4,  8,
         31, 27, 13, 23, 21, 19, 16,  7, 26, 12, 18,  6, 11,  5, 10,  9};
    uint32_t lsb = (uint32_t)(x & -x);   /* lowest set bit of x */
    return debruijn_pos[(lsb * 0x077CB531u) >> 27];
}
163 : #endif
164 : #endif
165 :
166 : /* ----------------------------------------------------------------------- */
167 : /* Define blocks and operations -- Patch if incorrect on your compiler. */
168 : /* ----------------------------------------------------------------------- */
169 :
170 : #if __SSE2__
171 : #include <xmmintrin.h> /* SSE instructions and _mm_malloc */
172 : #include <emmintrin.h> /* SSE2 instructions */
173 : typedef __m128i block;
174 : #define xor_block(x,y) _mm_xor_si128(x,y)
175 : #define zero_block() _mm_setzero_si128()
176 : #define unequal_blocks(x,y) \
177 : (_mm_movemask_epi8(_mm_cmpeq_epi8(x,y)) != 0xffff)
178 : #if __SSSE3__ || USE_AES_NI
179 : #include <tmmintrin.h> /* SSSE3 instructions */
180 : #define swap_if_le(b) \
181 : _mm_shuffle_epi8(b,_mm_set_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15))
182 : #else
/* Byte-reverse the 16-byte block on little-endian x86 without SSSE3:
   reverse the four 32-bit words, swap the 16-bit halves inside each word,
   then swap the bytes inside each 16-bit lane with paired shifts. */
static inline block swap_if_le(block b) {
    block a = _mm_shuffle_epi32 (b, _MM_SHUFFLE(0,1,2,3));
    a = _mm_shufflehi_epi16(a, _MM_SHUFFLE(2,3,0,1));
    a = _mm_shufflelo_epi16(a, _MM_SHUFFLE(2,3,0,1));
    return _mm_xor_si128(_mm_srli_epi16(a,8), _mm_slli_epi16(a,8));
}
189 : #endif
/* Extract 128 bits of (KtopStr << bot) from the 192-bit stretched key.
   KtopStr is register correct; the returned block is memory correct.
   _mm_sll_epi64/_mm_srl_epi64 produce zero for shift counts >= 64, so the
   bot == 0 case (right shift by 64) is safe here. */
static inline block gen_offset(uint64_t KtopStr[3], unsigned bot) {
    block hi = _mm_load_si128((__m128i *)(KtopStr+0));   /* hi = B A */
    block lo = _mm_loadu_si128((__m128i *)(KtopStr+1));  /* lo = C B */
    __m128i lshift = _mm_cvtsi32_si128(bot);
    __m128i rshift = _mm_cvtsi32_si128(64-bot);
    lo = _mm_xor_si128(_mm_sll_epi64(hi,lshift),_mm_srl_epi64(lo,rshift));
#if __SSSE3__ || USE_AES_NI
    return _mm_shuffle_epi8(lo,_mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7));
#else
    return swap_if_le(_mm_shuffle_epi32(lo, _MM_SHUFFLE(1,0,3,2)));
#endif
}
/* Doubling in GF(2^128): shift the block left one bit; if the top bit was
   set, XOR the low byte with the reduction constant 135 (0x87).  Works on
   32-bit lanes: each lane's sign bit is propagated (via arithmetic shift)
   into the carry position of the next lane, with 135 applied at the wrap. */
static inline block double_block(block bl) {
    const __m128i mask = _mm_set_epi32(135,1,1,1);
    __m128i tmp = _mm_srai_epi32(bl, 31);
    tmp = _mm_and_si128(tmp, mask);
    tmp = _mm_shuffle_epi32(tmp, _MM_SHUFFLE(2,1,0,3));
    bl = _mm_slli_epi32(bl, 1);
    return _mm_xor_si128(bl,tmp);
}
210 : #elif __ALTIVEC__ && _CALL_ELF != 2
211 : #include <altivec.h>
212 : typedef vector unsigned block;
213 : #define xor_block(x,y) vec_xor(x,y)
214 : #define zero_block() vec_splat_u32(0)
215 : #define unequal_blocks(x,y) vec_any_ne(x,y)
216 : #define swap_if_le(b) (b)
217 : #if __PPC64__
218 : static block gen_offset(uint64_t KtopStr[3], unsigned bot) {
219 : union {uint64_t u64[2]; block bl;} rval;
220 : rval.u64[0] = (KtopStr[0] << bot) | (KtopStr[1] >> (64-bot));
221 : rval.u64[1] = (KtopStr[1] << bot) | (KtopStr[2] >> (64-bot));
222 : return rval.bl;
223 : }
224 : #else
225 : /* Special handling: Shifts are mod 32, and no 64-bit types */
/* Compute 128 bits of (KtopStr << bot) using only 32-bit vector shifts
   (AltiVec shifts are mod 32 and there are no 64-bit lanes here).  First
   align whole 32-bit words with vec_sld, then shift the remaining 0-31
   bits across word boundaries. */
static block gen_offset(uint64_t KtopStr[3], unsigned bot) {
    const vector unsigned k32 = {32,32,32,32};
    vector unsigned hi = *(vector unsigned *)(KtopStr+0);
    vector unsigned lo = *(vector unsigned *)(KtopStr+2);
    vector unsigned bot_vec;
    if (bot < 32) {
        lo = vec_sld(hi,lo,4);
    } else {
        vector unsigned t = vec_sld(hi,lo,4);
        lo = vec_sld(hi,lo,8);
        hi = t;
        bot = bot - 32;
    }
    if (bot == 0) return hi;    /* avoids a zero-length shift below */
    *(unsigned *)&bot_vec = bot;   /* only lane 0 is read by vec_splat */
    vector unsigned lshift = vec_splat(bot_vec,0);
    vector unsigned rshift = vec_sub(k32,lshift);
    hi = vec_sl(hi,lshift);
    lo = vec_sr(lo,rshift);
    return vec_xor(hi,lo);
}
247 : #endif
/* Doubling in GF(2^128) with byte vectors: shift every byte left one bit,
   then XOR in the neighbor's carried-out top bit (rotated one byte), using
   the reduction constant 135 for the carry out of the most-significant
   byte. */
static inline block double_block(block b) {
    const vector unsigned char mask = {135,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
    const vector unsigned char perm = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0};
    const vector unsigned char shift7 = vec_splat_u8(7);
    const vector unsigned char shift1 = vec_splat_u8(1);
    vector unsigned char c = (vector unsigned char)b;
    vector unsigned char t = vec_sra(c,shift7);   /* replicate each top bit */
    t = vec_and(t,mask);
    t = vec_perm(t,t,perm);    /* rotate carries into the next byte down */
    c = vec_sl(c,shift1);
    return (block)vec_xor(c,t);
}
260 : #elif __ARM_NEON__
261 : #include <arm_neon.h>
262 : typedef int8x16_t block; /* Yay! Endian-neutral reads! */
263 : #define xor_block(x,y) veorq_s8(x,y)
264 : #define zero_block() vdupq_n_s8(0)
/* Nonzero iff the two 128-bit blocks differ in any bit: XOR them and OR
   the two 64-bit halves of the result. */
static inline int unequal_blocks(block a, block b) {
    int64x2_t t=veorq_s64((int64x2_t)a,(int64x2_t)b);
    return (vgetq_lane_s64(t,0)|vgetq_lane_s64(t,1))!=0;
}
269 : #define swap_if_le(b) (b) /* Using endian-neutral int8x16_t */
270 : /* KtopStr is reg correct by 64 bits, return mem correct */
/* Extract 128 bits of (KtopStr << bot).  NEON vshlq_u64 interprets a
   negative shift count as a right shift, so lo is shifted right by
   (64 - bot) via the saturating add bot + (-64). */
static block gen_offset(uint64_t KtopStr[3], unsigned bot) {
    const union { unsigned x; unsigned char endian; } little = { 1 };
    const int64x2_t k64 = {-64,-64};
    uint64x2_t hi, lo;
    memcpy(&hi, KtopStr, sizeof(hi));
    memcpy(&lo, KtopStr+1, sizeof(lo));
    int64x2_t ls = vdupq_n_s64(bot);
    int64x2_t rs = vqaddq_s64(k64,ls);
    block rval = (block)veorq_u64(vshlq_u64(hi,ls),vshlq_u64(lo,rs));
    if (little.endian)   /* register correct -> memory correct */
        rval = vrev64q_s8(rval);
    return rval;
}
/* Doubling in GF(2^128): per-byte left shift with carries moved across byte
   boundaries; -121 is the reduction constant 135 (0x87) as a signed byte,
   applied to the carry out of the most-significant byte. */
static inline block double_block(block b)
{
    const block mask = {-121,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
    block tmp = vshrq_n_s8(b,7);   /* replicate each byte's top bit */
    tmp = vandq_s8(tmp, mask);
    tmp = vextq_s8(tmp, tmp, 1); /* Rotate high byte to end */
    b = vshlq_n_s8(b,1);
    return veorq_s8(tmp,b);
}
293 : #else
typedef struct { uint64_t l,r; } block;
/* XOR two 128-bit blocks held as a pair of 64-bit halves. */
static inline block xor_block(block x, block y) {
    block r;
    r.l = x.l ^ y.l;
    r.r = x.r ^ y.r;
    return r;
}
/* The all-zero block. */
static inline block zero_block(void) {
    block z;
    z.l = 0;
    z.r = 0;
    return z;
}
/* Nonzero iff the two blocks differ in any bit. */
#define unequal_blocks(x, y) ((((x).l^(y).l)|((x).r^(y).r)) != 0)
/* On little-endian hosts, byte-swap both halves so the block toggles
   between memory-correct and register-correct form; identity on
   big-endian hosts (runtime endianness test via the union). */
static inline block swap_if_le(block b) {
    const union { unsigned x; unsigned char endian; } little = { 1 };
    if (little.endian) {
        block r;
        r.l = bswap64(b.l);
        r.r = bswap64(b.r);
        return r;
    } else
        return b;
}
310 :
311 : /* KtopStr is reg correct by 64 bits, return mem correct */
/* Extract 128 bits of (KtopStr << bot), bot in [0,63].  The bot == 0 case
   is handled separately because a 64-bit right shift by 64 would be
   undefined behavior.  Result is converted to memory-correct form. */
static block gen_offset(uint64_t KtopStr[3], unsigned bot) {
    block rval;
    if (bot != 0) {
        rval.l = (KtopStr[0] << bot) | (KtopStr[1] >> (64-bot));
        rval.r = (KtopStr[1] << bot) | (KtopStr[2] >> (64-bot));
    } else {
        rval.l = KtopStr[0];
        rval.r = KtopStr[1];
    }
    return swap_if_le(rval);
}
323 :
324 : #if __GNUC__ && !__clang__ && __arm__
/* Doubling in GF(2^128) on 32-bit ARM: the chain of adds-with-carry shifts
   the whole 128-bit value left one bit; if the final carry (out of the top
   word) is set, the conditional EOR applies the reduction constant 135 to
   the low word. */
static inline block double_block(block b) {
    __asm__ ("adds %1,%1,%1\n\t"
             "adcs %H1,%H1,%H1\n\t"
             "adcs %0,%0,%0\n\t"
             "adcs %H0,%H0,%H0\n\t"
             "it cs\n\t"
             "eorcs %1,%1,#135"
             : "+r"(b.l), "+r"(b.r) : : "cc");
    return b;
}
335 : #else
/* Doubling in GF(2^128): 128-bit left shift by one (carrying b.r's top bit
   into b.l), then XOR the low byte with 135 if the overall top bit was set. */
static inline block double_block(block b) {
    uint64_t t = (uint64_t)((int64_t)b.l >> 63);   /* all-ones iff msb of b.l set */
    b.l = (b.l + b.l) ^ (b.r >> 63);
    b.r = (b.r + b.r) ^ (t & 135);
    return b;
}
342 : #endif
343 :
344 : #endif
345 :
346 : /* ----------------------------------------------------------------------- */
347 : /* AES - Code uses OpenSSL API. Other implementations get mapped to it. */
348 : /* ----------------------------------------------------------------------- */
349 :
350 : /*---------------*/
351 : #if USE_OPENSSL_AES
352 : /*---------------*/
353 :
354 : #include <openssl/aes.h> /* http://openssl.org/ */
355 :
356 : /* How to ECB encrypt an array of blocks, in place */
357 550019 : static inline void AES_ecb_encrypt_blks(block *blks, unsigned nblks, AES_KEY *key) {
358 2724066 : while (nblks) {
359 2174047 : --nblks;
360 2174047 : AES_encrypt((unsigned char *)(blks+nblks), (unsigned char *)(blks+nblks), key);
361 : }
362 550019 : }
363 :
364 583257 : static inline void AES_ecb_decrypt_blks(block *blks, unsigned nblks, AES_KEY *key) {
365 2808257 : while (nblks) {
366 2225000 : --nblks;
367 2225000 : AES_decrypt((unsigned char *)(blks+nblks), (unsigned char *)(blks+nblks), key);
368 : }
369 583257 : }
370 :
371 : #define BPI 4 /* Number of blocks in buffer per ECB call */
372 :
373 : /*-------------------*/
374 : #elif USE_APPLE_COMMON_CRYPTO_AES
375 : /*-------------------*/
376 :
377 : #include <fatal_assert.h>
378 : #include <CommonCrypto/CommonCryptor.h>
379 :
/* CommonCrypto-backed "AES_KEY": an opaque CCCryptorRef plus the buffer
   CCCryptorCreateFromData carves the cryptor's state out of. */
typedef struct {
    CCCryptorRef ref;
    uint8_t b[4096];
} AES_KEY;
#if (OCB_KEY_LEN == 0)
/* NOTE(review): this variant expects a 'rounds' member the struct above
   lacks; ROUNDS appears unused in this backend — confirm before relying
   on OCB_KEY_LEN == 0 with CommonCrypto. */
#define ROUNDS(ctx) ((ctx)->rounds)
#else
#define ROUNDS(ctx) (6+OCB_KEY_LEN/4)
#endif

/* Build an ECB-mode AES encryptor from the raw key bytes (bits/8 selects
   the key length).  Aborts via fatal_assert on failure. */
static inline void AES_set_encrypt_key(unsigned char *handle, const int bits, AES_KEY *key)
{
    CCCryptorStatus rv = CCCryptorCreateFromData(
        kCCEncrypt,
        kCCAlgorithmAES128,
        kCCOptionECBMode,
        handle,
        bits / 8,
        NULL,
        &(key->b),
        sizeof (key->b),
        &(key->ref),
        NULL);

    fatal_assert(rv == kCCSuccess);
}
/* Same as AES_set_encrypt_key, but builds a decryptor. */
static inline void AES_set_decrypt_key(unsigned char *handle, const int bits, AES_KEY *key)
{
    CCCryptorStatus rv = CCCryptorCreateFromData(
        kCCDecrypt,
        kCCAlgorithmAES128,
        kCCOptionECBMode,
        handle,
        bits / 8,
        NULL,
        &(key->b),
        sizeof (key->b),
        &(key->ref),
        NULL);

    fatal_assert(rv == kCCSuccess);
}
/* Encrypt a single 16-byte block via CommonCrypto's streaming update call;
   asserts that exactly one block was produced. */
static inline void AES_encrypt(unsigned char *src, unsigned char *dst, AES_KEY *key) {
    size_t dataOutMoved;
    CCCryptorStatus rv = CCCryptorUpdate(
        key->ref,
        (const void *)src,
        kCCBlockSizeAES128,
        (void *)dst,
        kCCBlockSizeAES128,
        &dataOutMoved);
    fatal_assert(rv == kCCSuccess);
    fatal_assert(dataOutMoved == kCCBlockSizeAES128);
}
434 : #if 0
435 : /* unused */
436 : static inline void AES_decrypt(unsigned char *src, unsigned char *dst, AES_KEY *key) {
437 : AES_encrypt(src, dst, key);
438 : }
439 : #endif
/* ECB-encrypt nblks 16-byte blocks in place with one CommonCrypto call. */
static inline void AES_ecb_encrypt_blks(block *blks, unsigned nblks, AES_KEY *key) {
    const size_t dataSize = kCCBlockSizeAES128 * nblks;
    size_t dataOutMoved;
    CCCryptorStatus rv = CCCryptorUpdate(
        key->ref,
        (const void *)blks,
        dataSize,
        (void *)blks,
        dataSize,
        &dataOutMoved);
    fatal_assert(rv == kCCSuccess);
    fatal_assert(dataOutMoved == dataSize);
}
/* ECB decryption: key->ref was created with kCCDecrypt, so the same update
   path performs decryption. */
static inline void AES_ecb_decrypt_blks(block *blks, unsigned nblks, AES_KEY *key) {
    AES_ecb_encrypt_blks(blks, nblks, key);
}
456 :
457 : #define BPI 4 /* Number of blocks in buffer per ECB call */
458 :
459 : /*-------------------*/
460 : #elif USE_NETTLE_AES
461 : /*-------------------*/
462 :
463 : #include <nettle/aes.h>
464 :
465 : typedef struct aes_ctx AES_KEY;
466 : #if (OCB_KEY_LEN == 0)
467 : #define ROUNDS(ctx) ((ctx)->rounds)
468 : #else
469 : #define ROUNDS(ctx) (6+OCB_KEY_LEN/4)
470 : #endif
471 :
472 : static inline void AES_set_encrypt_key(unsigned char *handle, const int bits, AES_KEY *key)
473 : {
474 : nettle_aes_set_encrypt_key(key, bits/8, (const uint8_t *)handle);
475 : }
476 : static inline void AES_set_decrypt_key(unsigned char *handle, const int bits, AES_KEY *key)
477 : {
478 : nettle_aes_set_decrypt_key(key, bits/8, (const uint8_t *)handle);
479 : }
480 : static inline void AES_encrypt(unsigned char *src, unsigned char *dst, AES_KEY *key) {
481 : nettle_aes_encrypt(key, AES_BLOCK_SIZE, dst, src);
482 : }
483 : #if 0
484 : /* unused */
485 : static inline void AES_decrypt(unsigned char *src, unsigned char *dst, AES_KEY *key) {
486 : nettle_aes_decrypt(key, AES_BLOCK_SIZE, dst, src);
487 : }
488 : #endif
489 : static inline void AES_ecb_encrypt_blks(block *blks, unsigned nblks, AES_KEY *key) {
490 : nettle_aes_encrypt(key, nblks * AES_BLOCK_SIZE, (unsigned char*)blks, (unsigned char*)blks);
491 : }
492 : static inline void AES_ecb_decrypt_blks(block *blks, unsigned nblks, AES_KEY *key) {
493 : nettle_aes_decrypt(key, nblks * AES_BLOCK_SIZE, (unsigned char*)blks, (unsigned char*)blks);
494 : }
495 :
496 : #define BPI 4 /* Number of blocks in buffer per ECB call */
497 :
498 : /*-------------------*/
499 : #elif USE_REFERENCE_AES
500 : /*-------------------*/
501 :
502 : #include "rijndael-alg-fst.h" /* Barreto's Public-Domain Code */
503 : #if (OCB_KEY_LEN == 0)
504 : typedef struct { uint32_t rd_key[60]; int rounds; } AES_KEY;
505 : #define ROUNDS(ctx) ((ctx)->rounds)
506 : #define AES_set_encrypt_key(x, y, z) \
507 : do {rijndaelKeySetupEnc((z)->rd_key, x, y); (z)->rounds = y/32+6;} while (0)
508 : #define AES_set_decrypt_key(x, y, z) \
509 : do {rijndaelKeySetupDec((z)->rd_key, x, y); (z)->rounds = y/32+6;} while (0)
510 : #else
511 : typedef struct { uint32_t rd_key[OCB_KEY_LEN+28]; } AES_KEY;
512 : #define ROUNDS(ctx) (6+OCB_KEY_LEN/4)
513 : #define AES_set_encrypt_key(x, y, z) rijndaelKeySetupEnc((z)->rd_key, x, y)
514 : #define AES_set_decrypt_key(x, y, z) rijndaelKeySetupDec((z)->rd_key, x, y)
515 : #endif
516 : #define AES_encrypt(x,y,z) rijndaelEncrypt((z)->rd_key, ROUNDS(z), x, y)
517 : #define AES_decrypt(x,y,z) rijndaelDecrypt((z)->rd_key, ROUNDS(z), x, y)
518 :
/* ECB-encrypt an array of blocks in place, last block first. */
static void AES_ecb_encrypt_blks(block *blks, unsigned nblks, AES_KEY *key) {
    while (nblks) {
        --nblks;
        AES_encrypt((unsigned char *)(blks+nblks), (unsigned char *)(blks+nblks), key);
    }
}
525 :
526 : void AES_ecb_decrypt_blks(block *blks, unsigned nblks, AES_KEY *key) {
527 : while (nblks) {
528 : --nblks;
529 : AES_decrypt((unsigned char *)(blks+nblks), (unsigned char *)(blks+nblks), key);
530 : }
531 : }
532 :
533 : #define BPI 4 /* Number of blocks in buffer per ECB call */
534 :
535 : /*----------*/
536 : #elif USE_AES_NI
537 : /*----------*/
538 :
539 : #include <wmmintrin.h>
540 :
541 : #if (OCB_KEY_LEN == 0)
542 : typedef struct { __m128i rd_key[15]; int rounds; } AES_KEY;
543 : #define ROUNDS(ctx) ((ctx)->rounds)
544 : #else
545 : typedef struct { __m128i rd_key[7+OCB_KEY_LEN/4]; } AES_KEY;
546 : #define ROUNDS(ctx) (6+OCB_KEY_LEN/4)
547 : #endif
548 :
549 : #define EXPAND_ASSIST(v1,v2,v3,v4,shuff_const,aes_const) \
550 : v2 = _mm_aeskeygenassist_si128(v4,aes_const); \
551 : v3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(v3), \
552 : _mm_castsi128_ps(v1), 16)); \
553 : v1 = _mm_xor_si128(v1,v3); \
554 : v3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(v3), \
555 : _mm_castsi128_ps(v1), 140)); \
556 : v1 = _mm_xor_si128(v1,v3); \
557 : v2 = _mm_shuffle_epi32(v2,shuff_const); \
558 : v1 = _mm_xor_si128(v1,v2)
559 :
560 : #define EXPAND192_STEP(idx,aes_const) \
561 : EXPAND_ASSIST(x0,x1,x2,x3,85,aes_const); \
562 : x3 = _mm_xor_si128(x3,_mm_slli_si128 (x3, 4)); \
563 : x3 = _mm_xor_si128(x3,_mm_shuffle_epi32(x0, 255)); \
564 : kp[idx] = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(tmp), \
565 : _mm_castsi128_ps(x0), 68)); \
566 : kp[idx+1] = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(x0), \
567 : _mm_castsi128_ps(x3), 78)); \
568 : EXPAND_ASSIST(x0,x1,x2,x3,85,(aes_const*2)); \
569 : x3 = _mm_xor_si128(x3,_mm_slli_si128 (x3, 4)); \
570 : x3 = _mm_xor_si128(x3,_mm_shuffle_epi32(x0, 255)); \
571 : kp[idx+2] = x0; tmp = x3
572 :
/* Expand a 16-byte key into the 11 AES-128 round keys using
   AESKEYGENASSIST; the round-constant sequence 1,2,4,...,27,54 matches
   FIPS-197. */
static void AES_128_Key_Expansion(const unsigned char *userkey, void *key)
{
    __m128i x0,x1,x2;
    __m128i *kp = (__m128i *)key;
    kp[0] = x0 = _mm_loadu_si128((__m128i*)userkey);
    x2 = _mm_setzero_si128();
    EXPAND_ASSIST(x0,x1,x2,x0,255,1); kp[1] = x0;
    EXPAND_ASSIST(x0,x1,x2,x0,255,2); kp[2] = x0;
    EXPAND_ASSIST(x0,x1,x2,x0,255,4); kp[3] = x0;
    EXPAND_ASSIST(x0,x1,x2,x0,255,8); kp[4] = x0;
    EXPAND_ASSIST(x0,x1,x2,x0,255,16); kp[5] = x0;
    EXPAND_ASSIST(x0,x1,x2,x0,255,32); kp[6] = x0;
    EXPAND_ASSIST(x0,x1,x2,x0,255,64); kp[7] = x0;
    EXPAND_ASSIST(x0,x1,x2,x0,255,128); kp[8] = x0;
    EXPAND_ASSIST(x0,x1,x2,x0,255,27); kp[9] = x0;
    EXPAND_ASSIST(x0,x1,x2,x0,255,54); kp[10] = x0;
}
590 :
/* Expand a 24-byte key into the 13 AES-192 round keys; each
   EXPAND192_STEP emits three round keys. */
static void AES_192_Key_Expansion(const unsigned char *userkey, void *key)
{
    __m128i x0,x1,x2,x3,tmp,*kp = (__m128i *)key;
    kp[0] = x0 = _mm_loadu_si128((__m128i*)userkey);
    tmp = x3 = _mm_loadu_si128((__m128i*)(userkey+16));
    x2 = _mm_setzero_si128();
    EXPAND192_STEP(1,1);
    EXPAND192_STEP(4,4);
    EXPAND192_STEP(7,16);
    EXPAND192_STEP(10,64);
}
601 : }
602 :
/* Expand a 32-byte key into the 15 AES-256 round keys; even rounds use
   shuffle constant 255, odd rounds 170 (the AES-256 half-step). */
static void AES_256_Key_Expansion(const unsigned char *userkey, void *key)
{
    __m128i x0,x1,x2,x3,*kp = (__m128i *)key;
    kp[0] = x0 = _mm_loadu_si128((__m128i*)userkey );
    kp[1] = x3 = _mm_loadu_si128((__m128i*)(userkey+16));
    x2 = _mm_setzero_si128();
    EXPAND_ASSIST(x0,x1,x2,x3,255,1); kp[2] = x0;
    EXPAND_ASSIST(x3,x1,x2,x0,170,1); kp[3] = x3;
    EXPAND_ASSIST(x0,x1,x2,x3,255,2); kp[4] = x0;
    EXPAND_ASSIST(x3,x1,x2,x0,170,2); kp[5] = x3;
    EXPAND_ASSIST(x0,x1,x2,x3,255,4); kp[6] = x0;
    EXPAND_ASSIST(x3,x1,x2,x0,170,4); kp[7] = x3;
    EXPAND_ASSIST(x0,x1,x2,x3,255,8); kp[8] = x0;
    EXPAND_ASSIST(x3,x1,x2,x0,170,8); kp[9] = x3;
    EXPAND_ASSIST(x0,x1,x2,x3,255,16); kp[10] = x0;
    EXPAND_ASSIST(x3,x1,x2,x0,170,16); kp[11] = x3;
    EXPAND_ASSIST(x0,x1,x2,x3,255,32); kp[12] = x0;
    EXPAND_ASSIST(x3,x1,x2,x0,170,32); kp[13] = x3;
    EXPAND_ASSIST(x0,x1,x2,x3,255,64); kp[14] = x0;
}
622 : }
623 :
624 : static int AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key)
625 : {
626 : if (bits == 128) {
627 : AES_128_Key_Expansion (userKey,key);
628 : } else if (bits == 192) {
629 : AES_192_Key_Expansion (userKey,key);
630 : } else if (bits == 256) {
631 : AES_256_Key_Expansion (userKey,key);
632 : }
633 : #if (OCB_KEY_LEN == 0)
634 : key->rounds = 6+bits/32;
635 : #endif
636 : return 0;
637 : }
638 :
/* Derive the decryption schedule from an existing encryption schedule
   (equal inverse cipher): round keys in reverse order, with AESIMC
   applied to every key except the first and last. */
static void AES_set_decrypt_key_fast(AES_KEY *dkey, const AES_KEY *ekey)
{
    int j = 0;
    int i = ROUNDS(ekey);
#if (OCB_KEY_LEN == 0)
    dkey->rounds = i;
#endif
    dkey->rd_key[i--] = ekey->rd_key[j++];
    while (i)
        dkey->rd_key[i--] = _mm_aesimc_si128(ekey->rd_key[j++]);
    dkey->rd_key[i] = ekey->rd_key[j];
}
651 :
/* Build a decryption schedule from raw key bytes: expand an encryption
   schedule into a temporary, then invert it.  Always returns 0. */
static int AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key)
{
    AES_KEY temp_key;
    AES_set_encrypt_key(userKey,bits,&temp_key);
    AES_set_decrypt_key_fast(key, &temp_key);
    return 0;
}
659 :
/* Encrypt one 16-byte block with AES-NI: initial whitening XOR, rnds-1
   full rounds, then the final round (AESENCLAST, no MixColumns).
   NOTE(review): in/out are loaded/stored with aligned _mm_load/_mm_store —
   callers must supply 16-byte-aligned pointers. */
static inline void AES_encrypt(const unsigned char *in,
                               unsigned char *out, const AES_KEY *key)
{
    int j,rnds=ROUNDS(key);
    const __m128i *sched = ((__m128i *)(key->rd_key));
    __m128i tmp = _mm_load_si128 ((__m128i*)in);
    tmp = _mm_xor_si128 (tmp,sched[0]);
    for (j=1; j<rnds; j++) tmp = _mm_aesenc_si128 (tmp,sched[j]);
    tmp = _mm_aesenclast_si128 (tmp,sched[j]);
    _mm_store_si128 ((__m128i*)out,tmp);
}
671 :
/* Decrypt one 16-byte block with AES-NI; mirror of AES_encrypt using the
   inverse-cipher schedule (AESDEC/AESDECLAST).  Pointers must be
   16-byte aligned. */
static inline void AES_decrypt(const unsigned char *in,
                               unsigned char *out, const AES_KEY *key)
{
    int j,rnds=ROUNDS(key);
    const __m128i *sched = ((__m128i *)(key->rd_key));
    __m128i tmp = _mm_load_si128 ((__m128i*)in);
    tmp = _mm_xor_si128 (tmp,sched[0]);
    for (j=1; j<rnds; j++) tmp = _mm_aesdec_si128 (tmp,sched[j]);
    tmp = _mm_aesdeclast_si128 (tmp,sched[j]);
    _mm_store_si128 ((__m128i*)out,tmp);
}
683 :
/* ECB-encrypt nblks blocks in place, interleaving each round across all
   blocks so the independent AESENC instructions can pipeline. */
static inline void AES_ecb_encrypt_blks(block *blks, unsigned nblks, AES_KEY *key) {
    unsigned i,j,rnds=ROUNDS(key);
    const __m128i *sched = ((__m128i *)(key->rd_key));
    for (i=0; i<nblks; ++i)
        blks[i] =_mm_xor_si128(blks[i], sched[0]);
    for(j=1; j<rnds; ++j)
        for (i=0; i<nblks; ++i)
            blks[i] = _mm_aesenc_si128(blks[i], sched[j]);
    for (i=0; i<nblks; ++i)
        blks[i] =_mm_aesenclast_si128(blks[i], sched[j]);
}
695 :
/* ECB-decrypt nblks blocks in place; round-interleaved mirror of
   AES_ecb_encrypt_blks using the inverse cipher. */
static inline void AES_ecb_decrypt_blks(block *blks, unsigned nblks, AES_KEY *key) {
    unsigned i,j,rnds=ROUNDS(key);
    const __m128i *sched = ((__m128i *)(key->rd_key));
    for (i=0; i<nblks; ++i)
        blks[i] =_mm_xor_si128(blks[i], sched[0]);
    for(j=1; j<rnds; ++j)
        for (i=0; i<nblks; ++i)
            blks[i] = _mm_aesdec_si128(blks[i], sched[j]);
    for (i=0; i<nblks; ++i)
        blks[i] =_mm_aesdeclast_si128(blks[i], sched[j]);
}
707 :
708 : #define BPI 8 /* Number of blocks in buffer per ECB call */
709 : /* Set to 4 for Westmere, 8 for Sandy Bridge */
710 :
711 : #else
712 : #error "No AES implementation selected."
713 : #endif
714 :
715 : /* ----------------------------------------------------------------------- */
716 : /* Define OCB context structure. */
717 : /* ----------------------------------------------------------------------- */
718 :
719 : /*------------------------------------------------------------------------
720 : / Each item in the OCB context is stored either "memory correct" or
721 : / "register correct". On big-endian machines, this is identical. On
722 : / little-endian machines, one must choose whether the byte-string
723 : / is in the correct order when it resides in memory or in registers.
724 : / It must be register correct whenever it is to be manipulated
725 : / arithmetically, but must be memory correct whenever it interacts
726 : / with the plaintext or ciphertext.
727 : /------------------------------------------------------------------------- */
728 :
/* All OCB state for one key: running offsets and checksums, the
   precomputed L values, the cached nonce top, and both AES schedules.
   See the "memory correct / register correct" note above. */
struct _ae_ctx {
    block offset; /* Memory correct */
    block checksum; /* Memory correct */
    block Lstar; /* Memory correct */
    block Ldollar; /* Memory correct */
    block L[L_TABLE_SZ]; /* Memory correct */
    block ad_checksum; /* Memory correct */
    block ad_offset; /* Memory correct */
    block cached_Top; /* Memory correct */
    uint64_t KtopStr[3]; /* Register correct, each item */
    uint32_t ad_blocks_processed; /* AD blocks consumed so far; ae_init resets to 0 */
    uint32_t blocks_processed; /* message blocks consumed so far (presumably; confirm in ae_encrypt) */
    AES_KEY decrypt_key;
    AES_KEY encrypt_key;
#if (OCB_TAG_LEN == 0)
    unsigned tag_len; /* runtime tag length when not fixed at compile time */
#endif
};
747 :
748 : /* ----------------------------------------------------------------------- */
749 : /* L table lookup (or on-the-fly generation) */
750 : /* ----------------------------------------------------------------------- */
751 :
#if L_TABLE_SZ_IS_ENOUGH
/* All needed L values are precomputed — just index the table. */
#define getL(_ctx, _tz) ((_ctx)->L[_tz])
#else
/* Return L_{tz}: straight from the table when precomputed, otherwise
   derived by repeated GF(2^128) doubling of the last table entry. */
static block getL(const ae_ctx *ctx, unsigned tz)
{
    if (tz < L_TABLE_SZ)
        return ctx->L[tz];
    else {
        unsigned i;
        /* Bring L[MAX] into registers, make it register correct */
        block rval = swap_if_le(ctx->L[L_TABLE_SZ-1]);
        rval = double_block(rval);
        for (i=L_TABLE_SZ; i < tz; i++)
            rval = double_block(rval);
        return swap_if_le(rval); /* To memory correct */
    }
}
#endif
770 :
771 : /* ----------------------------------------------------------------------- */
772 : /* Public functions */
773 : /* ----------------------------------------------------------------------- */
774 :
775 : /* 32-bit SSE2 and Altivec systems need to be forced to allocate memory
776 : on 16-byte alignments. (I believe all major 64-bit systems do already.) */
777 :
778 : /* Mosh uses its own AlignedBuffer class, not ae_allocate() or ae_free(). */
779 :
780 : /* ----------------------------------------------------------------------- */
781 :
int ae_clear (ae_ctx *ctx) /* Zero ae_ctx and undo initialization */
{
    /* NOTE(review): a plain memset of key material can be elided by the
       optimizer if ctx is provably not read afterwards; consider a
       secure-zero primitive if scrubbing secrets is the intent here. */
    memset(ctx, 0, sizeof(ae_ctx));
    return AE_SUCCESS;
}
787 :
/* Size of an ae_ctx, so callers can allocate (suitably aligned) storage. */
int ae_ctx_sizeof(void) { return (int) sizeof(ae_ctx); }
789 :
790 : /* ----------------------------------------------------------------------- */
791 :
/* Initialize ctx for the given key.  Only 12-byte nonces are supported
   (returns AE_NOT_SUPPORTED otherwise).  Builds both AES key schedules,
   then precomputes Lstar = E_K(0), Ldollar = 2*Lstar, and
   L[i] = 2^(i+1)*Lstar, doubling in GF(2^128); the doublings run in
   register-correct form and each result is stored memory correct.
   Returns AE_SUCCESS on success. */
int ae_init(ae_ctx *ctx, const void *key, int key_len, int nonce_len, int tag_len)
{
    unsigned i;
    block tmp_blk;

    if (nonce_len != 12)
        return AE_NOT_SUPPORTED;

    /* Initialize encryption & decryption keys */
#if (OCB_KEY_LEN > 0)
    key_len = OCB_KEY_LEN;   /* compile-time key length overrides the argument */
#endif
    AES_set_encrypt_key((unsigned char *)key, key_len*8, &ctx->encrypt_key);
#if USE_AES_NI
    AES_set_decrypt_key_fast(&ctx->decrypt_key,&ctx->encrypt_key);
#else
    AES_set_decrypt_key((unsigned char *)key, (int)(key_len*8), &ctx->decrypt_key);
#endif

    /* Zero things that need zeroing */
    ctx->cached_Top = ctx->ad_checksum = zero_block();
    ctx->ad_blocks_processed = 0;

    /* Compute key-dependent values */
    AES_encrypt((unsigned char *)&ctx->cached_Top,
                (unsigned char *)&ctx->Lstar, &ctx->encrypt_key);
    tmp_blk = swap_if_le(ctx->Lstar);
    tmp_blk = double_block(tmp_blk);
    ctx->Ldollar = swap_if_le(tmp_blk);
    tmp_blk = double_block(tmp_blk);
    ctx->L[0] = swap_if_le(tmp_blk);
    for (i = 1; i < L_TABLE_SZ; i++) {
        tmp_blk = double_block(tmp_blk);
        ctx->L[i] = swap_if_le(tmp_blk);
    }

#if (OCB_TAG_LEN == 0)
    ctx->tag_len = tag_len;
#else
    (void) tag_len; /* Suppress var not used error */
#endif

    return AE_SUCCESS;
}
836 :
837 : /* ----------------------------------------------------------------------- */
838 :
/* Compute the initial per-message offset ("Offset_0" in RFC 7253) from a
   12-byte nonce.  The nonce is formatted as a 16-byte block; its top bits
   select Ktop = E_K(top), which is cached in ctx because successive nonces
   typically differ only in the low 6 bits.  Those 6 bits then select one
   of 64 bit-shifts of the Ktop "stretch" via gen_offset(). */
static block gen_offset_from_nonce(ae_ctx *ctx, const void *nonce)
{
    const union { unsigned x; unsigned char endian; } little = { 1 }; /* runtime endianness probe */
    union { uint32_t u32[4]; uint8_t u8[16]; block bl; } tmp;
    unsigned idx;

    /* Replace cached nonce Top if needed */
    /* Format the block as [fixed tag word 0x00000001 (big-endian) || nonce].
       NOTE(review): nonce is read through a uint32_t*, which assumes the
       caller's buffer is 4-byte aligned — confirm at call sites. */
    tmp.u32[0] = (little.endian?0x01000000:0x00000001);
    tmp.u32[1] = ((uint32_t *)nonce)[0];
    tmp.u32[2] = ((uint32_t *)nonce)[1];
    tmp.u32[3] = ((uint32_t *)nonce)[2];
    idx = (unsigned)(tmp.u8[15] & 0x3f); /* Get low 6 bits of nonce */
    tmp.u8[15] = tmp.u8[15] & 0xc0; /* Zero low 6 bits of nonce */
    if ( unequal_blocks(tmp.bl,ctx->cached_Top) ) { /* Cached? */
        ctx->cached_Top = tmp.bl; /* Update cache, KtopStr */
        AES_encrypt(tmp.u8, (unsigned char *)&ctx->KtopStr, &ctx->encrypt_key);
        if (little.endian) { /* Make Register Correct */
            ctx->KtopStr[0] = bswap64(ctx->KtopStr[0]);
            ctx->KtopStr[1] = bswap64(ctx->KtopStr[1]);
        }
        /* "Stretch" extension: KtopStr[2] = Ktop[0] ^ (Ktop << 8)'s top word */
        ctx->KtopStr[2] = ctx->KtopStr[0] ^
            (ctx->KtopStr[0] << 8) ^ (ctx->KtopStr[1] >> 56);
    }
    return gen_offset(ctx->KtopStr, idx);
}
864 :
/* Hash associated data into ctx->ad_checksum (PMAC-style; no output).
   Unless 'final' is set, ad_len must be a multiple of BPI*16 so state can
   be carried across calls in ctx->{ad_offset, ad_checksum,
   ad_blocks_processed}.  Each 16-byte AD block is xored with a per-block
   offset, AES-encrypted, and xored into the running checksum. */
static void process_ad(ae_ctx *ctx, const void *ad, int ad_len, int final)
{
    union { uint32_t u32[4]; uint8_t u8[16]; block bl; } tmp;
    block ad_offset, ad_checksum;
    const block * adp = (block *)ad;
    unsigned i,k,tz,remaining;

    ad_offset = ctx->ad_offset;
    ad_checksum = ctx->ad_checksum;
    i = ad_len/(BPI*16);                /* Number of full BPI-block chunks */
    if (i) {
        unsigned ad_block_num = ctx->ad_blocks_processed;
        do {
            block ta[BPI], oa[BPI];
            ad_block_num += BPI;
            tz = ntz(ad_block_num);     /* Chunk's last block uses L[ntz] */
            /* oa[j] is block j's offset; the gray-code schedule lets each
               step xor in a single L value. */
            oa[0] = xor_block(ad_offset, ctx->L[0]);
            ta[0] = xor_block(oa[0], adp[0]);
            oa[1] = xor_block(oa[0], ctx->L[1]);
            ta[1] = xor_block(oa[1], adp[1]);
            oa[2] = xor_block(ad_offset, ctx->L[1]);
            ta[2] = xor_block(oa[2], adp[2]);
            #if BPI == 4
            ad_offset = xor_block(oa[2], getL(ctx, tz));
            ta[3] = xor_block(ad_offset, adp[3]);
            #elif BPI == 8
            oa[3] = xor_block(oa[2], ctx->L[2]);
            ta[3] = xor_block(oa[3], adp[3]);
            oa[4] = xor_block(oa[1], ctx->L[2]);
            ta[4] = xor_block(oa[4], adp[4]);
            oa[5] = xor_block(oa[0], ctx->L[2]);
            ta[5] = xor_block(oa[5], adp[5]);
            oa[6] = xor_block(ad_offset, ctx->L[2]);
            ta[6] = xor_block(oa[6], adp[6]);
            ad_offset = xor_block(oa[6], getL(ctx, tz));
            ta[7] = xor_block(ad_offset, adp[7]);
            #endif
            AES_ecb_encrypt_blks(ta,BPI,&ctx->encrypt_key);
            /* Fold the encrypted blocks into the running checksum */
            ad_checksum = xor_block(ad_checksum, ta[0]);
            ad_checksum = xor_block(ad_checksum, ta[1]);
            ad_checksum = xor_block(ad_checksum, ta[2]);
            ad_checksum = xor_block(ad_checksum, ta[3]);
            #if (BPI == 8)
            ad_checksum = xor_block(ad_checksum, ta[4]);
            ad_checksum = xor_block(ad_checksum, ta[5]);
            ad_checksum = xor_block(ad_checksum, ta[6]);
            ad_checksum = xor_block(ad_checksum, ta[7]);
            #endif
            adp += BPI;
        } while (--i);
        ctx->ad_blocks_processed = ad_block_num;
        ctx->ad_offset = ad_offset;
        ctx->ad_checksum = ad_checksum;
    }

    if (final) {
        block ta[BPI];

        /* Process remaining associated data, compute its tag contribution */
        remaining = ((unsigned)ad_len) % (BPI*16);
        if (remaining) {
            k=0;                        /* Blocks queued in ta[] for ECB */
            #if (BPI == 8)
            if (remaining >= 64) {
                tmp.bl = xor_block(ad_offset, ctx->L[0]);
                ta[0] = xor_block(tmp.bl, adp[0]);
                tmp.bl = xor_block(tmp.bl, ctx->L[1]);
                ta[1] = xor_block(tmp.bl, adp[1]);
                ad_offset = xor_block(ad_offset, ctx->L[1]);
                ta[2] = xor_block(ad_offset, adp[2]);
                ad_offset = xor_block(ad_offset, ctx->L[2]);
                ta[3] = xor_block(ad_offset, adp[3]);
                remaining -= 64;
                k=4;
            }
            #endif
            if (remaining >= 32) {
                ad_offset = xor_block(ad_offset, ctx->L[0]);
                ta[k] = xor_block(ad_offset, adp[k]);
                ad_offset = xor_block(ad_offset, getL(ctx, ntz(k+2)));
                ta[k+1] = xor_block(ad_offset, adp[k+1]);
                remaining -= 32;
                k+=2;
            }
            if (remaining >= 16) {
                ad_offset = xor_block(ad_offset, ctx->L[0]);
                ta[k] = xor_block(ad_offset, adp[k]);
                remaining = remaining - 16;
                ++k;
            }
            if (remaining) {
                /* Partial final block: 10* padding, offset includes L* */
                ad_offset = xor_block(ad_offset,ctx->Lstar);
                tmp.bl = zero_block();
                memcpy(tmp.u8, adp+k, remaining);
                tmp.u8[remaining] = (unsigned char)0x80u;
                ta[k] = xor_block(ad_offset, tmp.bl);
                ++k;
            }
            AES_ecb_encrypt_blks(ta,k,&ctx->encrypt_key);
            switch (k) {   /* Fold the k encrypted blocks into the checksum */
                #if (BPI == 8)
                case 8: ad_checksum = xor_block(ad_checksum, ta[7]);
                /* fallthrough */
                case 7: ad_checksum = xor_block(ad_checksum, ta[6]);
                /* fallthrough */
                case 6: ad_checksum = xor_block(ad_checksum, ta[5]);
                /* fallthrough */
                case 5: ad_checksum = xor_block(ad_checksum, ta[4]);
                /* fallthrough */
                #endif
                case 4: ad_checksum = xor_block(ad_checksum, ta[3]);
                /* fallthrough */
                case 3: ad_checksum = xor_block(ad_checksum, ta[2]);
                /* fallthrough */
                case 2: ad_checksum = xor_block(ad_checksum, ta[1]);
                /* fallthrough */
                case 1: ad_checksum = xor_block(ad_checksum, ta[0]);
            }
            ctx->ad_checksum = ad_checksum;
        }
    }
}
987 :
988 : /* ----------------------------------------------------------------------- */
989 :
/* OCB3 encryption + authentication.
   Encrypts pt_len bytes from pt into ct, authenticating ad_len bytes of ad.
   A non-NULL nonce begins a new message (resets per-message state).
   When final is set, the tag is written to tag if tag is non-NULL, else
   appended to ct.  For non-final (AE_PENDING) calls, pt_len and ad_len
   must be multiples of BPI*16 (see usage notes at top of file).
   Returns the number of bytes written to ct. */
int ae_encrypt(ae_ctx * ctx,
               const void * nonce,
               const void *pt,
               int pt_len,
               const void *ad,
               int ad_len,
               void *ct,
               void *tag,
               int final)
{
    union { uint32_t u32[4]; uint8_t u8[16]; block bl; } tmp;
    block offset, checksum;
    unsigned i, k;
    block * ctp = (block *)ct;
    const block * ptp = (block *)pt;

    /* Non-null nonce means start of new message, init per-message values */
    if (nonce) {
        ctx->offset = gen_offset_from_nonce(ctx, nonce);
        ctx->ad_offset = ctx->checksum = zero_block();
        ctx->ad_blocks_processed = ctx->blocks_processed = 0;
        /* ad_len < 0 means "reuse the previously computed AD checksum" */
        if (ad_len >= 0)
            ctx->ad_checksum = zero_block();
    }

    /* Process associated data */
    if (ad_len > 0)
        process_ad(ctx, ad, ad_len, final);

    /* Encrypt plaintext data BPI blocks at a time */
    offset = ctx->offset;
    checksum = ctx->checksum;
    i = pt_len/(BPI*16);            /* Number of full BPI-block chunks */
    if (i) {
        block oa[BPI];
        unsigned block_num = ctx->blocks_processed;
        oa[BPI-1] = offset;         /* oa[BPI-1] carries the chunk's base offset */
        do {
            block ta[BPI];
            block_num += BPI;
            /* Gray-code offset schedule: each oa[j] differs from its
               predecessor by a single L value; the chunk's last block
               uses L[ntz(block_num)]. */
            oa[0] = xor_block(oa[BPI-1], ctx->L[0]);
            ta[0] = xor_block(oa[0], ptp[0]);
            checksum = xor_block(checksum, ptp[0]);
            oa[1] = xor_block(oa[0], ctx->L[1]);
            ta[1] = xor_block(oa[1], ptp[1]);
            checksum = xor_block(checksum, ptp[1]);
            oa[2] = xor_block(oa[1], ctx->L[0]);
            ta[2] = xor_block(oa[2], ptp[2]);
            checksum = xor_block(checksum, ptp[2]);
            #if BPI == 4
            oa[3] = xor_block(oa[2], getL(ctx, ntz(block_num)));
            ta[3] = xor_block(oa[3], ptp[3]);
            checksum = xor_block(checksum, ptp[3]);
            #elif BPI == 8
            oa[3] = xor_block(oa[2], ctx->L[2]);
            ta[3] = xor_block(oa[3], ptp[3]);
            checksum = xor_block(checksum, ptp[3]);
            oa[4] = xor_block(oa[1], ctx->L[2]);
            ta[4] = xor_block(oa[4], ptp[4]);
            checksum = xor_block(checksum, ptp[4]);
            oa[5] = xor_block(oa[0], ctx->L[2]);
            ta[5] = xor_block(oa[5], ptp[5]);
            checksum = xor_block(checksum, ptp[5]);
            /* oa[7] still holds the chunk's base offset at this point */
            oa[6] = xor_block(oa[7], ctx->L[2]);
            ta[6] = xor_block(oa[6], ptp[6]);
            checksum = xor_block(checksum, ptp[6]);
            oa[7] = xor_block(oa[6], getL(ctx, ntz(block_num)));
            ta[7] = xor_block(oa[7], ptp[7]);
            checksum = xor_block(checksum, ptp[7]);
            #endif
            AES_ecb_encrypt_blks(ta,BPI,&ctx->encrypt_key);
            /* C_j = E_K(P_j xor O_j) xor O_j */
            ctp[0] = xor_block(ta[0], oa[0]);
            ctp[1] = xor_block(ta[1], oa[1]);
            ctp[2] = xor_block(ta[2], oa[2]);
            ctp[3] = xor_block(ta[3], oa[3]);
            #if (BPI == 8)
            ctp[4] = xor_block(ta[4], oa[4]);
            ctp[5] = xor_block(ta[5], oa[5]);
            ctp[6] = xor_block(ta[6], oa[6]);
            ctp[7] = xor_block(ta[7], oa[7]);
            #endif
            ptp += BPI;
            ctp += BPI;
        } while (--i);
        ctx->offset = offset = oa[BPI-1];
        ctx->blocks_processed = block_num;
        ctx->checksum = checksum;
    }

    if (final) {
        block ta[BPI+1], oa[BPI];   /* ta has one extra slot for the tag block */

        /* Process remaining plaintext and compute its tag contribution */
        unsigned remaining = ((unsigned)pt_len) % (BPI*16);
        k = 0; /* How many blocks in ta[] need ECBing */
        if (remaining) {
            #if (BPI == 8)
            if (remaining >= 64) {
                oa[0] = xor_block(offset, ctx->L[0]);
                ta[0] = xor_block(oa[0], ptp[0]);
                checksum = xor_block(checksum, ptp[0]);
                oa[1] = xor_block(oa[0], ctx->L[1]);
                ta[1] = xor_block(oa[1], ptp[1]);
                checksum = xor_block(checksum, ptp[1]);
                oa[2] = xor_block(oa[1], ctx->L[0]);
                ta[2] = xor_block(oa[2], ptp[2]);
                checksum = xor_block(checksum, ptp[2]);
                offset = oa[3] = xor_block(oa[2], ctx->L[2]);
                ta[3] = xor_block(offset, ptp[3]);
                checksum = xor_block(checksum, ptp[3]);
                remaining -= 64;
                k = 4;
            }
            #endif
            if (remaining >= 32) {
                oa[k] = xor_block(offset, ctx->L[0]);
                ta[k] = xor_block(oa[k], ptp[k]);
                checksum = xor_block(checksum, ptp[k]);
                offset = oa[k+1] = xor_block(oa[k], ctx->L[1]);
                ta[k+1] = xor_block(offset, ptp[k+1]);
                checksum = xor_block(checksum, ptp[k+1]);
                remaining -= 32;
                k+=2;
            }
            if (remaining >= 16) {
                offset = oa[k] = xor_block(offset, ctx->L[0]);
                ta[k] = xor_block(offset, ptp[k]);
                checksum = xor_block(checksum, ptp[k]);
                remaining -= 16;
                ++k;
            }
            if (remaining) {
                /* Partial final block: checksum gets 10*-padded plaintext;
                   ta[k] is set to the Pad input (offset xor L*), and the
                   resulting keystream is xored in after the ECB call. */
                tmp.bl = zero_block();
                memcpy(tmp.u8, ptp+k, remaining);
                tmp.u8[remaining] = (unsigned char)0x80u;
                checksum = xor_block(checksum, tmp.bl);
                ta[k] = offset = xor_block(offset,ctx->Lstar);
                ++k;
            }
        }
        offset = xor_block(offset, ctx->Ldollar);      /* Part of tag gen */
        ta[k] = xor_block(offset, checksum);           /* Part of tag gen */
        AES_ecb_encrypt_blks(ta,k+1,&ctx->encrypt_key);
        offset = xor_block(ta[k], ctx->ad_checksum);   /* Part of tag gen */
        if (remaining) {
            /* Write the partial ciphertext block: pt xor Pad keystream */
            --k;
            tmp.bl = xor_block(tmp.bl, ta[k]);
            memcpy(ctp+k, tmp.u8, remaining);
        }
        switch (k) {   /* Finish the k whole-block ciphertexts */
            #if (BPI == 8)
            case 7: ctp[6] = xor_block(ta[6], oa[6]);
            /* fallthrough */
            case 6: ctp[5] = xor_block(ta[5], oa[5]);
            /* fallthrough */
            case 5: ctp[4] = xor_block(ta[4], oa[4]);
            /* fallthrough */
            case 4: ctp[3] = xor_block(ta[3], oa[3]);
            /* fallthrough */
            #endif
            case 3: ctp[2] = xor_block(ta[2], oa[2]);
            /* fallthrough */
            case 2: ctp[1] = xor_block(ta[1], oa[1]);
            /* fallthrough */
            case 1: ctp[0] = xor_block(ta[0], oa[0]);
        }

        /* Emit the tag: into 'tag' if supplied, else appended to ct
           (in which case the returned length includes the tag). */
        if (tag) {
            #if (OCB_TAG_LEN == 16)
            *(block *)tag = offset;
            #elif (OCB_TAG_LEN > 0)
            memcpy((char *)tag, &offset, OCB_TAG_LEN);
            #else
            memcpy((char *)tag, &offset, ctx->tag_len);
            #endif
        } else {
            #if (OCB_TAG_LEN > 0)
            memcpy((char *)ct + pt_len, &offset, OCB_TAG_LEN);
            pt_len += OCB_TAG_LEN;
            #else
            memcpy((char *)ct + pt_len, &offset, ctx->tag_len);
            pt_len += ctx->tag_len;
            #endif
        }
    }
    return (int) pt_len;
}
1179 :
1180 : /* ----------------------------------------------------------------------- */
1181 :
1182 : /* Compare two regions of memory, taking a constant amount of time for a
1183 : given buffer size -- under certain assumptions about the compiler
1184 : and machine, of course.
1185 :
1186 : Use this to avoid timing side-channel attacks.
1187 :
1188 : Returns 0 for memory regions with equal contents; non-zero otherwise. */
1189 42796 : static int constant_time_memcmp(const void *av, const void *bv, size_t n) {
1190 42796 : const uint8_t *a = (const uint8_t *) av;
1191 42796 : const uint8_t *b = (const uint8_t *) bv;
1192 42796 : uint8_t result = 0;
1193 42796 : size_t i;
1194 :
1195 727532 : for (i=0; i<n; i++) {
1196 684736 : result |= *a ^ *b;
1197 684736 : a++;
1198 684736 : b++;
1199 : }
1200 :
1201 42796 : return (int) result;
1202 : }
1203 :
/* OCB3 decryption + verification; mirrors ae_encrypt.
   Decrypts ct_len bytes of ct into pt and checks the tag (taken from the
   'tag' argument if non-NULL, otherwise from the end of ct, in which case
   ct_len includes the tag).  A non-NULL nonce begins a new message; for
   non-final calls ct_len/ad_len must be multiples of BPI*16.
   Returns the plaintext length, or AE_INVALID on tag mismatch.
   NOTE: plaintext is written to pt BEFORE the tag is verified -- callers
   must discard pt whenever AE_INVALID is returned. */
int ae_decrypt(ae_ctx *ctx,
               const void *nonce,
               const void *ct,
               int ct_len,
               const void *ad,
               int ad_len,
               void *pt,
               const void *tag,
               int final)
{
    union { uint32_t u32[4]; uint8_t u8[16]; block bl; } tmp;
    block offset, checksum;
    unsigned i, k;
    block *ctp = (block *)ct;
    block *ptp = (block *)pt;

    /* Reduce ct_len tag bundled in ct */
    if ((final) && (!tag))
        #if (OCB_TAG_LEN > 0)
        ct_len -= OCB_TAG_LEN;
        #else
        ct_len -= ctx->tag_len;
        #endif

    /* Non-null nonce means start of new message, init per-message values */
    if (nonce) {
        ctx->offset = gen_offset_from_nonce(ctx, nonce);
        ctx->ad_offset = ctx->checksum = zero_block();
        ctx->ad_blocks_processed = ctx->blocks_processed = 0;
        /* ad_len < 0 means "reuse the previously computed AD checksum" */
        if (ad_len >= 0)
            ctx->ad_checksum = zero_block();
    }

    /* Process associated data */
    if (ad_len > 0)
        process_ad(ctx, ad, ad_len, final);

    /* Encrypt plaintext data BPI blocks at a time */
    offset = ctx->offset;
    checksum = ctx->checksum;
    i = ct_len/(BPI*16);            /* Number of full BPI-block chunks */
    if (i) {
        block oa[BPI];
        unsigned block_num = ctx->blocks_processed;
        oa[BPI-1] = offset;         /* oa[BPI-1] carries the chunk's base offset */
        do {
            block ta[BPI];
            block_num += BPI;
            /* Same gray-code offset schedule as ae_encrypt */
            oa[0] = xor_block(oa[BPI-1], ctx->L[0]);
            ta[0] = xor_block(oa[0], ctp[0]);
            oa[1] = xor_block(oa[0], ctx->L[1]);
            ta[1] = xor_block(oa[1], ctp[1]);
            oa[2] = xor_block(oa[1], ctx->L[0]);
            ta[2] = xor_block(oa[2], ctp[2]);
            #if BPI == 4
            oa[3] = xor_block(oa[2], getL(ctx, ntz(block_num)));
            ta[3] = xor_block(oa[3], ctp[3]);
            #elif BPI == 8
            oa[3] = xor_block(oa[2], ctx->L[2]);
            ta[3] = xor_block(oa[3], ctp[3]);
            oa[4] = xor_block(oa[1], ctx->L[2]);
            ta[4] = xor_block(oa[4], ctp[4]);
            oa[5] = xor_block(oa[0], ctx->L[2]);
            ta[5] = xor_block(oa[5], ctp[5]);
            /* oa[7] still holds the chunk's base offset at this point */
            oa[6] = xor_block(oa[7], ctx->L[2]);
            ta[6] = xor_block(oa[6], ctp[6]);
            oa[7] = xor_block(oa[6], getL(ctx, ntz(block_num)));
            ta[7] = xor_block(oa[7], ctp[7]);
            #endif
            AES_ecb_decrypt_blks(ta,BPI,&ctx->decrypt_key);
            /* P_j = D_K(C_j xor O_j) xor O_j; checksum is over plaintext */
            ptp[0] = xor_block(ta[0], oa[0]);
            checksum = xor_block(checksum, ptp[0]);
            ptp[1] = xor_block(ta[1], oa[1]);
            checksum = xor_block(checksum, ptp[1]);
            ptp[2] = xor_block(ta[2], oa[2]);
            checksum = xor_block(checksum, ptp[2]);
            ptp[3] = xor_block(ta[3], oa[3]);
            checksum = xor_block(checksum, ptp[3]);
            #if (BPI == 8)
            ptp[4] = xor_block(ta[4], oa[4]);
            checksum = xor_block(checksum, ptp[4]);
            ptp[5] = xor_block(ta[5], oa[5]);
            checksum = xor_block(checksum, ptp[5]);
            ptp[6] = xor_block(ta[6], oa[6]);
            checksum = xor_block(checksum, ptp[6]);
            ptp[7] = xor_block(ta[7], oa[7]);
            checksum = xor_block(checksum, ptp[7]);
            #endif
            ptp += BPI;
            ctp += BPI;
        } while (--i);
        ctx->offset = offset = oa[BPI-1];
        ctx->blocks_processed = block_num;
        ctx->checksum = checksum;
    }

    if (final) {
        block ta[BPI+1], oa[BPI];

        /* Process remaining plaintext and compute its tag contribution */
        unsigned remaining = ((unsigned)ct_len) % (BPI*16);
        k = 0; /* How many blocks in ta[] need ECBing */
        if (remaining) {
            #if (BPI == 8)
            if (remaining >= 64) {
                oa[0] = xor_block(offset, ctx->L[0]);
                ta[0] = xor_block(oa[0], ctp[0]);
                oa[1] = xor_block(oa[0], ctx->L[1]);
                ta[1] = xor_block(oa[1], ctp[1]);
                oa[2] = xor_block(oa[1], ctx->L[0]);
                ta[2] = xor_block(oa[2], ctp[2]);
                offset = oa[3] = xor_block(oa[2], ctx->L[2]);
                ta[3] = xor_block(offset, ctp[3]);
                remaining -= 64;
                k = 4;
            }
            #endif
            if (remaining >= 32) {
                oa[k] = xor_block(offset, ctx->L[0]);
                ta[k] = xor_block(oa[k], ctp[k]);
                offset = oa[k+1] = xor_block(oa[k], ctx->L[1]);
                ta[k+1] = xor_block(offset, ctp[k+1]);
                remaining -= 32;
                k+=2;
            }
            if (remaining >= 16) {
                offset = oa[k] = xor_block(offset, ctx->L[0]);
                ta[k] = xor_block(offset, ctp[k]);
                remaining -= 16;
                ++k;
            }
            if (remaining) {
                /* Partial final block: decrypt via the Pad keystream
                   E_K(offset xor L*), then 10*-pad the recovered
                   plaintext for the checksum. */
                block pad;
                offset = xor_block(offset,ctx->Lstar);
                AES_encrypt((unsigned char *)&offset, tmp.u8, &ctx->encrypt_key);
                pad = tmp.bl;
                memcpy(tmp.u8,ctp+k,remaining);
                tmp.bl = xor_block(tmp.bl, pad);
                tmp.u8[remaining] = (unsigned char)0x80u;
                memcpy(ptp+k, tmp.u8, remaining);
                checksum = xor_block(checksum, tmp.bl);
            }
        }
        /* NOTE(review): reached with k == 0 when there are no whole
           trailing blocks; assumes AES_ecb_decrypt_blks tolerates
           nblks == 0 -- confirm against its implementation. */
        AES_ecb_decrypt_blks(ta,k,&ctx->decrypt_key);
        switch (k) {   /* Finish the k whole-block plaintexts */
            #if (BPI == 8)
            case 7: ptp[6] = xor_block(ta[6], oa[6]);
                checksum = xor_block(checksum, ptp[6]);
            /* fallthrough */
            case 6: ptp[5] = xor_block(ta[5], oa[5]);
                checksum = xor_block(checksum, ptp[5]);
            /* fallthrough */
            case 5: ptp[4] = xor_block(ta[4], oa[4]);
                checksum = xor_block(checksum, ptp[4]);
            /* fallthrough */
            case 4: ptp[3] = xor_block(ta[3], oa[3]);
                checksum = xor_block(checksum, ptp[3]);
            /* fallthrough */
            #endif
            case 3: ptp[2] = xor_block(ta[2], oa[2]);
                checksum = xor_block(checksum, ptp[2]);
            /* fallthrough */
            case 2: ptp[1] = xor_block(ta[1], oa[1]);
                checksum = xor_block(checksum, ptp[1]);
            /* fallthrough */
            case 1: ptp[0] = xor_block(ta[0], oa[0]);
                checksum = xor_block(checksum, ptp[0]);
        }

        /* Calculate expected tag */
        offset = xor_block(offset, ctx->Ldollar);
        tmp.bl = xor_block(offset, checksum);
        AES_encrypt(tmp.u8, tmp.u8, &ctx->encrypt_key);
        tmp.bl = xor_block(tmp.bl, ctx->ad_checksum); /* Full tag */

        /* Compare with proposed tag, change ct_len if invalid.
           Comparison is constant-time to avoid timing side channels. */
        if ((OCB_TAG_LEN == 16) && tag) {
            if (unequal_blocks(tmp.bl, *(block *)tag))
                ct_len = AE_INVALID;
        } else {
            #if (OCB_TAG_LEN > 0)
            int len = OCB_TAG_LEN;
            #else
            int len = ctx->tag_len;
            #endif
            if (tag) {
                if (constant_time_memcmp(tag,tmp.u8,len) != 0)
                    ct_len = AE_INVALID;
            } else {
                if (constant_time_memcmp((char *)ct + ct_len,tmp.u8,len) != 0)
                    ct_len = AE_INVALID;
            }
        }
    }
    return ct_len;
}
1400 :
1401 : /* ----------------------------------------------------------------------- */
1402 : /* Simple test program */
1403 : /* ----------------------------------------------------------------------- */
1404 :
1405 : #if defined(OCB_TEST_PROGRAM)
1406 :
1407 : #include <stdio.h>
1408 : #include <time.h>
1409 :
1410 : #if __GNUC__
1411 : #define ALIGN(n) __attribute__ ((aligned(n)))
1412 : #elif _MSC_VER
1413 : #define ALIGN(n) __declspec(align(n))
1414 : #else /* Not GNU/Microsoft: delete alignment uses. */
1415 : #define ALIGN(n)
1416 : #endif
1417 :
/* Print an optional label string followed by len bytes of p as uppercase
   hex, ending with a newline (test-program helper). */
static void pbuf(void *p, unsigned len, const void *s)
{
    const unsigned char *bytes = (const unsigned char *)p;
    unsigned j;
    if (s)
        printf("%s", (char *)s);
    for (j = 0; j < len; j++)
        printf("%02X", (unsigned)bytes[j]);
    printf("\n");
}
1427 :
/* Print three test vectors for a len-byte message: plaintext+AD both len
   bytes, AD-only, and plaintext-only (test-program helper). */
static void vectors(ae_ctx *ctx, int len)
{
    ALIGN(16) uint8_t pt[128];
    ALIGN(16) uint8_t ct[144];      /* Room for 128 bytes + 16-byte tag */
    ALIGN(16) uint8_t nonce[] = {0,1,2,3,4,5,6,7,8,9,10,11};
    int i;
    for (i=0; i < 128; i++) pt[i] = i;   /* Fixed 0..127 byte pattern */
    i = ae_encrypt(ctx,nonce,pt,len,pt,len,ct,NULL,AE_FINALIZE);
    printf("P=%d,A=%d: ",len,len); pbuf(ct, i, NULL);
    i = ae_encrypt(ctx,nonce,pt,0,pt,len,ct,NULL,AE_FINALIZE);
    printf("P=%d,A=%d: ",0,len); pbuf(ct, i, NULL);
    i = ae_encrypt(ctx,nonce,pt,len,pt,0,ct,NULL,AE_FINALIZE);
    printf("P=%d,A=%d: ",len,0); pbuf(ct, i, NULL);
}
1442 :
1443 : static void validate()
1444 : {
1445 : ALIGN(16) uint8_t pt[1024];
1446 : ALIGN(16) uint8_t ct[1024];
1447 : ALIGN(16) uint8_t tag[16];
1448 : ALIGN(16) uint8_t nonce[12] = {0,};
1449 : ALIGN(16) uint8_t key[32] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
1450 : ALIGN(16) uint8_t valid[] = {0xB2,0xB4,0x1C,0xBF,0x9B,0x05,0x03,0x7D,
1451 : 0xA7,0xF1,0x6C,0x24,0xA3,0x5C,0x1C,0x94};
1452 : ALIGN(16) uint8_t val_buf[22400];
1453 : ae_ctx ctx;
1454 : uint8_t *next = val_buf;
1455 : int i, len;
1456 :
1457 : if (0) {
1458 : ae_init(&ctx, key, 16, 12, 16);
1459 : /* pbuf(&ctx, sizeof(ctx), "CTX: "); */
1460 : vectors(&ctx,0);
1461 : vectors(&ctx,8);
1462 : vectors(&ctx,16);
1463 : vectors(&ctx,24);
1464 : vectors(&ctx,32);
1465 : vectors(&ctx,40);
1466 : }
1467 :
1468 : memset(key,0,32);
1469 : memset(pt,0,128);
1470 : ae_init(&ctx, key, 16, 12, 16);
1471 :
1472 : /* RFC Vector test */
1473 : for (i = 0; i < 128; i++) {
1474 : int first = ((i/3)/(BPI*16))*(BPI*16);
1475 : int second = first;
1476 : int third = i - (first + second);
1477 :
1478 : nonce[11] = i;
1479 :
1480 : if (0) {
1481 : ae_encrypt(&ctx,nonce,pt,i,pt,i,ct,NULL,AE_FINALIZE);
1482 : memcpy(next,ct,(size_t)i+16);
1483 : next = next+i+16;
1484 :
1485 : ae_encrypt(&ctx,nonce,pt,i,pt,0,ct,NULL,AE_FINALIZE);
1486 : memcpy(next,ct,(size_t)i+16);
1487 : next = next+i+16;
1488 :
1489 : ae_encrypt(&ctx,nonce,pt,0,pt,i,ct,NULL,AE_FINALIZE);
1490 : memcpy(next,ct,16);
1491 : next = next+16;
1492 : } else {
1493 : ae_encrypt(&ctx,nonce,pt,first,pt,first,ct,NULL,AE_PENDING);
1494 : ae_encrypt(&ctx,NULL,pt+first,second,pt+first,second,ct+first,NULL,AE_PENDING);
1495 : ae_encrypt(&ctx,NULL,pt+first+second,third,pt+first+second,third,ct+first+second,NULL,AE_FINALIZE);
1496 : memcpy(next,ct,(size_t)i+16);
1497 : next = next+i+16;
1498 :
1499 : ae_encrypt(&ctx,nonce,pt,first,pt,0,ct,NULL,AE_PENDING);
1500 : ae_encrypt(&ctx,NULL,pt+first,second,pt,0,ct+first,NULL,AE_PENDING);
1501 : ae_encrypt(&ctx,NULL,pt+first+second,third,pt,0,ct+first+second,NULL,AE_FINALIZE);
1502 : memcpy(next,ct,(size_t)i+16);
1503 : next = next+i+16;
1504 :
1505 : ae_encrypt(&ctx,nonce,pt,0,pt,first,ct,NULL,AE_PENDING);
1506 : ae_encrypt(&ctx,NULL,pt,0,pt+first,second,ct,NULL,AE_PENDING);
1507 : ae_encrypt(&ctx,NULL,pt,0,pt+first+second,third,ct,NULL,AE_FINALIZE);
1508 : memcpy(next,ct,16);
1509 : next = next+16;
1510 : }
1511 :
1512 : }
1513 : nonce[11] = 0;
1514 : ae_encrypt(&ctx,nonce,NULL,0,val_buf,next-val_buf,ct,tag,AE_FINALIZE);
1515 : pbuf(tag,16,0);
1516 : if (memcmp(valid,tag,16) == 0)
1517 : printf("Vectors: PASS\n");
1518 : else
1519 : printf("Vectors: FAIL\n");
1520 :
1521 :
1522 : /* Encrypt/Decrypt test */
1523 : for (i = 0; i < 128; i++) {
1524 : int first = ((i/3)/(BPI*16))*(BPI*16);
1525 : int second = first;
1526 : int third = i - (first + second);
1527 :
1528 : nonce[11] = i%128;
1529 :
1530 : if (1) {
1531 : len = ae_encrypt(&ctx,nonce,val_buf,i,val_buf,i,ct,tag,AE_FINALIZE);
1532 : len = ae_encrypt(&ctx,nonce,val_buf,i,val_buf,-1,ct,tag,AE_FINALIZE);
1533 : len = ae_decrypt(&ctx,nonce,ct,len,val_buf,-1,pt,tag,AE_FINALIZE);
1534 : if (len == -1) { printf("Authentication error: %d\n", i); return; }
1535 : if (len != i) { printf("Length error: %d\n", i); return; }
1536 : if (memcmp(val_buf,pt,i)) { printf("Decrypt error: %d\n", i); return; }
1537 : } else {
1538 : len = ae_encrypt(&ctx,nonce,val_buf,i,val_buf,i,ct,NULL,AE_FINALIZE);
1539 : ae_decrypt(&ctx,nonce,ct,first,val_buf,first,pt,NULL,AE_PENDING);
1540 : ae_decrypt(&ctx,NULL,ct+first,second,val_buf+first,second,pt+first,NULL,AE_PENDING);
1541 : len = ae_decrypt(&ctx,NULL,ct+first+second,len-(first+second),val_buf+first+second,third,pt+first+second,NULL,AE_FINALIZE);
1542 : if (len == -1) { printf("Authentication error: %d\n", i); return; }
1543 : if (memcmp(val_buf,pt,i)) { printf("Decrypt error: %d\n", i); return; }
1544 : }
1545 :
1546 : }
1547 : printf("Decrypt: PASS\n");
1548 : }
1549 :
/* Entry point for the standalone test build: run the validation suite.
   Fix vs. original: `main()` changed to the prototype form `main(void)`. */
int main(void)
{
    validate();
    return 0;
}
1555 : #endif
1556 :
/* Human-readable tag identifying which AES backend this build uses. */
#if USE_AES_NI
char infoString[] = "OCB3 (AES-NI)";
#elif USE_REFERENCE_AES
char infoString[] = "OCB3 (Reference)";
#elif USE_OPENSSL_AES
char infoString[] = "OCB3 (OpenSSL)";
#endif
|