LCOV - code coverage report
Current view: top level - src/crypto - ocb.cc (source / functions) Hit Total Coverage
Test: mosh-1.3.2 Code Coverage Lines: 329 332 99.1 %
Date: 2022-02-06 20:19:53 Functions: 10 10 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*------------------------------------------------------------------------
       2             : / OCB Version 3 Reference Code (Optimized C)     Last modified 08-SEP-2012
       3             : /-------------------------------------------------------------------------
       4             : / Copyright (c) 2012 Ted Krovetz.
       5             : /
       6             : / Permission to use, copy, modify, and/or distribute this software for any
       7             : / purpose with or without fee is hereby granted, provided that the above
       8             : / copyright notice and this permission notice appear in all copies.
       9             : /
      10             : / THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
      11             : / WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
      12             : / MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
      13             : / ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
      14             : / WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
      15             : / ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
      16             : / OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
      17             : /
      18             : / Phillip Rogaway holds patents relevant to OCB. See the following for
      19             : / his patent grant: http://www.cs.ucdavis.edu/~rogaway/ocb/grant.htm
      20             : /
      21             : / Special thanks to Keegan McAllister for suggesting several good improvements
      22             : /
      23             : / Comments are welcome: Ted Krovetz <ted@krovetz.net> - Dedicated to Laurel K
      24             : /------------------------------------------------------------------------- */
      25             : 
      26             : /* ----------------------------------------------------------------------- */
      27             : /* Usage notes                                                             */
      28             : /* ----------------------------------------------------------------------- */
      29             : 
      30             : /* - When AE_PENDING is passed as the 'final' parameter of any function,
      31             : /    the length parameters must be a multiple of (BPI*16).
      32             : /  - When available, SSE or AltiVec registers are used to manipulate data.
      33             : /    So, when on machines with these facilities, all pointers passed to
      34             : /    any function should be 16-byte aligned.
      35             : /  - Plaintext and ciphertext pointers may be equal (i.e., plaintext gets
      36             : /    encrypted in-place), but no other pair of pointers may be equal.
      37             : /  - This code assumes all x86 processors have SSE2 and SSSE3 instructions
      38             : /    when compiling under MSVC. If untrue, alter the #define.
      39             : /  - This code is tested for C99 and recent versions of GCC and MSVC.      */
      40             : 
      41             : /* ----------------------------------------------------------------------- */
      42             : /* User configuration options                                              */
      43             : /* ----------------------------------------------------------------------- */
      44             : 
      45             : /* Set the AES key length to use and length of authentication tag to produce.
      46             : /  Setting either to 0 requires the value be set at runtime via ae_init().
      47             : /  Some optimizations occur for each when set to a fixed value.            */
      48             : #define OCB_KEY_LEN         16  /* 0, 16, 24 or 32. 0 means set in ae_init */
      49             : #define OCB_TAG_LEN         16  /* 0 to 16. 0 means set in ae_init         */
      50             : 
      51             : /* This implementation has built-in support for multiple AES APIs. Set any
      52             : /  one of the following to non-zero to specify which to use.               */
      53             : #if 0
                       /* NOTE(review): this whole block is compiled out, so none of the USE_*
                       /  macros are defined here. Presumably the build supplies them through
                       /  "config.h" or the compiler command line -- TODO confirm. The list is
                       /  kept as a catalogue of the backends handled further down the file.  */
      54             : #define USE_APPLE_COMMON_CRYPTO_AES       0
      55             : #define USE_NETTLE_AES       0
      56             : #define USE_OPENSSL_AES      1  /* http://openssl.org                      */
      57             : #define USE_REFERENCE_AES    0  /* Internet search: rijndael-alg-fst.c     */
      58             : #define USE_AES_NI           0  /* Uses compiler's intrinsics              */
      59             : #endif
      60             : 
      61             : /* During encryption and decryption, various "L values" are required.
      62             : /  The L values can be precomputed during initialization (requiring extra
      63             : /  space in ae_ctx), generated as needed (slightly slowing encryption and
      64             : /  decryption), or some combination of the two. L_TABLE_SZ specifies how many
      65             : /  L values to precompute. L_TABLE_SZ must be at least 3. L_TABLE_SZ*16 bytes
      66             : /  are used for L values in ae_ctx. Plaintexts and ciphertexts shorter than
      67             : /  2^L_TABLE_SZ blocks need no L values calculated dynamically.            */
      68             : #define L_TABLE_SZ          16
      69             : 
      70             : /* Set L_TABLE_SZ_IS_ENOUGH non-zero iff you know that all plaintexts
      71             : /  will be shorter than 2^(L_TABLE_SZ+4) bytes in length. This results
      72             : /  in better performance.                                                  */
      73             : #define L_TABLE_SZ_IS_ENOUGH 1
      74             : 
      75             : /* ----------------------------------------------------------------------- */
      76             : /* Includes and compiler specific definitions                              */
      77             : /* ----------------------------------------------------------------------- */
      78             : 
      79             : #include "config.h"
      80             : #include "ae.h"
      81             : #include <stdlib.h>
      82             : #include <string.h>
      83             : #if defined(HAVE_STRINGS_H)
      84             : #include <strings.h>
      85             : #endif
      86             : #if defined(HAVE_ENDIAN_H)
      87             : #include <endian.h>
      88             : #elif defined(HAVE_SYS_ENDIAN_H)
      89             : #include <sys/types.h>
      90             : #include <sys/endian.h>
      91             : #endif
      92             : 
      93             : /* Define standard sized integers                                          */
      94             : #if defined(_MSC_VER) && (_MSC_VER < 1600)
                               /* MSVC with _MSC_VER below 1600 gets hand-written typedefs in
                               /  place of <stdint.h>. Only these four types are provided.     */
      95             :         typedef unsigned __int8  uint8_t;
      96             :         typedef unsigned __int32 uint32_t;
      97             :         typedef unsigned __int64 uint64_t;
      98             :         typedef          __int64 int64_t;
      99             : #else
                               /* Every other compiler is expected to ship <stdint.h>.         */
     100             :         #include <stdint.h>
     101             : #endif
     102             : 
     103             : /* Compiler-specific intrinsics and fixes: bswap64, ntz                    */
     104             : #if _MSC_VER
                               /* MSVC branch: map the C99 keywords onto MSVC spellings and
                               /  define __SSE2__ / __SSSE3__ as non-zero on x86 and x64 targets
                               /  so that the SSE code paths below are selected.               */
     105             :         #define inline __inline        /* MSVC doesn't recognize "inline" in C */
     106             :         #define restrict __restrict  /* MSVC doesn't recognize "restrict" in C */
     107             :     #define __SSE2__   (_M_IX86 || _M_AMD64 || _M_X64)    /* Assume SSE2  */
     108             :     #define __SSSE3__  (_M_IX86 || _M_AMD64 || _M_X64)    /* Assume SSSE3 */
     109             :         #include <intrin.h>
                               /* Ask MSVC to expand these three as intrinsics, not calls.     */
     110             :         #pragma intrinsic(_byteswap_uint64, _BitScanForward, memcpy)
     111             : #elif __GNUC__
                               /* GCC branch: the #ifndef guards leave any existing macro
                               /  definition of "inline" or "restrict" untouched.              */
     112             :         #ifndef inline
     113             :         #define inline __inline__            /* No "inline" in GCC ansi C mode */
     114             :         #endif
     115             :         #ifndef restrict
     116             :         #define restrict __restrict__      /* No "restrict" in GCC ansi C mode */
     117             :         #endif
     118             : #endif
     119             : 
     120             : #if _MSC_VER
                               /* bswap64(x): reverse the byte order of a 64-bit value. The
                               /  implementation is chosen, in order, from: the MSVC intrinsic,
                               /  a bswap64 already declared by the platform, the GCC builtin,
                               /  or the portable fallback at the bottom.                       */
     121             :         #define bswap64(x) _byteswap_uint64(x)
     122             : #elif HAVE_DECL_BSWAP64
     123             :         /* nothing */
     124             : #elif HAVE_DECL___BUILTIN_BSWAP64
     125             :         #define bswap64(x) __builtin_bswap64(x)           /* GCC 4.3+ */
     126             : #else
                               /* Portable fallback. bswap32 is a macro that expands its
                               /  argument four times, so it must only be given side-effect-free
                               /  expressions (as it is below).                                 */
     127             :         #define bswap32(x)                                              \
     128             :            ((((x) & 0xff000000u) >> 24) | (((x) & 0x00ff0000u) >>  8) | \
     129             :                 (((x) & 0x0000ff00u) <<  8) | (((x) & 0x000000ffu) << 24))
     130             : 
     131             :          static inline uint64_t bswap64(uint64_t x) {
                                       /* Split x into two 32-bit words through a union, byte-swap
                                       /  each word and store them in exchanged positions.      */
     132             :                 union { uint64_t u64; uint32_t u32[2]; } in, out;
     133             :                 in.u64 = x;
     134             :                 out.u32[0] = bswap32(in.u32[1]);
     135             :                 out.u32[1] = bswap32(in.u32[0]);
     136             :                 return out.u64;
     137             :         }
     138             : #endif
     139             : 
     140             : #if _MSC_VER
                               /* ntz(x): number of trailing zero bits of x. Four alternative
                               /  implementations follow; exactly one is compiled.
                               /  MSVC: _BitScanForward writes the index of the lowest set bit
                               /  back into x, which is then returned.                          */
     141             :         static inline unsigned ntz(unsigned x) {_BitScanForward(&x,x);return x;}
     142             : #elif HAVE_DECL___BUILTIN_CTZ
     143             :         #define ntz(x)     __builtin_ctz((unsigned)(x))   /* GCC 3.4+ */
     144             : #elif HAVE_DECL_FFS
                               /* ffs() numbers bits from 1, hence the subtraction.             */
     145             :         #define ntz(x)     (ffs(x) - 1)
     146             : #else
     147             :         #if (L_TABLE_SZ <= 9) && (L_TABLE_SZ_IS_ENOUGH)   /* < 2^13 byte texts */
                               /* Small-table variant: the table has 128 entries and is indexed
                               /  by x/4, so it is only meaningful when x is a multiple of 4 and
                               /  x < 512. Entry i holds the trailing-zero count of 4*i; entry 0
                               /  (x == 0) is 0.                                                */
     148             :         static inline unsigned ntz(unsigned x) {
     149             :                 static const unsigned char tz_table[] = {0,
     150             :                 2,3,2,4,2,3,2,5,2,3,2,4,2,3,2,6,2,3,2,4,2,3,2,5,2,3,2,4,2,3,2,7,
     151             :                 2,3,2,4,2,3,2,5,2,3,2,4,2,3,2,6,2,3,2,4,2,3,2,5,2,3,2,4,2,3,2,8,
     152             :                 2,3,2,4,2,3,2,5,2,3,2,4,2,3,2,6,2,3,2,4,2,3,2,5,2,3,2,4,2,3,2,7,
     153             :                 2,3,2,4,2,3,2,5,2,3,2,4,2,3,2,6,2,3,2,4,2,3,2,5,2,3,2,4,2,3,2};
     154             :                 return tz_table[x/4];
     155             :         }
     156             :         #else       /* From http://supertech.csail.mit.edu/papers/debruijn.pdf */
     157             :         static inline unsigned ntz(unsigned x) {
                                       /* (x & -x) keeps only the lowest set bit; multiplying by
                                       /  the constant and taking the top 5 bits of the 32-bit
                                       /  product yields the index into the 32-entry table.     */
     158             :                 static const unsigned char tz_table[32] =
     159             :                 { 0,  1, 28,  2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17,  4, 8,
     160             :                  31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18,  6, 11,  5, 10, 9};
     161             :                 return tz_table[((uint32_t)((x & -x) * 0x077CB531u)) >> 27];
     162             :         }
     163             :         #endif
     164             : #endif
     165             : 
     166             : /* ----------------------------------------------------------------------- */
     167             : /* Define blocks and operations -- Patch if incorrect on your compiler.    */
     168             : /* ----------------------------------------------------------------------- */
     169             : 
     170             : #if __SSE2__
     171             :     #include <xmmintrin.h>              /* SSE instructions and _mm_malloc */
     172             :     #include <emmintrin.h>              /* SSE2 instructions               */
     173             :     typedef __m128i block;
                           /* SSE2 flavour of the 128-bit block type and its primitive operations.
                           /  unequal_blocks compares byte-wise: _mm_cmpeq_epi8 sets a byte to all
                           /  ones where the inputs match, _mm_movemask_epi8 gathers one bit per
                           /  byte, so any result other than 0xffff means some byte differs.     */
     174             :     #define xor_block(x,y)        _mm_xor_si128(x,y)
     175             :     #define zero_block()          _mm_setzero_si128()
     176             :     #define unequal_blocks(x,y) \
     177             :                                            (_mm_movemask_epi8(_mm_cmpeq_epi8(x,y)) != 0xffff)
     178             :         #if __SSSE3__ || USE_AES_NI
     179             :     #include <tmmintrin.h>              /* SSSE3 instructions              */
     180             :     #define swap_if_le(b) \
     181             :       _mm_shuffle_epi8(b,_mm_set_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15))
                           /* SSSE3 form of swap_if_le: a single byte shuffle whose control vector
                           /  lists the source bytes in reverse, i.e. all 16 bytes are reversed.  */
     182             :         #else
     183      152449 :     static inline block swap_if_le(block b) {
                                       /* SSE2-only byte reversal of a 16-byte block, done in
                                       /  three stages: reverse the four 32-bit lanes, swap the
                                       /  two 16-bit words inside every 32-bit lane (high and low
                                       /  halves separately), then swap the two bytes inside
                                       /  every 16-bit word by combining a right and a left
                                       /  shift by 8.                                           */
     184      304898 :                 block a = _mm_shuffle_epi32  (b, _MM_SHUFFLE(0,1,2,3));
     185      152449 :                 a = _mm_shufflehi_epi16(a, _MM_SHUFFLE(2,3,0,1));
     186      152449 :                 a = _mm_shufflelo_epi16(a, _MM_SHUFFLE(2,3,0,1));
     187      152449 :                 return _mm_xor_si128(_mm_srli_epi16(a,8), _mm_slli_epi16(a,8));
     188             :     }
     189             :         #endif
     190       81709 :         static inline block gen_offset(uint64_t KtopStr[3], unsigned bot) {
                                       /* Extracts 128 bits starting `bot` bits into the 192-bit
                                       /  string KtopStr[0..2]: each 64-bit lane becomes
                                       /  (word << bot) ^ (next word >> (64-bot)).
                                       /  KtopStr+0 is read with an aligned load, so KtopStr must
                                       /  be 16-byte aligned; KtopStr+1 uses an unaligned load.
                                       /  The shift counts are passed in registers; per Intel's
                                       /  documentation a 64-bit lane shift by more than 63
                                       /  produces zero, which covers bot == 0.
                                       /  Both return paths finish by reversing the bytes inside
                                       /  each 64-bit half of the result.                       */
     191       81709 :                 block hi = _mm_load_si128((__m128i *)(KtopStr+0));   /* hi = B A */
     192       81709 :                 block lo = _mm_loadu_si128((__m128i *)(KtopStr+1));  /* lo = C B */
     193       81709 :                 __m128i lshift = _mm_cvtsi32_si128(bot);
     194       81709 :                 __m128i rshift = _mm_cvtsi32_si128(64-bot);
     195       81709 :                 lo = _mm_xor_si128(_mm_sll_epi64(hi,lshift),_mm_srl_epi64(lo,rshift));
     196             :                 #if __SSSE3__ || USE_AES_NI
     197             :                 return _mm_shuffle_epi8(lo,_mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7));
     198             :                 #else
                                       /* Swap the 64-bit halves first so that the full 16-byte
                                       /  reversal in swap_if_le leaves the halves in place.     */
     199       81709 :                 return swap_if_le(_mm_shuffle_epi32(lo, _MM_SHUFFLE(1,0,3,2)));
     200             :                 #endif
     201             :         }
     202       66810 :         static inline block double_block(block bl) {
                                       /* Shifts the 128-bit value left by one bit across four
                                       /  32-bit lanes. The arithmetic shift by 31 turns each
                                       /  lane into all-ones when its top bit is set; ANDing with
                                       /  the mask reduces that to 1 (or 135 for the top lane).
                                       /  The lane rotation moves every carry into the next
                                       /  higher lane and wraps the top lane's 135 into the lowest
                                       /  lane, where it is XORed in after the per-lane shift.
                                       /  NOTE(review): presumably bl is held in byte-swapped
                                       /  (register) order here -- confirm against callers.      */
     203       66810 :                 const __m128i mask = _mm_set_epi32(135,1,1,1);
     204      133620 :                 __m128i tmp = _mm_srai_epi32(bl, 31);
     205       66810 :                 tmp = _mm_and_si128(tmp, mask);
     206       66810 :                 tmp = _mm_shuffle_epi32(tmp, _MM_SHUFFLE(2,1,0,3));
     207       66810 :                 bl = _mm_slli_epi32(bl, 1);
     208       66810 :                 return _mm_xor_si128(bl,tmp);
     209             :         }
     210             : #elif __ALTIVEC__ && _CALL_ELF != 2
     211             :     #include <altivec.h>
     212             :     typedef vector unsigned block;
                           /* AltiVec flavour of the block type and its primitive operations.
                           /  swap_if_le expands to its argument unchanged in this branch.        */
     213             :     #define xor_block(x,y)         vec_xor(x,y)
     214             :     #define zero_block()           vec_splat_u32(0)
     215             :     #define unequal_blocks(x,y)    vec_any_ne(x,y)
     216             :     #define swap_if_le(b)          (b)
     217             :         #if __PPC64__
     218             :         static block gen_offset(uint64_t KtopStr[3], unsigned bot) {
                                       /* 64-bit PowerPC: builds the two 64-bit halves of the
                                       /  result with plain integer shifts and returns them
                                       /  reinterpreted as a vector through a union.
                                       /  NOTE(review): when bot == 0 the right shifts are by 64,
                                       /  which is undefined behaviour in C for a 64-bit operand.
                                       /  The portable gen_offset in this file special-cases
                                       /  bot == 0; this variant does not -- confirm whether
                                       /  callers can pass 0 here.                              */
     219             :                 union {uint64_t u64[2]; block bl;} rval;
     220             :                 rval.u64[0] = (KtopStr[0] << bot) | (KtopStr[1] >> (64-bot));
     221             :                 rval.u64[1] = (KtopStr[1] << bot) | (KtopStr[2] >> (64-bot));
     222             :         return rval.bl;
     223             :         }
     224             :         #else
     225             :         /* Special handling: Shifts are mod 32, and no 64-bit types */
     226             :         static block gen_offset(uint64_t KtopStr[3], unsigned bot) {
                                       /* 32-bit PowerPC: the same extraction done on 32-bit
                                       /  vector lanes. `hi` holds the current four words and
                                       /  `lo` the same window advanced by one word, so the
                                       /  result is (hi << bot) ^ (lo >> (32-bot)) per lane.
                                       /  NOTE(review): the vector load at KtopStr+2 covers 16
                                       /  bytes, i.e. 8 bytes past the three declared elements;
                                       /  presumably the caller's storage extends that far --
                                       /  confirm.                                               */
     227             :                 const vector unsigned k32 = {32,32,32,32};
     228             :                 vector unsigned hi = *(vector unsigned *)(KtopStr+0);
     229             :                 vector unsigned lo = *(vector unsigned *)(KtopStr+2);
     230             :                 vector unsigned bot_vec;
     231             :                 if (bot < 32) {
     232             :                         lo = vec_sld(hi,lo,4);
     233             :                 } else {
                                               /* bot >= 32: advance both windows by one more
                                               /  word and reduce bot into the range 0..31.     */
     234             :                         vector unsigned t = vec_sld(hi,lo,4);
     235             :                         lo = vec_sld(hi,lo,8);
     236             :                         hi = t;
     237             :                         bot = bot - 32;
     238             :                 }
                                       /* No bit shift left to do; returning here also avoids
                                       /  a right shift by 32 (see the "mod 32" remark above
                                       /  this function).                                        */
     239             :                 if (bot == 0) return hi;
                                       /* Store bot in the first lane, then splat it to all four. */
     240             :                 *(unsigned *)&bot_vec = bot;
     241             :                 vector unsigned lshift = vec_splat(bot_vec,0);
     242             :                 vector unsigned rshift = vec_sub(k32,lshift);
     243             :                 hi = vec_sl(hi,lshift);
     244             :                 lo = vec_sr(lo,rshift);
     245             :                 return vec_xor(hi,lo);
     246             :         }
     247             :         #endif
     248             :         static inline block double_block(block b) {
                                       /* Byte-lane version of the one-bit left shift: shift each
                                       /  byte right by 7 and mask to get a per-byte carry value
                                       /  (135 in byte 0, 1 elsewhere), rotate the carries by one
                                       /  byte position with vec_perm so each lands in its
                                       /  neighbour (byte 0's 135 wraps to byte 15), shift every
                                       /  byte left by one and XOR the carries in.
                                       /  NOTE(review): this relies on vec_sra replicating the
                                       /  top bit for these operands -- confirm.                 */
     249             :                 const vector unsigned char mask = {135,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
     250             :                 const vector unsigned char perm = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0};
     251             :                 const vector unsigned char shift7  = vec_splat_u8(7);
     252             :                 const vector unsigned char shift1  = vec_splat_u8(1);
     253             :                 vector unsigned char c = (vector unsigned char)b;
     254             :                 vector unsigned char t = vec_sra(c,shift7);
     255             :                 t = vec_and(t,mask);
     256             :                 t = vec_perm(t,t,perm);
     257             :                 c = vec_sl(c,shift1);
     258             :                 return (block)vec_xor(c,t);
     259             :         }
     260             : #elif __ARM_NEON__
     261             :     #include <arm_neon.h>
     262             :     typedef int8x16_t block;      /* Yay! Endian-neutral reads! */
                           /* NEON flavour of the block type: sixteen signed 8-bit lanes, with XOR
                           /  and all-zero construction mapped directly onto NEON intrinsics.     */
     263             :     #define xor_block(x,y)             veorq_s8(x,y)
     264             :     #define zero_block()               vdupq_n_s8(0)
     265             :     static inline int unequal_blocks(block a, block b) {
                                       /* XOR the blocks viewed as two 64-bit lanes; the blocks
                                       /  differ iff either lane of the XOR is non-zero.
                                       /  Returns 1 when they differ, 0 when they are equal.     */
     266             :                 int64x2_t t=veorq_s64((int64x2_t)a,(int64x2_t)b);
     267             :                 return (vgetq_lane_s64(t,0)|vgetq_lane_s64(t,1))!=0;
     268             :     }
     269             :     #define swap_if_le(b)          (b)  /* Using endian-neutral int8x16_t */
     270             :         /* KtopStr is reg correct by 64 bits, return mem correct */
     271             :         static block gen_offset(uint64_t KtopStr[3], unsigned bot) {
                                       /* NEON version. `little.endian` reads the first byte of
                                       /  the integer 1 and is therefore non-zero exactly on a
                                       /  little-endian target.                                  */
     272             :                 const union { unsigned x; unsigned char endian; } little = { 1 };
     273             :                 const int64x2_t k64 = {-64,-64};
     274             :                 uint64x2_t hi, lo;
                                       /* hi = KtopStr[0..1], lo = KtopStr[1..2]; memcpy keeps
                                       /  the loads free of alignment requirements.              */
     275             :                 memcpy(&hi, KtopStr, sizeof(hi));
     276             :                 memcpy(&lo, KtopStr+1, sizeof(lo));
                                       /* ls = bot, rs = bot - 64. vshlq_u64 treats a negative
                                       /  count as a right shift, so the second operand below is
                                       /  lo >> (64 - bot).                                      */
     277             :                 int64x2_t ls = vdupq_n_s64(bot);
     278             :                 int64x2_t rs = vqaddq_s64(k64,ls);
     279             :                 block rval = (block)veorq_u64(vshlq_u64(hi,ls),vshlq_u64(lo,rs));
                                       /* On little-endian targets reverse the bytes inside each
                                       /  64-bit half of the result.                             */
     280             :                 if (little.endian)
     281             :                         rval = vrev64q_s8(rval);
     282             :                 return rval;
     283             :         }
     284             :         static inline block double_block(block b)
                               /* Byte-lane one-bit left shift with wrap-around constant. -121 in
                               /  a signed 8-bit lane is the same bit pattern as 135 (0x87).     */
     285             :         {
     286             :                 const block mask = {-121,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
                                       /* Arithmetic shift by 7 gives -1 (all ones) for every
                                       /  byte whose top bit is set; the mask turns that into
                                       /  the carry value for that byte.                         */
     287             :                 block tmp = vshrq_n_s8(b,7);
     288             :                 tmp = vandq_s8(tmp, mask);
     289             :                 tmp = vextq_s8(tmp, tmp, 1);  /* Rotate high byte to end */
     290             :                 b = vshlq_n_s8(b,1);
     291             :                 return veorq_s8(tmp,b);
     292             :         }
     293             : #else
     294             :     typedef struct { uint64_t l,r; } block;
                           /* Portable block: two 64-bit words, `l` then `r`. The helpers below
                           /  treat `l` as the more significant half (see double_block).         */
     295             :     static inline block xor_block(block x, block y) {
                               /* Returns x XOR y, computed half by half on the by-value copy of x. */
     296             :         x.l^=y.l; x.r^=y.r; return x;
     297             :     }
     298             :     static inline block zero_block(void) { const block t = {0,0}; return t; }
                           /* zero_block: returns a block with both halves set to 0.              */
     299             :     #define unequal_blocks(x, y)         ((((x).l^(y).l)|((x).r^(y).r)) != 0)
                           /* Non-zero when x and y differ in either half. Being a macro, it
                           /  expands each argument twice, so arguments must have no side effects. */
     300             :     static inline block swap_if_le(block b) {
                                       /* Portable version: endianness is detected at run time by
                                       /  reading the first byte of the integer 1. On a
                                       /  little-endian host each 64-bit half is byte-swapped
                                       /  (the halves keep their positions); otherwise b is
                                       /  returned unchanged.                                    */
     301             :                 const union { unsigned x; unsigned char endian; } little = { 1 };
     302             :         if (little.endian) {
     303             :                 block r;
     304             :                 r.l = bswap64(b.l);
     305             :                 r.r = bswap64(b.r);
     306             :                 return r;
     307             :         } else
     308             :                 return b;
     309             :     }
     310             : 
     311             :         /* KtopStr is reg correct by 64 bits, return mem correct */
     312             :         static block gen_offset(uint64_t KtopStr[3], unsigned bot) {
                               /* Portable version: extracts 128 bits starting `bot` bits into
                               /  KtopStr[0..2] and passes the result through swap_if_le.
                               /  bot == 0 is handled separately because the general formula
                               /  would shift a 64-bit value right by 64.                        */
     313             :         block rval;
     314             :         if (bot != 0) {
     315             :                         rval.l = (KtopStr[0] << bot) | (KtopStr[1] >> (64-bot));
     316             :                         rval.r = (KtopStr[1] << bot) | (KtopStr[2] >> (64-bot));
     317             :                 } else {
     318             :                         rval.l = KtopStr[0];
     319             :                         rval.r = KtopStr[1];
     320             :                 }
     321             :         return swap_if_le(rval);
     322             :         }
     323             : 
     324             :         #if __GNUC__ && !__clang__ && __arm__
     325             :         static inline block double_block(block b) {
                                       /* 32-bit ARM (GCC, not clang) inline assembly. Operand
                                       /  %0 is b.l and %1 is b.r; the %H modifier names the
                                       /  upper register of each 64-bit pair. The adds/adcs chain
                                       /  adds the value to itself from the low word of b.r up to
                                       /  the high word of b.l, propagating the carry. If a carry
                                       /  falls out of the top, 135 is XORed into the low word of
                                       /  b.r ("it cs" makes the eor conditional).
                                       /  The "cc" clobber tells the compiler the flags change.  */
     326             :                 __asm__ ("adds %1,%1,%1\n\t"
     327             :                                  "adcs %H1,%H1,%H1\n\t"
     328             :                                  "adcs %0,%0,%0\n\t"
     329             :                                  "adcs %H0,%H0,%H0\n\t"
     330             :                                  "it cs\n\t"
     331             :                                  "eorcs %1,%1,#135"
     332             :                 : "+r"(b.l), "+r"(b.r) : : "cc");
     333             :                 return b;
     334             :         }
     335             :         #else
     336             :         static inline block double_block(block b) {
                                       /* Portable one-bit left shift of the 128-bit value l:r.
                                       /  t is all ones when the top bit of b.l is set (signed
                                       /  shift by 63), else 0. The top bit of b.r carries into
                                       /  b.l, and 135 is XORed into b.r when the top bit of b.l
                                       /  was shifted out.                                       */
     337             :                 uint64_t t = (uint64_t)((int64_t)b.l >> 63);
     338             :                 b.l = (b.l + b.l) ^ (b.r >> 63);
     339             :                 b.r = (b.r + b.r) ^ (t & 135);
     340             :                 return b;
     341             :         }
     342             :         #endif
     343             : 
     344             : #endif
     345             : 
     346             : /* ----------------------------------------------------------------------- */
     347             : /* AES - Code uses OpenSSL API. Other implementations get mapped to it.    */
     348             : /* ----------------------------------------------------------------------- */
     349             : 
     350             : /*---------------*/
     351             : #if USE_OPENSSL_AES
     352             : /*---------------*/
     353             : 
     354             : #include <openssl/aes.h>                            /* http://openssl.org/ */
     355             : 
     356             : /* How to ECB encrypt an array of blocks, in place                         */
     357      550019 : static inline void AES_ecb_encrypt_blks(block *blks, unsigned nblks, AES_KEY *key) {
                               /* Encrypts nblks consecutive 16-byte blocks in place, one
                               /  AES_encrypt call per block, walking from the last block down
                               /  to the first. nblks == 0 does nothing.                         */
     358     2724066 :         while (nblks) {
     359     2174047 :                 --nblks;
     360     2174047 :                 AES_encrypt((unsigned char *)(blks+nblks), (unsigned char *)(blks+nblks), key);
     361             :         }
     362      550019 : }
     363             : 
     364      583257 : static inline void AES_ecb_decrypt_blks(block *blks, unsigned nblks, AES_KEY *key) {
                               /* Decrypts nblks consecutive 16-byte blocks in place, one
                               /  AES_decrypt call per block, walking from the last block down
                               /  to the first. nblks == 0 does nothing.                         */
     365     2808257 :         while (nblks) {
     366     2225000 :                 --nblks;
     367     2225000 :                 AES_decrypt((unsigned char *)(blks+nblks), (unsigned char *)(blks+nblks), key);
     368             :         }
     369      583257 : }
     370             : 
     371             : #define BPI 4  /* Number of blocks in buffer per ECB call */
     372             : 
     373             : /*-------------------*/
     374             : #elif USE_APPLE_COMMON_CRYPTO_AES
     375             : /*-------------------*/
     376             : 
     377             : #include <fatal_assert.h>
     378             : #include <CommonCrypto/CommonCryptor.h>
     379             : 
     380             : typedef struct {
                               /* Stand-in for OpenSSL's AES_KEY when using Apple CommonCrypto.
                               /  `ref` receives the cryptor handle; `b` is the buffer handed to
                               /  CCCryptorCreateFromData as the storage for that cryptor.       */
     381             :         CCCryptorRef ref;
     382             :         uint8_t b[4096];
     383             : } AES_KEY;
     384             : #if (OCB_KEY_LEN == 0)
                       /* ROUNDS(ctx): AES round count, read from the context when the key length
                       /  is chosen at run time, otherwise the constant 6 + OCB_KEY_LEN/4.
                       /  NOTE(review): the AES_KEY declared just above for this backend has no
                       /  `rounds` member, so the run-time form would not compile if ROUNDS were
                       /  ever expanded on it -- confirm whether it is used with this backend.   */
     385             : #define ROUNDS(ctx) ((ctx)->rounds)
     386             : #else
     387             : #define ROUNDS(ctx) (6+OCB_KEY_LEN/4)
     388             : #endif
     389             : 
     390             : static inline void AES_set_encrypt_key(unsigned char *handle, const int bits, AES_KEY *key)
                       /* CommonCrypto replacement for OpenSSL's AES_set_encrypt_key.
                       /  handle: raw key bytes; bits: key length in bits (passed on as bits/8
                       /  bytes); key: receives the cryptor, created inside key->b.
                       /  Creates an encrypting AES cryptor in ECB mode with no IV. Any status
                       /  other than kCCSuccess trips fatal_assert.
                       /  NOTE(review): nothing in the code shown here releases key->ref.        */
     391             : {
     392             :         CCCryptorStatus rv = CCCryptorCreateFromData(
     393             :                 kCCEncrypt,
     394             :                 kCCAlgorithmAES128,
     395             :                 kCCOptionECBMode,
     396             :                 handle,
     397             :                 bits / 8,
     398             :                 NULL,
     399             :                 &(key->b),
     400             :                 sizeof (key->b),
     401             :                 &(key->ref),
     402             :                 NULL);
     403             : 
     404             :         fatal_assert(rv == kCCSuccess);
     405             : }
     406             : static inline void AES_set_decrypt_key(unsigned char *handle, const int bits, AES_KEY *key)
                       /* CommonCrypto replacement for OpenSSL's AES_set_decrypt_key.
                       /  handle: raw key bytes; bits: key length in bits (passed on as bits/8
                       /  bytes); key: receives the cryptor, created inside key->b.
                       /  Creates a decrypting AES cryptor in ECB mode with no IV. Any status
                       /  other than kCCSuccess trips fatal_assert.
                       /  NOTE(review): nothing in the code shown here releases key->ref.        */
     407             : {
     408             :         CCCryptorStatus rv = CCCryptorCreateFromData(
     409             :                 kCCDecrypt,
     410             :                 kCCAlgorithmAES128,
     411             :                 kCCOptionECBMode,
     412             :                 handle,
     413             :                 bits / 8,
     414             :                 NULL,
     415             :                 &(key->b),
     416             :                 sizeof (key->b),
     417             :                 &(key->ref),
     418             :                 NULL);
     419             : 
     420             :         fatal_assert(rv == kCCSuccess);
     421             : }
     422             : static inline void AES_encrypt(unsigned char *src, unsigned char *dst, AES_KEY *key) {
                               /* Pushes exactly one AES block (kCCBlockSizeAES128 bytes) from
                               /  src through the cryptor in key->ref into dst. Asserts that the
                               /  call succeeded and that a full block was written.
                               /  The operation performed is whatever key->ref was created for.  */
     423             :         size_t dataOutMoved;
     424             :         CCCryptorStatus rv = CCCryptorUpdate(
     425             :                 key->ref,
     426             :                 (const void *)src,
     427             :                 kCCBlockSizeAES128,
     428             :                 (void *)dst,
     429             :                 kCCBlockSizeAES128,
     430             :                 &dataOutMoved);
     431             :         fatal_assert(rv == kCCSuccess);
     432             :         fatal_assert(dataOutMoved == kCCBlockSizeAES128);
     433             : }
     434             : #if 0
     435             : /* unused */
                       /* Compiled out. Had it been enabled it would simply forward to
                       /  AES_encrypt with the same arguments.                                   */
     436             : static inline void AES_decrypt(unsigned char *src, unsigned char *dst, AES_KEY *key) {
     437             :         AES_encrypt(src, dst, key);
     438             : }
     439             : #endif
     440             : static inline void AES_ecb_encrypt_blks(block *blks, unsigned nblks, AES_KEY *key) {
                               /* Processes nblks blocks in place with a single CCCryptorUpdate
                               /  call (input and output buffers are both blks). Asserts success
                               /  and that exactly nblks * kCCBlockSizeAES128 bytes came out.    */
     441             :         const size_t dataSize = kCCBlockSizeAES128 * nblks;
     442             :         size_t dataOutMoved;
     443             :         CCCryptorStatus rv = CCCryptorUpdate(
     444             :                 key->ref,
     445             :                 (const void *)blks,
     446             :                 dataSize,
     447             :                 (void *)blks,
     448             :                 dataSize,
     449             :                 &dataOutMoved);
     450             :         fatal_assert(rv == kCCSuccess);
     451             :         fatal_assert(dataOutMoved == dataSize);
     452             : }
     453             : static inline void AES_ecb_decrypt_blks(block *blks, unsigned nblks, AES_KEY *key) {
                               /* Forwards to AES_ecb_encrypt_blks. NOTE(review): presumably the
                               /  direction is fixed by how `key` was created (kCCDecrypt in
                               /  AES_set_decrypt_key), so the same update call decrypts --
                               /  confirm that callers always pass a decrypt key here.           */
     454             :         AES_ecb_encrypt_blks(blks, nblks, key);
     455             : }
     456             : 
     457             : #define BPI 4  /* Number of blocks in buffer per ECB call */
     458             : 
     459             : /*-------------------*/
     460             : #elif USE_NETTLE_AES
     461             : /*-------------------*/
     462             : 
     463             : #include <nettle/aes.h>
     464             : 
     465             : typedef struct aes_ctx AES_KEY;
                       /* Nettle backend: AES_KEY is an alias for nettle's struct aes_ctx.
                       /  ROUNDS(ctx) is the AES round count: read from the context when the key
                       /  length is chosen at run time, otherwise the constant 6 + OCB_KEY_LEN/4.
                       /  NOTE(review): the run-time form assumes struct aes_ctx exposes a
                       /  `rounds` member -- confirm for the nettle version in use.              */
     466             : #if (OCB_KEY_LEN == 0)
     467             : #define ROUNDS(ctx) ((ctx)->rounds)
     468             : #else
     469             : #define ROUNDS(ctx) (6+OCB_KEY_LEN/4)
     470             : #endif
     471             : 
     472             : static inline void AES_set_encrypt_key(unsigned char *handle, const int bits, AES_KEY *key)
                       /* OpenSSL-style wrapper: sets up `key` for encryption from the raw key
                       /  bytes in `handle`. `bits` is converted to a byte count (bits/8).       */
     473             : {
     474             :         nettle_aes_set_encrypt_key(key, bits/8, (const uint8_t *)handle);
     475             : }
     476             : static inline void AES_set_decrypt_key(unsigned char *handle, const int bits, AES_KEY *key)
                       /* OpenSSL-style wrapper: sets up `key` for decryption from the raw key
                       /  bytes in `handle`. `bits` is converted to a byte count (bits/8).       */
     477             : {
     478             :         nettle_aes_set_decrypt_key(key, bits/8, (const uint8_t *)handle);
     479             : }
     480             : static inline void AES_encrypt(unsigned char *src, unsigned char *dst, AES_KEY *key) {
                               /* Encrypts one AES_BLOCK_SIZE-byte block from src into dst. Note
                               /  the argument order swap: nettle takes the destination first.   */
     481             :         nettle_aes_encrypt(key, AES_BLOCK_SIZE, dst, src);
     482             : }
     483             : #if 0
     484             : /* unused */
                       /* Compiled out. Had it been enabled it would decrypt one
                       /  AES_BLOCK_SIZE-byte block from src into dst via nettle.                */
     485             : static inline void AES_decrypt(unsigned char *src, unsigned char *dst, AES_KEY *key) {
     486             :         nettle_aes_decrypt(key, AES_BLOCK_SIZE, dst, src);
     487             : }
     488             : #endif
     489             : static inline void AES_ecb_encrypt_blks(block *blks, unsigned nblks, AES_KEY *key) {  /* ECB-encrypt nblks blocks in place */
     490             :         nettle_aes_encrypt(key, nblks * AES_BLOCK_SIZE, (unsigned char*)blks, (unsigned char*)blks);  /* same buffer is passed as destination and source */
     491             : }
     492             : static inline void AES_ecb_decrypt_blks(block *blks, unsigned nblks, AES_KEY *key) {  /* ECB-decrypt nblks blocks in place */
     493             :         nettle_aes_decrypt(key, nblks * AES_BLOCK_SIZE, (unsigned char*)blks, (unsigned char*)blks);  /* same buffer is passed as destination and source */
     494             : }
     495             : 
     496             : #define BPI 4  /* Number of blocks in buffer per ECB call; also sizes the ta[]/oa[] work arrays in process_ad */
     497             : 
     498             : /*-------------------*/
     499             : #elif USE_REFERENCE_AES
     500             : /*-------------------*/
     501             : 
     502             : #include "rijndael-alg-fst.h"              /* Barreto's Public-Domain Code */
     503             : #if (OCB_KEY_LEN == 0)  /* no key length fixed at compile time: keep the round count in the key object */
     504             :         typedef struct { uint32_t rd_key[60]; int rounds; } AES_KEY;
     505             :         #define ROUNDS(ctx) ((ctx)->rounds)
     506             :         #define AES_set_encrypt_key(x, y, z) \
     507             :          do {rijndaelKeySetupEnc((z)->rd_key, x, y); (z)->rounds = (y)/32+6;} while (0)  /* x = key bytes, y = key size in bits (evaluated twice), z = AES_KEY*; (y) is parenthesised so expression arguments divide correctly */
     508             :         #define AES_set_decrypt_key(x, y, z) \
     509             :          do {rijndaelKeySetupDec((z)->rd_key, x, y); (z)->rounds = (y)/32+6;} while (0)  /* same contract as AES_set_encrypt_key, building the decryption schedule */
     510             : #else  /* key length fixed by OCB_KEY_LEN: schedule size and round count are compile-time constants */
     511             :         typedef struct { uint32_t rd_key[OCB_KEY_LEN+28]; } AES_KEY;
     512             :         #define ROUNDS(ctx) (6+OCB_KEY_LEN/4)
     513             :         #define AES_set_encrypt_key(x, y, z) rijndaelKeySetupEnc((z)->rd_key, x, y)
     514             :         #define AES_set_decrypt_key(x, y, z) rijndaelKeySetupDec((z)->rd_key, x, y)
     515             : #endif
     516             : #define AES_encrypt(x,y,z) rijndaelEncrypt((z)->rd_key, ROUNDS(z), x, y)  /* x = input block, y = output block, z = AES_KEY* */
     517             : #define AES_decrypt(x,y,z) rijndaelDecrypt((z)->rd_key, ROUNDS(z), x, y)  /* x = input block, y = output block, z = AES_KEY* */
     518             : 
     519             : static void AES_ecb_encrypt_blks(block *blks, unsigned nblks, AES_KEY *key) {  /* ECB-encrypt nblks blocks in place, one AES_encrypt call per block */
     520             :         while (nblks) {  /* walks the array from the last block down to index 0 */
     521             :                 --nblks;
     522             :                 AES_encrypt((unsigned char *)(blks+nblks), (unsigned char *)(blks+nblks), key);  /* input and output are the same block */
     523             :         }
     524             : }
     525             : 
     526             : static void AES_ecb_decrypt_blks(block *blks, unsigned nblks, AES_KEY *key) {  /* ECB-decrypt nblks blocks in place; file-local (static) like its encrypt sibling */
     527             :         while (nblks) {  /* walks the array from the last block down to index 0 */
     528             :                 --nblks;
     529             :                 AES_decrypt((unsigned char *)(blks+nblks), (unsigned char *)(blks+nblks), key);  /* input and output are the same block */
     530             :         }
     531             : }
     532             : 
     533             : #define BPI 4  /* Number of blocks in buffer per ECB call; also sizes the ta[]/oa[] work arrays in process_ad */
     534             : 
     535             : /*----------*/
     536             : #elif USE_AES_NI
     537             : /*----------*/
     538             : 
     539             : #include <wmmintrin.h>
     540             : 
     541             : #if (OCB_KEY_LEN == 0)  /* no key length fixed at compile time */
     542             :         typedef struct { __m128i rd_key[15]; int rounds; } AES_KEY;  /* 15 round keys covers the largest schedule written below (kp[0..14] for 256-bit keys) */
     543             :         #define ROUNDS(ctx) ((ctx)->rounds)
     544             : #else
     545             :         typedef struct { __m128i rd_key[7+OCB_KEY_LEN/4]; } AES_KEY;  /* one more round key than ROUNDS() */
     546             :         #define ROUNDS(ctx) (6+OCB_KEY_LEN/4)
     547             : #endif
     548             : 
     549             : #define EXPAND_ASSIST(v1,v2,v3,v4,shuff_const,aes_const) /* one key-schedule step: updates v1 from v4; v2 and v3 are scratch */ \
     550             :     v2 = _mm_aeskeygenassist_si128(v4,aes_const);                           \
     551             :     v3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(v3),              \
     552             :                                          _mm_castsi128_ps(v1), 16));        \
     553             :     v1 = _mm_xor_si128(v1,v3);                                              \
     554             :     v3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(v3),              \
     555             :                                          _mm_castsi128_ps(v1), 140));       \
     556             :     v1 = _mm_xor_si128(v1,v3);                                              \
     557             :     v2 = _mm_shuffle_epi32(v2,shuff_const);                                 \
     558             :     v1 = _mm_xor_si128(v1,v2)  /* expands to several statements with no trailing ';' -- use only as a full statement; every caller below zeroes the v3 register (x2) beforehand */
     559             : 
     560             : #define EXPAND192_STEP(idx,aes_const) /* writes kp[idx], kp[idx+1], kp[idx+2]; uses x0,x1,x2,x3,tmp,kp from the enclosing scope */ \
     561             :     EXPAND_ASSIST(x0,x1,x2,x3,85,aes_const);                                \
     562             :     x3 = _mm_xor_si128(x3,_mm_slli_si128 (x3, 4));                          \
     563             :     x3 = _mm_xor_si128(x3,_mm_shuffle_epi32(x0, 255));                      \
     564             :     kp[idx] = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(tmp),        \
     565             :                                               _mm_castsi128_ps(x0), 68));   \
     566             :     kp[idx+1] = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(x0),       \
     567             :                                                 _mm_castsi128_ps(x3), 78)); \
     568             :     EXPAND_ASSIST(x0,x1,x2,x3,85,(aes_const*2));                            \
     569             :     x3 = _mm_xor_si128(x3,_mm_slli_si128 (x3, 4));                          \
     570             :     x3 = _mm_xor_si128(x3,_mm_shuffle_epi32(x0, 255));                      \
     571             :     kp[idx+2] = x0; tmp = x3  /* tmp carries x3 into the next step; aes_const is used once as given and once doubled */
     572             : 
     573             : static void AES_128_Key_Expansion(const unsigned char *userkey, void *key)  /* Expand a 16-byte key at userkey into 11 round keys, key[0..10] viewed as __m128i */
     574             : {
     575             :     __m128i x0,x1,x2;
     576             :     __m128i *kp = (__m128i *)key;
     577             :     kp[0] = x0 = _mm_loadu_si128((__m128i*)userkey);  /* unaligned load: round key 0 is the user key itself */
     578             :     x2 = _mm_setzero_si128();  /* scratch register for EXPAND_ASSIST */
     579             :     EXPAND_ASSIST(x0,x1,x2,x0,255,1);   kp[1]  = x0;  /* last macro argument is the per-round constant */
     580             :     EXPAND_ASSIST(x0,x1,x2,x0,255,2);   kp[2]  = x0;
     581             :     EXPAND_ASSIST(x0,x1,x2,x0,255,4);   kp[3]  = x0;
     582             :     EXPAND_ASSIST(x0,x1,x2,x0,255,8);   kp[4]  = x0;
     583             :     EXPAND_ASSIST(x0,x1,x2,x0,255,16);  kp[5]  = x0;
     584             :     EXPAND_ASSIST(x0,x1,x2,x0,255,32);  kp[6]  = x0;
     585             :     EXPAND_ASSIST(x0,x1,x2,x0,255,64);  kp[7]  = x0;
     586             :     EXPAND_ASSIST(x0,x1,x2,x0,255,128); kp[8]  = x0;
     587             :     EXPAND_ASSIST(x0,x1,x2,x0,255,27);  kp[9]  = x0;
     588             :     EXPAND_ASSIST(x0,x1,x2,x0,255,54);  kp[10] = x0;
     589             : }
     590             : 
     591             : static void AES_192_Key_Expansion(const unsigned char *userkey, void *key)  /* Expand a 24-byte key at userkey into 13 round keys, key[0..12] viewed as __m128i */
     592             : {
     593             :     __m128i x0,x1,x2,x3,tmp,*kp = (__m128i *)key;
     594             :     kp[0] = x0 = _mm_loadu_si128((__m128i*)userkey);  /* key bytes 0..15 */
     595             :     tmp = x3 = _mm_loadl_epi64((const __m128i*)(userkey+16));  /* key bytes 16..23 only: a 16-byte load here would read 8 bytes past the key, and EXPAND192_STEP consumes just the low 64 bits of x3/tmp */
     596             :     x2 = _mm_setzero_si128();  /* scratch register for EXPAND_ASSIST */
     597             :     EXPAND192_STEP(1,1);  /* each step fills three round keys starting at the given index */
     598             :     EXPAND192_STEP(4,4);
     599             :     EXPAND192_STEP(7,16);
     600             :     EXPAND192_STEP(10,64);
     601             : }
     602             : 
     603             : static void AES_256_Key_Expansion(const unsigned char *userkey, void *key)  /* Expand a 32-byte key at userkey into 15 round keys, key[0..14] viewed as __m128i */
     604             : {
     605             :     __m128i x0,x1,x2,x3,*kp = (__m128i *)key;
     606             :     kp[0] = x0 = _mm_loadu_si128((__m128i*)userkey   );  /* key bytes 0..15 */
     607             :     kp[1] = x3 = _mm_loadu_si128((__m128i*)(userkey+16));  /* key bytes 16..31 */
     608             :     x2 = _mm_setzero_si128();  /* scratch register for EXPAND_ASSIST */
     609             :     EXPAND_ASSIST(x0,x1,x2,x3,255,1);  kp[2]  = x0;  /* even round keys: update x0 from x3, shuffle constant 255 */
     610             :     EXPAND_ASSIST(x3,x1,x2,x0,170,1);  kp[3]  = x3;  /* odd round keys: update x3 from x0, shuffle constant 170, same round constant */
     611             :     EXPAND_ASSIST(x0,x1,x2,x3,255,2);  kp[4]  = x0;
     612             :     EXPAND_ASSIST(x3,x1,x2,x0,170,2);  kp[5]  = x3;
     613             :     EXPAND_ASSIST(x0,x1,x2,x3,255,4);  kp[6]  = x0;
     614             :     EXPAND_ASSIST(x3,x1,x2,x0,170,4);  kp[7]  = x3;
     615             :     EXPAND_ASSIST(x0,x1,x2,x3,255,8);  kp[8]  = x0;
     616             :     EXPAND_ASSIST(x3,x1,x2,x0,170,8);  kp[9]  = x3;
     617             :     EXPAND_ASSIST(x0,x1,x2,x3,255,16); kp[10] = x0;
     618             :     EXPAND_ASSIST(x3,x1,x2,x0,170,16); kp[11] = x3;
     619             :     EXPAND_ASSIST(x0,x1,x2,x3,255,32); kp[12] = x0;
     620             :     EXPAND_ASSIST(x3,x1,x2,x0,170,32); kp[13] = x3;
     621             :     EXPAND_ASSIST(x0,x1,x2,x3,255,64); kp[14] = x0;
     622             : }
     623             : 
     624             : static int AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key)  /* Build the encryption schedule in *key for a 128/192/256-bit key; always returns 0 */
     625             : {
     626             :     if (bits == 128) {
     627             :         AES_128_Key_Expansion (userKey,key);
     628             :     } else if (bits == 192) {
     629             :         AES_192_Key_Expansion (userKey,key);
     630             :     } else if (bits == 256) {
     631             :         AES_256_Key_Expansion (userKey,key);
     632             :     }  /* NOTE(review): any other `bits` value leaves rd_key unwritten yet still returns 0 below */
     633             :     #if (OCB_KEY_LEN == 0)
     634             :         key->rounds = 6+bits/32;  /* round count is stored only when the key length is not fixed at compile time */
     635             :     #endif
     636             :     return 0;
     637             : }
     638             : 
     639             : static void AES_set_decrypt_key_fast(AES_KEY *dkey, const AES_KEY *ekey)  /* Derive the decryption schedule dkey from an already-expanded encryption schedule ekey */
     640             : {
     641             :     int j = 0;  /* read index into ekey, ascending */
     642             :     int i = ROUNDS(ekey);  /* write index into dkey, descending from the last round key */
     643             :     #if (OCB_KEY_LEN == 0)
     644             :         dkey->rounds = i;
     645             :     #endif
     646             :     dkey->rd_key[i--] = ekey->rd_key[j++];  /* first encryption round key becomes the last decryption round key, copied as is */
     647             :     while (i)
     648             :         dkey->rd_key[i--] = _mm_aesimc_si128(ekey->rd_key[j++]);  /* inner round keys are reversed and passed through _mm_aesimc_si128 */
     649             :     dkey->rd_key[i] = ekey->rd_key[j];  /* i == 0 here: last encryption round key becomes decryption round key 0, copied as is */
     650             : }
     651             : 
     652             : static int AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key)  /* Build the decryption schedule in *key from raw key bytes; always returns 0 */
     653             : {
     654             :     AES_KEY temp_key;  /* holds the intermediate encryption schedule; not cleared before return */
     655             :     AES_set_encrypt_key(userKey,bits,&temp_key);
     656             :     AES_set_decrypt_key_fast(key, &temp_key);
     657             :     return 0;
     658             : }
     659             : 
     660             : static inline void AES_encrypt(const unsigned char *in,  /* Encrypt one 16-byte block from `in` into `out` with the schedule in *key */
     661             :                         unsigned char *out, const AES_KEY *key)
     662             : {
     663             :         int j,rnds=ROUNDS(key);
     664             :         const __m128i *sched = ((__m128i *)(key->rd_key));
     665             :         __m128i tmp = _mm_load_si128 ((__m128i*)in);  /* aligned-load intrinsic: `in` is expected to be 16-byte aligned */
     666             :         tmp = _mm_xor_si128 (tmp,sched[0]);  /* initial round-key XOR */
     667             :         for (j=1; j<rnds; j++)  tmp = _mm_aesenc_si128 (tmp,sched[j]);
     668             :         tmp = _mm_aesenclast_si128 (tmp,sched[j]);  /* j == rnds after the loop: final round uses the last round key */
     669             :         _mm_store_si128 ((__m128i*)out,tmp);  /* aligned-store intrinsic: `out` is expected to be 16-byte aligned */
     670             : }
     671             : 
     672             : static inline void AES_decrypt(const unsigned char *in,  /* Decrypt one 16-byte block from `in` into `out` with the decryption schedule in *key */
     673             :                         unsigned char *out, const AES_KEY *key)
     674             : {
     675             :         int j,rnds=ROUNDS(key);
     676             :         const __m128i *sched = ((__m128i *)(key->rd_key));
     677             :         __m128i tmp = _mm_load_si128 ((__m128i*)in);  /* aligned-load intrinsic: `in` is expected to be 16-byte aligned */
     678             :         tmp = _mm_xor_si128 (tmp,sched[0]);  /* initial round-key XOR */
     679             :         for (j=1; j<rnds; j++)  tmp = _mm_aesdec_si128 (tmp,sched[j]);
     680             :         tmp = _mm_aesdeclast_si128 (tmp,sched[j]);  /* j == rnds after the loop: final round uses the last round key */
     681             :         _mm_store_si128 ((__m128i*)out,tmp);  /* aligned-store intrinsic: `out` is expected to be 16-byte aligned */
     682             : }
     683             : 
     684             : static inline void AES_ecb_encrypt_blks(block *blks, unsigned nblks, AES_KEY *key) {  /* ECB-encrypt nblks blocks in place */
     685             :     unsigned i,j,rnds=ROUNDS(key);
     686             :         const __m128i *sched = ((__m128i *)(key->rd_key));
     687             :         for (i=0; i<nblks; ++i)
     688             :             blks[i] =_mm_xor_si128(blks[i], sched[0]);  /* initial round-key XOR on every block */
     689             :         for(j=1; j<rnds; ++j)  /* round-major order: each round is applied to all blocks before moving to the next round */
     690             :             for (i=0; i<nblks; ++i)
     691             :                     blks[i] = _mm_aesenc_si128(blks[i], sched[j]);
     692             :         for (i=0; i<nblks; ++i)
     693             :             blks[i] =_mm_aesenclast_si128(blks[i], sched[j]);  /* j == rnds here: final round with the last round key */
     694             : }
     695             : 
     696             : static inline void AES_ecb_decrypt_blks(block *blks, unsigned nblks, AES_KEY *key) {  /* ECB-decrypt nblks blocks in place */
     697             :     unsigned i,j,rnds=ROUNDS(key);
     698             :         const __m128i *sched = ((__m128i *)(key->rd_key));
     699             :         for (i=0; i<nblks; ++i)
     700             :             blks[i] =_mm_xor_si128(blks[i], sched[0]);  /* initial round-key XOR on every block */
     701             :         for(j=1; j<rnds; ++j)  /* round-major order: each round is applied to all blocks before moving to the next round */
     702             :             for (i=0; i<nblks; ++i)
     703             :                     blks[i] = _mm_aesdec_si128(blks[i], sched[j]);
     704             :         for (i=0; i<nblks; ++i)
     705             :             blks[i] =_mm_aesdeclast_si128(blks[i], sched[j]);  /* j == rnds here: final round with the last round key */
     706             : }
     707             : 
     708             : #define BPI 8  /* Number of blocks in buffer per ECB call; also sizes the ta[]/oa[] work arrays in process_ad */
     709             :                /* Set to 4 for Westmere, 8 for Sandy Bridge */
     710             : 
     711             : #else
     712             : #error "No AES implementation selected."
     713             : #endif
     714             : 
     715             : /* ----------------------------------------------------------------------- */
     716             : /* Define OCB context structure.                                           */
     717             : /* ----------------------------------------------------------------------- */
     718             : 
     719             : /*------------------------------------------------------------------------
     720             : / Each item in the OCB context is stored either "memory correct" or
     721             : / "register correct". On big-endian machines, this is identical. On
     722             : / little-endian machines, one must choose whether the byte-string
     723             : / is in the correct order when it resides in memory or in registers.
     724             : / It must be register correct whenever it is to be manipulated
     725             : / arithmetically, but must be memory correct whenever it interacts
     726             : / with the plaintext or ciphertext.
     727             : /------------------------------------------------------------------------- */
     728             : 
     729             : struct _ae_ctx {
     730             :     block offset;                          /* Memory correct               */
     731             :     block checksum;                        /* Memory correct               */
     732             :     block Lstar;                           /* Memory correct; ae_init sets it to the encryption of the all-zero block */
     733             :     block Ldollar;                         /* Memory correct; ae_init sets it to Lstar doubled once */
     734             :     block L[L_TABLE_SZ];                   /* Memory correct; L[0] is Ldollar doubled, L[i] is L[i-1] doubled */
     735             :     block ad_checksum;                     /* Memory correct; running checksum updated by process_ad */
     736             :     block ad_offset;                       /* Memory correct; running offset updated by process_ad */
     737             :     block cached_Top;                      /* Memory correct; last nonce block (low 6 bits cleared) that KtopStr was computed for */
     738             :         uint64_t KtopStr[3];                   /* Register correct, each item; filled in gen_offset_from_nonce */
     739             :     uint32_t ad_blocks_processed;          /* Associated-data blocks consumed so far by process_ad */
     740             :     uint32_t blocks_processed;             /* NOTE(review): presumably the message-block counterpart -- not used in this part of the file */
     741             :     AES_KEY decrypt_key;                   /* Set up in ae_init */
     742             :     AES_KEY encrypt_key;                   /* Set up in ae_init */
     743             :     #if (OCB_TAG_LEN == 0)
     744             :     unsigned tag_len;                      /* Present only when OCB_TAG_LEN is 0; copied from ae_init's tag_len argument */
     745             :     #endif
     746             : };
     747             : 
     748             : /* ----------------------------------------------------------------------- */
     749             : /* L table lookup (or on-the-fly generation)                               */
     750             : /* ----------------------------------------------------------------------- */
     751             : 
     752             : #if L_TABLE_SZ_IS_ENOUGH
     753             : #define getL(_ctx, _tz) ((_ctx)->L[_tz])  /* plain table lookup, no bounds check */
     754             : #else
     755             : static block getL(const ae_ctx *ctx, unsigned tz)  /* Return L[tz] (memory correct), computing it on the fly when tz is beyond the stored table */
     756             : {
     757             :     if (tz < L_TABLE_SZ)
     758             :         return ctx->L[tz];
     759             :     else {
     760             :         unsigned i;
     761             :         /* Bring L[MAX] into registers, make it register correct */
     762             :         block rval = swap_if_le(ctx->L[L_TABLE_SZ-1]);
     763             :         rval = double_block(rval);  /* now the value for index L_TABLE_SZ */
     764             :         for (i=L_TABLE_SZ; i < tz; i++)  /* one further doubling per index up to tz */
     765             :             rval = double_block(rval);
     766             :         return swap_if_le(rval);             /* To memory correct */
     767             :     }
     768             : }
     769             : #endif
     770             : 
     771             : /* ----------------------------------------------------------------------- */
     772             : /* Public functions                                                        */
     773             : /* ----------------------------------------------------------------------- */
     774             : 
     775             : /* 32-bit SSE2 and Altivec systems need to be forced to allocate memory
     776             :    on 16-byte alignments. (I believe all major 64-bit systems do already.) */
     777             : 
     778             : /* Mosh uses its own AlignedBuffer class, not ae_allocate() or ae_free(). */
     779             : 
     780             : /* ----------------------------------------------------------------------- */
     781             : 
     782        4380 : int ae_clear (ae_ctx *ctx) /* Zero ae_ctx and undo initialization; always returns AE_SUCCESS */
     783             : {
     784        4380 :         memset(ctx, 0, sizeof(ae_ctx));  /* wipes the whole context, key schedules included */
     785        4380 :         return AE_SUCCESS;
     786             : }
     787             : 
     788        3930 : int ae_ctx_sizeof(void) { return (int) sizeof(ae_ctx); }  /* Size in bytes of an ae_ctx, for callers that allocate the context themselves */
     789             : 
     790             : /* ----------------------------------------------------------------------- */
     791             : 
     792        3930 : int ae_init(ae_ctx *ctx, const void *key, int key_len, int nonce_len, int tag_len)  /* Set up key schedules and key-dependent L values in *ctx; returns AE_SUCCESS, or AE_NOT_SUPPORTED if nonce_len != 12 */
     793             : {
     794        3930 :     unsigned i;
     795        3930 :     block tmp_blk;
     796             : 
     797        3930 :     if (nonce_len != 12)  /* only 12-byte nonces are accepted */
     798             :         return AE_NOT_SUPPORTED;
     799             : 
     800             :     /* Initialize encryption & decryption keys */
     801             :     #if (OCB_KEY_LEN > 0)
     802        3930 :     key_len = OCB_KEY_LEN;  /* compile-time key length overrides the caller's key_len */
     803             :     #endif
     804        3930 :     AES_set_encrypt_key((unsigned char *)key, key_len*8, &ctx->encrypt_key);  /* key_len is in bytes; the key-setup routine takes bits */
     805             :     #if USE_AES_NI
     806             :     AES_set_decrypt_key_fast(&ctx->decrypt_key,&ctx->encrypt_key);  /* derive the decrypt schedule from the encrypt schedule instead of re-expanding the key */
     807             :     #else
     808        3930 :     AES_set_decrypt_key((unsigned char *)key, (int)(key_len*8), &ctx->decrypt_key);
     809             :     #endif
     810             : 
     811             :     /* Zero things that need zeroing */
     812        3930 :     ctx->cached_Top = ctx->ad_checksum = zero_block();
     813        3930 :     ctx->ad_blocks_processed = 0;
     814             : 
     815             :     /* Compute key-dependent values */
     816        3930 :     AES_encrypt((unsigned char *)&ctx->cached_Top,  /* cached_Top was zeroed just above, so Lstar = encryption of the all-zero block */
     817        3930 :                             (unsigned char *)&ctx->Lstar, &ctx->encrypt_key);
     818        3930 :     tmp_blk = swap_if_le(ctx->Lstar);  /* to register-correct form for the doubling arithmetic */
     819        3930 :     tmp_blk = double_block(tmp_blk);
     820        3930 :     ctx->Ldollar = swap_if_le(tmp_blk);  /* Ldollar = Lstar doubled, stored memory correct */
     821        3930 :     tmp_blk = double_block(tmp_blk);
     822        3930 :     ctx->L[0] = swap_if_le(tmp_blk);  /* L[0] = Ldollar doubled */
     823       62880 :     for (i = 1; i < L_TABLE_SZ; i++) {  /* each further table entry is the previous one doubled */
     824       58950 :                 tmp_blk = double_block(tmp_blk);
     825      117900 :         ctx->L[i] = swap_if_le(tmp_blk);
     826             :     }
     827             : 
     828             :     #if (OCB_TAG_LEN == 0)
     829             :         ctx->tag_len = tag_len;  /* tag length is a run-time property only in this configuration */
     830             :     #else
     831             :         (void) tag_len;  /* Suppress var not used error */
     832             :     #endif
     833             : 
     834             :     return AE_SUCCESS;
     835             : }
     836             : 
     837             : /* ----------------------------------------------------------------------- */
     838             : 
     839       81709 : static block gen_offset_from_nonce(ae_ctx *ctx, const void *nonce)  /* Derive the starting offset block for a 12-byte nonce, re-encrypting only when the nonce's top part changed */
     840             : {
     841       81709 :         const union { unsigned x; unsigned char endian; } little = { 1 };  /* little.endian is nonzero exactly when the first byte of x holds the 1, i.e. on little-endian hosts */
     842       81709 :         union { uint32_t u32[4]; uint8_t u8[16]; block bl; } tmp;
     843       81709 :         unsigned idx;
     844             : 
     845             :         /* Replace cached nonce Top if needed */
     846       81709 :         tmp.u32[0] = (little.endian?0x01000000:0x00000001);  /* either way the first four bytes in memory are 00 00 00 01 */
     847       81709 :         tmp.u32[1] = ((uint32_t *)nonce)[0];  /* nonce is read as three 32-bit words; NOTE(review): assumes nonce is suitably aligned for uint32_t access -- confirm */
     848       81709 :         tmp.u32[2] = ((uint32_t *)nonce)[1];
     849       81709 :         tmp.u32[3] = ((uint32_t *)nonce)[2];
     850       81709 :         idx = (unsigned)(tmp.u8[15] & 0x3f);   /* Get low 6 bits of nonce  */
     851       81709 :         tmp.u8[15] = tmp.u8[15] & 0xc0;        /* Zero low 6 bits of nonce */
     852       81709 :         if ( unequal_blocks(tmp.bl,ctx->cached_Top) )   { /* Cached?       */
     853       14755 :                 ctx->cached_Top = tmp.bl;          /* Update cache, KtopStr    */
     854       14755 :                 AES_encrypt(tmp.u8, (unsigned char *)&ctx->KtopStr, &ctx->encrypt_key);  /* fills KtopStr[0] and KtopStr[1] */
     855       14755 :                 if (little.endian) {               /* Make Register Correct    */
     856       14755 :                         ctx->KtopStr[0] = bswap64(ctx->KtopStr[0]);
     857       14755 :                         ctx->KtopStr[1] = bswap64(ctx->KtopStr[1]);
     858             :                 }
     859       14755 :                 ctx->KtopStr[2] = ctx->KtopStr[0] ^  /* third word extends the string: word 0 XOR the 64 bits starting 8 bits into words 0..1 */
     860       14755 :                                                  (ctx->KtopStr[0] << 8) ^ (ctx->KtopStr[1] >> 56);
     861             :         }
     862       81709 :         return gen_offset(ctx->KtopStr, idx);  /* gen_offset is defined elsewhere; it receives the three-word string and the 6-bit index */
     863             : }
     864             : 
     865        2094 : static void process_ad(ae_ctx *ctx, const void *ad, int ad_len, int final)  /* Fold ad_len bytes of associated data into ctx->ad_checksum; the tail shorter than BPI blocks is handled only when `final` is nonzero */
     866             : {
     867        2094 :         union { uint32_t u32[4]; uint8_t u8[16]; block bl; } tmp;
     868        2094 :     block ad_offset, ad_checksum;
     869        2094 :     const block *  adp = (block *)ad;
     870        2094 :         unsigned i,k,tz,remaining;
     871             : 
     872        2094 :     ad_offset = ctx->ad_offset;  /* work on local copies of the running state */
     873        2094 :     ad_checksum = ctx->ad_checksum;
     874        2094 :     i = ad_len/(BPI*16);  /* number of complete BPI-block groups */
     875        2094 :     if (i) {
     876         258 :                 unsigned ad_block_num = ctx->ad_blocks_processed;
     877         956 :                 do {
     878         956 :                         block ta[BPI], oa[BPI];  /* ta: offset-masked input blocks to encrypt; oa: intermediate offsets */
     879         956 :                         ad_block_num += BPI;
     880         956 :                         tz = ntz(ad_block_num);  /* selects the L value for the last block of this group; ntz is defined elsewhere */
     881         956 :                         oa[0] = xor_block(ad_offset, ctx->L[0]);
     882         956 :                         ta[0] = xor_block(oa[0], adp[0]);
     883         956 :                         oa[1] = xor_block(oa[0], ctx->L[1]);
     884         956 :                         ta[1] = xor_block(oa[1], adp[1]);
     885         956 :                         oa[2] = xor_block(ad_offset, ctx->L[1]);
     886         956 :                         ta[2] = xor_block(oa[2], adp[2]);
     887             :                         #if BPI == 4
     888         956 :                                 ad_offset = xor_block(oa[2], getL(ctx, tz));  /* offset after the 4th block becomes the new running offset */
     889         956 :                                 ta[3] = xor_block(ad_offset, adp[3]);
     890             :                         #elif BPI == 8
     891             :                                 oa[3] = xor_block(oa[2], ctx->L[2]);
     892             :                                 ta[3] = xor_block(oa[3], adp[3]);
     893             :                                 oa[4] = xor_block(oa[1], ctx->L[2]);
     894             :                                 ta[4] = xor_block(oa[4], adp[4]);
     895             :                                 oa[5] = xor_block(oa[0], ctx->L[2]);
     896             :                                 ta[5] = xor_block(oa[5], adp[5]);
     897             :                                 oa[6] = xor_block(ad_offset, ctx->L[2]);
     898             :                                 ta[6] = xor_block(oa[6], adp[6]);
     899             :                                 ad_offset = xor_block(oa[6], getL(ctx, tz));  /* offset after the 8th block becomes the new running offset */
     900             :                                 ta[7] = xor_block(ad_offset, adp[7]);
     901             :                         #endif
     902         956 :                         AES_ecb_encrypt_blks(ta,BPI,&ctx->encrypt_key);  /* encrypt the whole group in one call */
     903         956 :                         ad_checksum = xor_block(ad_checksum, ta[0]);  /* accumulate every encrypted block into the checksum */
     904         956 :                         ad_checksum = xor_block(ad_checksum, ta[1]);
     905         956 :                         ad_checksum = xor_block(ad_checksum, ta[2]);
     906         956 :                         ad_checksum = xor_block(ad_checksum, ta[3]);
     907             :                         #if (BPI == 8)
     908             :                         ad_checksum = xor_block(ad_checksum, ta[4]);
     909             :                         ad_checksum = xor_block(ad_checksum, ta[5]);
     910             :                         ad_checksum = xor_block(ad_checksum, ta[6]);
     911             :                         ad_checksum = xor_block(ad_checksum, ta[7]);
     912             :                         #endif
     913         956 :                         adp += BPI;
     914         956 :                 } while (--i);
     915         258 :                 ctx->ad_blocks_processed = ad_block_num;  /* write the running state back to the context */
     916         258 :                 ctx->ad_offset = ad_offset;
     917         258 :                 ctx->ad_checksum = ad_checksum;
     918             :         }
     919             : 
     920        2094 :     if (final) {
     921        2094 :                 block ta[BPI];
     922             : 
     923             :         /* Process remaining associated data, compute its tag contribution */
     924        2094 :         remaining = ((unsigned)ad_len) % (BPI*16);  /* bytes left after the full groups */
     925        2094 :         if (remaining) {
     926        2088 :                         k=0;  /* k counts the blocks queued in ta[] */
     927             :                         #if (BPI == 8)
     928             :                         if (remaining >= 64) {  /* four whole blocks */
     929             :                                 tmp.bl = xor_block(ad_offset, ctx->L[0]);
     930             :                                 ta[0] = xor_block(tmp.bl, adp[0]);
     931             :                                 tmp.bl = xor_block(tmp.bl, ctx->L[1]);
     932             :                                 ta[1] = xor_block(tmp.bl, adp[1]);
     933             :                                 ad_offset = xor_block(ad_offset, ctx->L[1]);
     934             :                                 ta[2] = xor_block(ad_offset, adp[2]);
     935             :                                 ad_offset = xor_block(ad_offset, ctx->L[2]);
     936             :                                 ta[3] = xor_block(ad_offset, adp[3]);
     937             :                                 remaining -= 64;
     938             :                                 k=4;
     939             :                         }
     940             :                         #endif
     941        2088 :                         if (remaining >= 32) {  /* two whole blocks */
     942         784 :                                 ad_offset = xor_block(ad_offset, ctx->L[0]);
     943         784 :                                 ta[k] = xor_block(ad_offset, adp[k]);
     944         784 :                                 ad_offset = xor_block(ad_offset, getL(ctx, ntz(k+2)));
     945         784 :                                 ta[k+1] = xor_block(ad_offset, adp[k+1]);
     946         784 :                                 remaining -= 32;
     947         784 :                                 k+=2;
     948             :                         }
     949        2088 :                         if (remaining >= 16) {  /* one whole block */
     950        1048 :                                 ad_offset = xor_block(ad_offset, ctx->L[0]);
     951        1048 :                                 ta[k] = xor_block(ad_offset, adp[k]);
     952        1048 :                                 remaining = remaining - 16;
     953        1048 :                                 ++k;
     954             :                         }
     955        2088 :                         if (remaining) {  /* trailing partial block of 1..15 bytes */
     956        1536 :                                 ad_offset = xor_block(ad_offset,ctx->Lstar);  /* partial block is offset with Lstar rather than an L[] entry */
     957        1536 :                                 tmp.bl = zero_block();
     958        1536 :                                 memcpy(tmp.u8, adp+k, remaining);
     959        1536 :                                 tmp.u8[remaining] = (unsigned char)0x80u;  /* padding: a single 0x80 byte after the data, the rest stays zero */
     960        1536 :                                 ta[k] = xor_block(ad_offset, tmp.bl);
     961        1536 :                                 ++k;
     962             :                         }
     963        2088 :                         AES_ecb_encrypt_blks(ta,k,&ctx->encrypt_key);  /* encrypt the k queued blocks in one call */
     964        2088 :                         switch (k) {  /* deliberate fallthrough: XOR ta[k-1] down to ta[0] into the checksum */
     965             :                                 #if (BPI == 8)
     966             :                                 case 8: ad_checksum = xor_block(ad_checksum, ta[7]);
     967             :                                         /* fallthrough */
     968             :                                 case 7: ad_checksum = xor_block(ad_checksum, ta[6]);
     969             :                                         /* fallthrough */
     970             :                                 case 6: ad_checksum = xor_block(ad_checksum, ta[5]);
     971             :                                         /* fallthrough */
     972             :                                 case 5: ad_checksum = xor_block(ad_checksum, ta[4]);
     973             :                                         /* fallthrough */
     974             :                                 #endif
     975         120 :                                 case 4: ad_checksum = xor_block(ad_checksum, ta[3]);
     976             :                                         /* fallthrough */
     977         512 :                                 case 3: ad_checksum = xor_block(ad_checksum, ta[2]);
     978             :                                         /* fallthrough */
     979        1432 :                                 case 2: ad_checksum = xor_block(ad_checksum, ta[1]);
     980             :                                         /* fallthrough */
     981        2088 :                                 case 1: ad_checksum = xor_block(ad_checksum, ta[0]);
     982             :                         }
     983        2088 :                         ctx->ad_checksum = ad_checksum;  /* only the checksum is stored here; ctx->ad_offset is not updated for the tail */
     984             :                 }
     985             :         }
     986        2094 : }
     987             : 
     988             : /* ----------------------------------------------------------------------- */
     989             : 
     990       38913 : int ae_encrypt(ae_ctx     *  ctx,
     991             :                const void *  nonce,
     992             :                const void *pt,
     993             :                int         pt_len,
     994             :                const void *ad,
     995             :                int         ad_len,
     996             :                void       *ct,
     997             :                void       *tag,
     998             :                int         final)
     999             : {
                           /* Encrypt pt_len bytes of pt into ct and, when final is nonzero,
                            * produce the authentication tag.
                            *
                            * ctx    - per-key state; its offset, checksum and block counters are
                            *          read here and written back after the bulk loop.
                            * nonce  - non-NULL starts a new message (per-message state in ctx is
                            *          reset); NULL continues from the state already in ctx.
                            * pt     - plaintext input; it is read through a block pointer
                            *          (presumably must be block-aligned -- TODO confirm).
                            * pt_len - plaintext length in bytes.
                            * ad     - associated data, handed to process_ad() when ad_len > 0.
                            * ad_len - associated data length in bytes.
                            * ct     - ciphertext output; also receives the tag when tag is NULL.
                            * tag    - if non-NULL the tag is stored here, otherwise it is
                            *          appended to ct directly after the ciphertext.
                            * final  - nonzero: also handle the trailing partial chunk and emit
                            *          the tag. Zero: only whole BPI*16-byte chunks are consumed
                            *          and any trailing bytes are not touched by this call.
                            *
                            * Returns pt_len, increased by the tag length when the tag was
                            * appended to ct.
                            */
    1000       38913 :         union { uint32_t u32[4]; uint8_t u8[16]; block bl; } tmp;
    1001       38913 :     block offset, checksum;
    1002       38913 :     unsigned i, k;
    1003       38913 :     block       * ctp = (block *)ct;
    1004       38913 :     const block * ptp = (block *)pt;
    1005             : 
    1006             :     /* Non-null nonce means start of new message, init per-message values */
    1007       38913 :     if (nonce) {
    1008       38913 :         ctx->offset = gen_offset_from_nonce(ctx, nonce);
    1009       38913 :         ctx->ad_offset = ctx->checksum   = zero_block();
    1010       38913 :         ctx->ad_blocks_processed = ctx->blocks_processed    = 0;
                               /* A negative ad_len leaves the previous ad_checksum untouched. */
    1011       38913 :         if (ad_len >= 0)
    1012       38913 :                 ctx->ad_checksum = zero_block();
    1013             :     }
    1014             : 
    1015             :         /* Process associated data */
    1016       38913 :         if (ad_len > 0)
    1017         534 :                 process_ad(ctx, ad, ad_len, final);
    1018             : 
    1019             :         /* Encrypt plaintext data BPI blocks at a time */
    1020       38913 :     offset = ctx->offset;
    1021       38913 :     checksum  = ctx->checksum;
                           /* i = number of whole BPI-block chunks in the plaintext */
    1022       38913 :     i = pt_len/(BPI*16);
    1023       38913 :     if (i) {
    1024       34378 :         block oa[BPI];
    1025       34378 :         unsigned block_num = ctx->blocks_processed;
                               /* Seed the last slot so the first iteration chains from the current offset */
    1026       34378 :         oa[BPI-1] = offset;
    1027      508062 :                 do {
    1028      508062 :                         block ta[BPI];
    1029      508062 :                         block_num += BPI;
                                               /* Per block: derive its offset, mask the plaintext with it,
                                                  and fold the plaintext into the checksum */
    1030      508062 :                         oa[0] = xor_block(oa[BPI-1], ctx->L[0]);
    1031      508062 :                         ta[0] = xor_block(oa[0], ptp[0]);
    1032      508062 :                         checksum = xor_block(checksum, ptp[0]);
    1033      508062 :                         oa[1] = xor_block(oa[0], ctx->L[1]);
    1034      508062 :                         ta[1] = xor_block(oa[1], ptp[1]);
    1035      508062 :                         checksum = xor_block(checksum, ptp[1]);
    1036      508062 :                         oa[2] = xor_block(oa[1], ctx->L[0]);
    1037      508062 :                         ta[2] = xor_block(oa[2], ptp[2]);
    1038      508062 :                         checksum = xor_block(checksum, ptp[2]);
    1039             :                         #if BPI == 4
    1040      508062 :                                 oa[3] = xor_block(oa[2], getL(ctx, ntz(block_num)));
    1041      508062 :                                 ta[3] = xor_block(oa[3], ptp[3]);
    1042      508062 :                                 checksum = xor_block(checksum, ptp[3]);
    1043             :                         #elif BPI == 8
    1044             :                                 oa[3] = xor_block(oa[2], ctx->L[2]);
    1045             :                                 ta[3] = xor_block(oa[3], ptp[3]);
    1046             :                                 checksum = xor_block(checksum, ptp[3]);
    1047             :                                 oa[4] = xor_block(oa[1], ctx->L[2]);
    1048             :                                 ta[4] = xor_block(oa[4], ptp[4]);
    1049             :                                 checksum = xor_block(checksum, ptp[4]);
    1050             :                                 oa[5] = xor_block(oa[0], ctx->L[2]);
    1051             :                                 ta[5] = xor_block(oa[5], ptp[5]);
    1052             :                                 checksum = xor_block(checksum, ptp[5]);
    1053             :                                 oa[6] = xor_block(oa[7], ctx->L[2]);
    1054             :                                 ta[6] = xor_block(oa[6], ptp[6]);
    1055             :                                 checksum = xor_block(checksum, ptp[6]);
    1056             :                                 oa[7] = xor_block(oa[6], getL(ctx, ntz(block_num)));
    1057             :                                 ta[7] = xor_block(oa[7], ptp[7]);
    1058             :                                 checksum = xor_block(checksum, ptp[7]);
    1059             :                         #endif
                                               /* One batched AES call, then unmask each result with the same offset */
    1060      508062 :                         AES_ecb_encrypt_blks(ta,BPI,&ctx->encrypt_key);
    1061      508062 :                         ctp[0] = xor_block(ta[0], oa[0]);
    1062      508062 :                         ctp[1] = xor_block(ta[1], oa[1]);
    1063      508062 :                         ctp[2] = xor_block(ta[2], oa[2]);
    1064      508062 :                         ctp[3] = xor_block(ta[3], oa[3]);
    1065             :                         #if (BPI == 8)
    1066             :                         ctp[4] = xor_block(ta[4], oa[4]);
    1067             :                         ctp[5] = xor_block(ta[5], oa[5]);
    1068             :                         ctp[6] = xor_block(ta[6], oa[6]);
    1069             :                         ctp[7] = xor_block(ta[7], oa[7]);
    1070             :                         #endif
    1071      508062 :                         ptp += BPI;
    1072      508062 :                         ctp += BPI;
    1073      508062 :                 } while (--i);
                               /* Save running state so a later call can continue this message */
    1074       34378 :         ctx->offset = offset = oa[BPI-1];
    1075       34378 :             ctx->blocks_processed = block_num;
    1076       34378 :                 ctx->checksum = checksum;
    1077             :     }
    1078             : 
    1079       38913 :     if (final) {
                               /* ta has one slot more than oa: the extra one carries the tag block */
    1080       38913 :                 block ta[BPI+1], oa[BPI];
    1081             : 
    1082             :         /* Process remaining plaintext and compute its tag contribution    */
    1083       38913 :         unsigned remaining = ((unsigned)pt_len) % (BPI*16);
    1084       38913 :         k = 0;                      /* How many blocks in ta[] need ECBing */
    1085       38913 :         if (remaining) {
    1086             :                         #if (BPI == 8)
    1087             :                         if (remaining >= 64) {
    1088             :                                 oa[0] = xor_block(offset, ctx->L[0]);
    1089             :                                 ta[0] = xor_block(oa[0], ptp[0]);
    1090             :                                 checksum = xor_block(checksum, ptp[0]);
    1091             :                                 oa[1] = xor_block(oa[0], ctx->L[1]);
    1092             :                                 ta[1] = xor_block(oa[1], ptp[1]);
    1093             :                                 checksum = xor_block(checksum, ptp[1]);
    1094             :                                 oa[2] = xor_block(oa[1], ctx->L[0]);
    1095             :                                 ta[2] = xor_block(oa[2], ptp[2]);
    1096             :                                 checksum = xor_block(checksum, ptp[2]);
    1097             :                                 offset = oa[3] = xor_block(oa[2], ctx->L[2]);
    1098             :                                 ta[3] = xor_block(offset, ptp[3]);
    1099             :                                 checksum = xor_block(checksum, ptp[3]);
    1100             :                                 remaining -= 64;
    1101             :                                 k = 4;
    1102             :                         }
    1103             :                         #endif
    1104       38008 :                         if (remaining >= 32) {
    1105       19783 :                                 oa[k] = xor_block(offset, ctx->L[0]);
    1106       19783 :                                 ta[k] = xor_block(oa[k], ptp[k]);
    1107       19783 :                                 checksum = xor_block(checksum, ptp[k]);
    1108       19783 :                                 offset = oa[k+1] = xor_block(oa[k], ctx->L[1]);
    1109       19783 :                                 ta[k+1] = xor_block(offset, ptp[k+1]);
    1110       19783 :                                 checksum = xor_block(checksum, ptp[k+1]);
    1111       19783 :                                 remaining -= 32;
    1112       19783 :                                 k+=2;
    1113             :                         }
    1114       38008 :                         if (remaining >= 16) {
    1115       19231 :                                 offset = oa[k] = xor_block(offset, ctx->L[0]);
    1116       19231 :                                 ta[k] = xor_block(offset, ptp[k]);
    1117       19231 :                                 checksum = xor_block(checksum, ptp[k]);
    1118       19231 :                                 remaining -= 16;
    1119       19231 :                                 ++k;
    1120             :                         }
                                               /* remaining is now below 16: a partial last block. The checksum
                                                  takes the tail followed by a 0x80 byte and zeros, and ta[k]
                                                  gets offset^Lstar so the batched AES call below turns it
                                                  into the pad for those bytes. */
    1121       38008 :                         if (remaining) {
    1122       36113 :                                 tmp.bl = zero_block();
    1123       36113 :                                 memcpy(tmp.u8, ptp+k, remaining);
    1124       36113 :                                 tmp.u8[remaining] = (unsigned char)0x80u;
    1125       36113 :                                 checksum = xor_block(checksum, tmp.bl);
    1126       36113 :                                 ta[k] = offset = xor_block(offset,ctx->Lstar);
    1127       36113 :                                 ++k;
    1128             :                         }
    1129             :                 }
    1130       38913 :         offset = xor_block(offset, ctx->Ldollar);      /* Part of tag gen */
    1131       38913 :         ta[k] = xor_block(offset, checksum);           /* Part of tag gen */
                                       /* k data/pad blocks plus the tag block go through AES together */
    1132       38913 :                 AES_ecb_encrypt_blks(ta,k+1,&ctx->encrypt_key);
    1133       38913 :                 offset = xor_block(ta[k], ctx->ad_checksum);   /* Part of tag gen */
                                       /* After --k, ta[k] is the encrypted pad: XOR it into the padded
                                          tail and write out only the `remaining` ciphertext bytes. */
    1134       38913 :                 if (remaining) {
    1135       36113 :                         --k;
    1136       36113 :                         tmp.bl = xor_block(tmp.bl, ta[k]);
    1137       36113 :                         memcpy(ctp+k, tmp.u8, remaining);
    1138             :                 }
                                       /* k is now the count of whole trailing blocks; the cases fall
                                          through to unmask and store blocks k-1 down to 0. */
    1139       38913 :                 switch (k) {
    1140             :                         #if (BPI == 8)
    1141             :                         case 7: ctp[6] = xor_block(ta[6], oa[6]);
    1142             :                                 /* fallthrough */
    1143             :                         case 6: ctp[5] = xor_block(ta[5], oa[5]);
    1144             :                                 /* fallthrough */
    1145             :                         case 5: ctp[4] = xor_block(ta[4], oa[4]);
    1146             :                                 /* fallthrough */
    1147             :                         case 4: ctp[3] = xor_block(ta[3], oa[3]);
    1148             :                                 /* fallthrough */
    1149             :                         #endif
    1150       10321 :                         case 3: ctp[2] = xor_block(ta[2], oa[2]);
    1151             :                                 /* fallthrough */
    1152       19783 :                         case 2: ctp[1] = xor_block(ta[1], oa[1]);
    1153             :                                 /* fallthrough */
    1154       28693 :                         case 1: ctp[0] = xor_block(ta[0], oa[0]);
    1155             :                 }
    1156             : 
    1157             :         /* Store the tag (held in offset): into tag when the caller supplied
    1158             :            one, otherwise directly after the ciphertext in ct */
    1159       38913 :         if (tag) {
    1160             :                         #if (OCB_TAG_LEN == 16)
                                       /* NOTE(review): stored through a block pointer; presumably tag
                                          must be suitably aligned for block -- confirm */
    1161           0 :                 *(block *)tag = offset;
    1162             :                         #elif (OCB_TAG_LEN > 0)
    1163             :                     memcpy((char *)tag, &offset, OCB_TAG_LEN);
    1164             :                         #else
    1165             :                     memcpy((char *)tag, &offset, ctx->tag_len);
    1166             :                 #endif
    1167             :         } else {
    1168             :                         #if (OCB_TAG_LEN > 0)
    1169       38913 :                     memcpy((char *)ct + pt_len, &offset, OCB_TAG_LEN);
    1170       38913 :                 pt_len += OCB_TAG_LEN;
    1171             :                         #else
    1172             :                     memcpy((char *)ct + pt_len, &offset, ctx->tag_len);
    1173             :                 pt_len += ctx->tag_len;
    1174             :                 #endif
    1175             :         }
    1176             :     }
    1177       38913 :     return (int) pt_len;
    1178             : }
    1179             : 
    1180             : /* ----------------------------------------------------------------------- */
    1181             : 
    1182             : /* Compare two regions of memory, taking a constant amount of time for a
    1183             :    given buffer size -- under certain assumptions about the compiler
    1184             :    and machine, of course.
    1185             : 
    1186             :    Use this to avoid timing side-channel attacks.
    1187             : 
    1188             :    Returns 0 for memory regions with equal contents; non-zero otherwise. */
    1189       42796 : static int constant_time_memcmp(const void *av, const void *bv, size_t n) {
    1190       42796 :     const uint8_t *a = (const uint8_t *) av;
    1191       42796 :     const uint8_t *b = (const uint8_t *) bv;
                           /* Accumulates the OR of every byte-wise XOR; it stays 0 only if all
                              n byte pairs are equal. */
    1192       42796 :     uint8_t result = 0;
    1193       42796 :     size_t i;
    1194             : 
                           /* No early exit: every one of the n bytes is visited whether or not
                              a mismatch has already been seen. */
    1195      727532 :     for (i=0; i<n; i++) {
    1196      684736 :         result |= *a ^ *b;
    1197      684736 :         a++;
    1198      684736 :         b++;
    1199             :     }
    1200             : 
                           /* 0 when equal, otherwise the nonzero OR of all byte differences */
    1201       42796 :     return (int) result;
    1202             : }
    1203             : 
    1204       42796 : int ae_decrypt(ae_ctx     *ctx,
    1205             :                const void *nonce,
    1206             :                const void *ct,
    1207             :                int         ct_len,
    1208             :                const void *ad,
    1209             :                int         ad_len,
    1210             :                void       *pt,
    1211             :                const void *tag,
    1212             :                int         final)
    1213             : {
                           /* Decrypt ct into pt and, when final is nonzero, verify the tag.
                            *
                            * ctx    - per-key state; its offset, checksum and block counters are
                            *          read here and written back after the bulk loop.
                            * nonce  - non-NULL starts a new message (per-message state in ctx is
                            *          reset); NULL continues from the state already in ctx.
                            * ct     - ciphertext input. When final is nonzero and tag is NULL the
                            *          tag is taken from ct directly after the ciphertext, and
                            *          ct_len is expected to include it.
                            * ct_len - ciphertext length in bytes (see ct for the bundled-tag case).
                            * ad     - associated data, handed to process_ad() when ad_len > 0.
                            * ad_len - associated data length in bytes.
                            * pt     - plaintext output.
                            * tag    - expected tag, or NULL when the tag is bundled in ct.
                            * final  - nonzero: also handle the trailing partial chunk and check
                            *          the tag. Zero: only whole BPI*16-byte chunks are consumed.
                            *
                            * Returns ct_len (less the bundled tag length, if any), or AE_INVALID
                            * when final is nonzero and the tag does not match. Plaintext is
                            * written to pt before the tag is compared, so pt already holds
                            * output when AE_INVALID is returned.
                            *
                            * NOTE(review): ct_len is not checked against the tag length. If it
                            * is shorter, ct_len becomes negative and (unsigned)ct_len % (BPI*16)
                            * below can still select bytes to process -- presumably callers
                            * guarantee ct_len >= tag length; confirm.
                            */
    1214       42796 :         union { uint32_t u32[4]; uint8_t u8[16]; block bl; } tmp;
    1215       42796 :     block offset, checksum;
    1216       42796 :     unsigned i, k;
    1217       42796 :     block       *ctp = (block *)ct;
    1218       42796 :     block       *ptp = (block *)pt;
    1219             : 
    1220             :         /* Reduce ct_len by the tag length when the tag is bundled in ct */
    1221       42796 :         if ((final) && (!tag))
    1222             :                 #if (OCB_TAG_LEN > 0)
    1223       42796 :                         ct_len -= OCB_TAG_LEN;
    1224             :                 #else
    1225             :                         ct_len -= ctx->tag_len;
    1226             :                 #endif
    1227             : 
    1228             :     /* Non-null nonce means start of new message, init per-message values */
    1229       42796 :     if (nonce) {
    1230       42796 :         ctx->offset = gen_offset_from_nonce(ctx, nonce);
    1231       42796 :         ctx->ad_offset = ctx->checksum   = zero_block();
    1232       42796 :         ctx->ad_blocks_processed = ctx->blocks_processed    = 0;
                               /* A negative ad_len leaves the previous ad_checksum untouched. */
    1233       42796 :         if (ad_len >= 0)
    1234       42796 :                 ctx->ad_checksum = zero_block();
    1235             :     }
    1236             : 
    1237             :         /* Process associated data */
    1238       42796 :         if (ad_len > 0)
    1239        1560 :                 process_ad(ctx, ad, ad_len, final);
    1240             : 
    1241             :         /* Decrypt ciphertext data BPI blocks at a time */
    1242       42796 :     offset = ctx->offset;
    1243       42796 :     checksum  = ctx->checksum;
                           /* i = number of whole BPI-block chunks in the ciphertext */
    1244       42796 :     i = ct_len/(BPI*16);
    1245       42796 :     if (i) {
    1246       36338 :         block oa[BPI];
    1247       36338 :         unsigned block_num = ctx->blocks_processed;
                               /* Seed the last slot so the first iteration chains from the current offset */
    1248       36338 :         oa[BPI-1] = offset;
    1249      540461 :                 do {
    1250      540461 :                         block ta[BPI];
    1251      540461 :                         block_num += BPI;
                                               /* Per block: derive its offset and mask the ciphertext with it */
    1252      540461 :                         oa[0] = xor_block(oa[BPI-1], ctx->L[0]);
    1253      540461 :                         ta[0] = xor_block(oa[0], ctp[0]);
    1254      540461 :                         oa[1] = xor_block(oa[0], ctx->L[1]);
    1255      540461 :                         ta[1] = xor_block(oa[1], ctp[1]);
    1256      540461 :                         oa[2] = xor_block(oa[1], ctx->L[0]);
    1257      540461 :                         ta[2] = xor_block(oa[2], ctp[2]);
    1258             :                         #if BPI == 4
    1259      540461 :                                 oa[3] = xor_block(oa[2], getL(ctx, ntz(block_num)));
    1260      540461 :                                 ta[3] = xor_block(oa[3], ctp[3]);
    1261             :                         #elif BPI == 8
    1262             :                                 oa[3] = xor_block(oa[2], ctx->L[2]);
    1263             :                                 ta[3] = xor_block(oa[3], ctp[3]);
    1264             :                                 oa[4] = xor_block(oa[1], ctx->L[2]);
    1265             :                                 ta[4] = xor_block(oa[4], ctp[4]);
    1266             :                                 oa[5] = xor_block(oa[0], ctx->L[2]);
    1267             :                                 ta[5] = xor_block(oa[5], ctp[5]);
    1268             :                                 oa[6] = xor_block(oa[7], ctx->L[2]);
    1269             :                                 ta[6] = xor_block(oa[6], ctp[6]);
    1270             :                                 oa[7] = xor_block(oa[6], getL(ctx, ntz(block_num)));
    1271             :                                 ta[7] = xor_block(oa[7], ctp[7]);
    1272             :                         #endif
                                               /* One batched AES call, then unmask each block and fold the
                                                  recovered plaintext into the checksum */
    1273      540461 :                         AES_ecb_decrypt_blks(ta,BPI,&ctx->decrypt_key);
    1274      540461 :                         ptp[0] = xor_block(ta[0], oa[0]);
    1275      540461 :                         checksum = xor_block(checksum, ptp[0]);
    1276      540461 :                         ptp[1] = xor_block(ta[1], oa[1]);
    1277      540461 :                         checksum = xor_block(checksum, ptp[1]);
    1278      540461 :                         ptp[2] = xor_block(ta[2], oa[2]);
    1279      540461 :                         checksum = xor_block(checksum, ptp[2]);
    1280      540461 :                         ptp[3] = xor_block(ta[3], oa[3]);
    1281      540461 :                         checksum = xor_block(checksum, ptp[3]);
    1282             :                         #if (BPI == 8)
    1283             :                         ptp[4] = xor_block(ta[4], oa[4]);
    1284             :                         checksum = xor_block(checksum, ptp[4]);
    1285             :                         ptp[5] = xor_block(ta[5], oa[5]);
    1286             :                         checksum = xor_block(checksum, ptp[5]);
    1287             :                         ptp[6] = xor_block(ta[6], oa[6]);
    1288             :                         checksum = xor_block(checksum, ptp[6]);
    1289             :                         ptp[7] = xor_block(ta[7], oa[7]);
    1290             :                         checksum = xor_block(checksum, ptp[7]);
    1291             :                         #endif
    1292      540461 :                         ptp += BPI;
    1293      540461 :                         ctp += BPI;
    1294      540461 :                 } while (--i);
                               /* Save running state so a later call can continue this message */
    1295       36338 :         ctx->offset = offset = oa[BPI-1];
    1296       36338 :             ctx->blocks_processed = block_num;
    1297       36338 :                 ctx->checksum = checksum;
    1298             :     }
    1299             : 
    1300       42796 :     if (final) {
    1301       42796 :                 block ta[BPI+1], oa[BPI];
    1302             : 
    1303             :         /* Process remaining ciphertext and compute its tag contribution   */
    1304       42796 :         unsigned remaining = ((unsigned)ct_len) % (BPI*16);
    1305       42796 :         k = 0;                      /* How many blocks in ta[] need ECBing */
    1306       42796 :         if (remaining) {
    1307             :                         #if (BPI == 8)
    1308             :                         if (remaining >= 64) {
    1309             :                                 oa[0] = xor_block(offset, ctx->L[0]);
    1310             :                                 ta[0] = xor_block(oa[0], ctp[0]);
    1311             :                                 oa[1] = xor_block(oa[0], ctx->L[1]);
    1312             :                                 ta[1] = xor_block(oa[1], ctp[1]);
    1313             :                                 oa[2] = xor_block(oa[1], ctx->L[0]);
    1314             :                                 ta[2] = xor_block(oa[2], ctp[2]);
    1315             :                                 offset = oa[3] = xor_block(oa[2], ctx->L[2]);
    1316             :                                 ta[3] = xor_block(offset, ctp[3]);
    1317             :                                 remaining -= 64;
    1318             :                                 k = 4;
    1319             :                         }
    1320             :                         #endif
    1321       41105 :                         if (remaining >= 32) {
    1322       21131 :                                 oa[k] = xor_block(offset, ctx->L[0]);
    1323       21131 :                                 ta[k] = xor_block(oa[k], ctp[k]);
    1324       21131 :                                 offset = oa[k+1] = xor_block(oa[k], ctx->L[1]);
    1325       21131 :                                 ta[k+1] = xor_block(offset, ctp[k+1]);
    1326       21131 :                                 remaining -= 32;
    1327       21131 :                                 k+=2;
    1328             :                         }
    1329       41105 :                         if (remaining >= 16) {
    1330       20894 :                                 offset = oa[k] = xor_block(offset, ctx->L[0]);
    1331       20894 :                                 ta[k] = xor_block(offset, ctp[k]);
    1332       20894 :                                 remaining -= 16;
    1333       20894 :                                 ++k;
    1334             :                         }
                                               /* remaining is now below 16: a partial last block. Its pad comes
                                                  from AES *encryption* of offset^Lstar, done right here, so k
                                                  is not advanced for it. */
    1335       41105 :                         if (remaining) {
    1336       38636 :                                 block pad;
    1337       38636 :                                 offset = xor_block(offset,ctx->Lstar);
    1338       38636 :                                 AES_encrypt((unsigned char *)&offset, tmp.u8, &ctx->encrypt_key);
    1339       38636 :                                 pad = tmp.bl;
                                                       /* Bytes past `remaining` in tmp still hold the pad, so the
                                                          XOR with pad below clears them to zero */
    1340       38636 :                                 memcpy(tmp.u8,ctp+k,remaining);
    1341       38636 :                                 tmp.bl = xor_block(tmp.bl, pad);
                                                       /* tmp is now the plaintext tail followed by 0x80 and zeros;
                                                          only the tail is written out, the whole block feeds the checksum */
    1342       38636 :                                 tmp.u8[remaining] = (unsigned char)0x80u;
    1343       38636 :                                 memcpy(ptp+k, tmp.u8, remaining);
    1344       38636 :                                 checksum = xor_block(checksum, tmp.bl);
    1345             :                         }
    1346             :                 }
                                       /* Decrypt the k whole trailing blocks; the cases fall through to
                                          unmask blocks k-1 down to 0 and add each to the checksum. */
    1347       42796 :                 AES_ecb_decrypt_blks(ta,k,&ctx->decrypt_key);
    1348       42796 :                 switch (k) {
    1349             :                         #if (BPI == 8)
    1350             :                         case 7: ptp[6] = xor_block(ta[6], oa[6]);
    1351             :                                     checksum = xor_block(checksum, ptp[6]);
    1352             :                                     /* fallthrough */
    1353             :                         case 6: ptp[5] = xor_block(ta[5], oa[5]);
    1354             :                                     checksum = xor_block(checksum, ptp[5]);
    1355             :                                     /* fallthrough */
    1356             :                         case 5: ptp[4] = xor_block(ta[4], oa[4]);
    1357             :                                     checksum = xor_block(checksum, ptp[4]);
    1358             :                                     /* fallthrough */
    1359             :                         case 4: ptp[3] = xor_block(ta[3], oa[3]);
    1360             :                                     checksum = xor_block(checksum, ptp[3]);
    1361             :                                     /* fallthrough */
    1362             :                         #endif
    1363       10819 :                         case 3: ptp[2] = xor_block(ta[2], oa[2]);
    1364       10819 :                                     checksum = xor_block(checksum, ptp[2]);
    1365             :                                     /* fallthrough */
    1366       21131 :                         case 2: ptp[1] = xor_block(ta[1], oa[1]);
    1367       21131 :                                     checksum = xor_block(checksum, ptp[1]);
    1368             :                                     /* fallthrough */
    1369       31206 :                         case 1: ptp[0] = xor_block(ta[0], oa[0]);
    1370       31206 :                                     checksum = xor_block(checksum, ptp[0]);
    1371             :                 }
    1372             : 
    1373             :                 /* Calculate expected tag */
    1374       42796 :         offset = xor_block(offset, ctx->Ldollar);
    1375       42796 :         tmp.bl = xor_block(offset, checksum);
    1376       42796 :                 AES_encrypt(tmp.u8, tmp.u8, &ctx->encrypt_key);
    1377       42796 :                 tmp.bl = xor_block(tmp.bl, ctx->ad_checksum); /* Full tag */
    1378             : 
    1379             :                 /* Compare with proposed tag, change ct_len if invalid */
                                       /* NOTE(review): this branch reads tag through a block pointer and
                                          relies on unequal_blocks(); whether that helper is constant-time
                                          and what alignment it needs is not visible here -- confirm. */
    1380       42796 :                 if ((OCB_TAG_LEN == 16) && tag) {
    1381           0 :                         if (unequal_blocks(tmp.bl, *(block *)tag))
    1382           0 :                                 ct_len = AE_INVALID;
    1383             :                 } else {
    1384             :                         #if (OCB_TAG_LEN > 0)
    1385       42796 :                                 int len = OCB_TAG_LEN;
    1386             :                         #else
    1387             :                                 int len = ctx->tag_len;
    1388             :                         #endif
    1389       42796 :                         if (tag) {
    1390             :                                 if (constant_time_memcmp(tag,tmp.u8,len) != 0)
    1391             :                                         ct_len = AE_INVALID;
    1392             :                         } else {
                                                       /* Bundled tag: ct_len was already reduced above, so the
                                                          tag starts at ct + ct_len */
    1393       85592 :                                 if (constant_time_memcmp((char *)ct + ct_len,tmp.u8,len) != 0)
    1394        4701 :                                         ct_len = AE_INVALID;
    1395             :                         }
    1396             :                 }
    1397             :     }
    1398       42796 :     return ct_len;
    1399             :  }
    1400             : 
    1401             : /* ----------------------------------------------------------------------- */
    1402             : /* Simple test program                                                     */
    1403             : /* ----------------------------------------------------------------------- */
    1404             : 
    1405             : #if defined(OCB_TEST_PROGRAM)
    1406             : 
    1407             : #include <stdio.h>
    1408             : #include <time.h>
    1409             : 
    1410             : #if __GNUC__
    1411             :         #define ALIGN(n) __attribute__ ((aligned(n)))
    1412             : #elif _MSC_VER
    1413             :         #define ALIGN(n) __declspec(align(n))
    1414             : #else /* Neither GNU nor Microsoft: ALIGN expands to nothing, so no alignment is requested. */
    1415             :         #define ALIGN(n)
    1416             : #endif
    1417             : 
    1418             : static void pbuf(void *p, unsigned len, const void *s)
    1419             : {
                           /* Print the optional label s (skipped when NULL), then the len bytes
                              at p as two-digit uppercase hex, then a newline. */
    1420             :     unsigned i;
    1421             :     if (s)
    1422             :         printf("%s", (char *)s);
    1423             :     for (i = 0; i < len; i++)
    1424             :         printf("%02X", (unsigned)(((unsigned char *)p)[i]));
    1425             :     printf("\n");
    1426             : }
    1427             : 
    1428             : static void vectors(ae_ctx *ctx, int len)
    1429             : {
                           /* Print three ciphertexts for a fixed nonce, using pt[i] = i as both
                              plaintext and associated data: (P=len, A=len), (P=0, A=len) and
                              (P=len, A=0). tag is passed as NULL, so ae_encrypt appends the tag
                              to ct and the returned length i covers ciphertext plus tag.
                              pt holds 128 bytes and ct 144; presumably len must not exceed
                              128 -- confirm against callers. */
    1430             :     ALIGN(16) uint8_t pt[128];
    1431             :     ALIGN(16) uint8_t ct[144];
    1432             :     ALIGN(16) uint8_t nonce[] = {0,1,2,3,4,5,6,7,8,9,10,11};
    1433             :     int i;
    1434             :     for (i=0; i < 128; i++) pt[i] = i;
    1435             :     i = ae_encrypt(ctx,nonce,pt,len,pt,len,ct,NULL,AE_FINALIZE);
    1436             :     printf("P=%d,A=%d: ",len,len); pbuf(ct, i, NULL);
    1437             :     i = ae_encrypt(ctx,nonce,pt,0,pt,len,ct,NULL,AE_FINALIZE);
    1438             :     printf("P=%d,A=%d: ",0,len); pbuf(ct, i, NULL);
    1439             :     i = ae_encrypt(ctx,nonce,pt,len,pt,0,ct,NULL,AE_FINALIZE);
    1440             :     printf("P=%d,A=%d: ",len,0); pbuf(ct, i, NULL);
    1441             : }
    1442             : 
    1443             : static void validate()
    1444             : {
    1445             :     ALIGN(16) uint8_t pt[1024];
    1446             :     ALIGN(16) uint8_t ct[1024];
    1447             :     ALIGN(16) uint8_t tag[16];
    1448             :     ALIGN(16) uint8_t nonce[12] = {0,};
    1449             :     ALIGN(16) uint8_t key[32] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
    1450             :     ALIGN(16) uint8_t valid[] = {0xB2,0xB4,0x1C,0xBF,0x9B,0x05,0x03,0x7D,
    1451             :                                  0xA7,0xF1,0x6C,0x24,0xA3,0x5C,0x1C,0x94};
    1452             :     ALIGN(16) uint8_t val_buf[22400];
    1453             :     ae_ctx ctx;
    1454             :     uint8_t *next = val_buf;
    1455             :     int i, len;
    1456             : 
    1457             :     if (0) {
    1458             :                 ae_init(&ctx, key, 16, 12, 16);
    1459             :                 /* pbuf(&ctx, sizeof(ctx), "CTX: "); */
    1460             :                 vectors(&ctx,0);
    1461             :                 vectors(&ctx,8);
    1462             :                 vectors(&ctx,16);
    1463             :                 vectors(&ctx,24);
    1464             :                 vectors(&ctx,32);
    1465             :                 vectors(&ctx,40);
    1466             :     }
    1467             : 
    1468             :     memset(key,0,32);
    1469             :     memset(pt,0,128);
    1470             :     ae_init(&ctx, key, 16, 12, 16);
    1471             : 
    1472             :     /* RFC Vector test */
    1473             :     for (i = 0; i < 128; i++) {
    1474             :         int first = ((i/3)/(BPI*16))*(BPI*16);
    1475             :         int second = first;
    1476             :         int third = i - (first + second);
    1477             : 
    1478             :         nonce[11] = i;
    1479             : 
    1480             :         if (0) {
    1481             :             ae_encrypt(&ctx,nonce,pt,i,pt,i,ct,NULL,AE_FINALIZE);
    1482             :             memcpy(next,ct,(size_t)i+16);
    1483             :             next = next+i+16;
    1484             : 
    1485             :             ae_encrypt(&ctx,nonce,pt,i,pt,0,ct,NULL,AE_FINALIZE);
    1486             :             memcpy(next,ct,(size_t)i+16);
    1487             :             next = next+i+16;
    1488             : 
    1489             :             ae_encrypt(&ctx,nonce,pt,0,pt,i,ct,NULL,AE_FINALIZE);
    1490             :             memcpy(next,ct,16);
    1491             :             next = next+16;
    1492             :         } else {
    1493             :             ae_encrypt(&ctx,nonce,pt,first,pt,first,ct,NULL,AE_PENDING);
    1494             :             ae_encrypt(&ctx,NULL,pt+first,second,pt+first,second,ct+first,NULL,AE_PENDING);
    1495             :             ae_encrypt(&ctx,NULL,pt+first+second,third,pt+first+second,third,ct+first+second,NULL,AE_FINALIZE);
    1496             :             memcpy(next,ct,(size_t)i+16);
    1497             :             next = next+i+16;
    1498             : 
    1499             :             ae_encrypt(&ctx,nonce,pt,first,pt,0,ct,NULL,AE_PENDING);
    1500             :             ae_encrypt(&ctx,NULL,pt+first,second,pt,0,ct+first,NULL,AE_PENDING);
    1501             :             ae_encrypt(&ctx,NULL,pt+first+second,third,pt,0,ct+first+second,NULL,AE_FINALIZE);
    1502             :             memcpy(next,ct,(size_t)i+16);
    1503             :             next = next+i+16;
    1504             : 
    1505             :             ae_encrypt(&ctx,nonce,pt,0,pt,first,ct,NULL,AE_PENDING);
    1506             :             ae_encrypt(&ctx,NULL,pt,0,pt+first,second,ct,NULL,AE_PENDING);
    1507             :             ae_encrypt(&ctx,NULL,pt,0,pt+first+second,third,ct,NULL,AE_FINALIZE);
    1508             :             memcpy(next,ct,16);
    1509             :             next = next+16;
    1510             :         }
    1511             : 
    1512             :     }
    1513             :     nonce[11] = 0;
    1514             :     ae_encrypt(&ctx,nonce,NULL,0,val_buf,next-val_buf,ct,tag,AE_FINALIZE);
    1515             :     pbuf(tag,16,0);
    1516             :     if (memcmp(valid,tag,16) == 0)
    1517             :         printf("Vectors: PASS\n");
    1518             :     else
    1519             :         printf("Vectors: FAIL\n");
    1520             : 
    1521             : 
    1522             :     /* Encrypt/Decrypt test */
    1523             :     for (i = 0; i < 128; i++) {
    1524             :         int first = ((i/3)/(BPI*16))*(BPI*16);
    1525             :         int second = first;
    1526             :         int third = i - (first + second);
    1527             : 
    1528             :         nonce[11] = i%128;
    1529             : 
    1530             :         if (1) {
    1531             :             len = ae_encrypt(&ctx,nonce,val_buf,i,val_buf,i,ct,tag,AE_FINALIZE);
    1532             :             len = ae_encrypt(&ctx,nonce,val_buf,i,val_buf,-1,ct,tag,AE_FINALIZE);
    1533             :             len = ae_decrypt(&ctx,nonce,ct,len,val_buf,-1,pt,tag,AE_FINALIZE);
    1534             :             if (len == -1) { printf("Authentication error: %d\n", i); return; }
    1535             :             if (len != i) { printf("Length error: %d\n", i); return; }
    1536             :             if (memcmp(val_buf,pt,i)) { printf("Decrypt error: %d\n", i); return; }
    1537             :         } else {
    1538             :             len = ae_encrypt(&ctx,nonce,val_buf,i,val_buf,i,ct,NULL,AE_FINALIZE);
    1539             :             ae_decrypt(&ctx,nonce,ct,first,val_buf,first,pt,NULL,AE_PENDING);
    1540             :             ae_decrypt(&ctx,NULL,ct+first,second,val_buf+first,second,pt+first,NULL,AE_PENDING);
    1541             :             len = ae_decrypt(&ctx,NULL,ct+first+second,len-(first+second),val_buf+first+second,third,pt+first+second,NULL,AE_FINALIZE);
    1542             :             if (len == -1) { printf("Authentication error: %d\n", i); return; }
    1543             :             if (memcmp(val_buf,pt,i)) { printf("Decrypt error: %d\n", i); return; }
    1544             :         }
    1545             : 
    1546             :     }
    1547             :     printf("Decrypt: PASS\n");
    1548             : }
    1549             : 
    1550             : int main()
    1551             : {
    1552             :     validate();
    1553             :     return 0;
    1554             : }
    1555             : #endif
    1556             : 
    /* Human-readable name of the AES backend selected at build time.
     * The first USE_* macro in the chain below that evaluates to nonzero picks
     * the text; when none of them does, infoString is not defined at all. */
    #if USE_AES_NI
    char infoString[] = "OCB3 (AES-NI)";
    #elif USE_REFERENCE_AES
    char infoString[] = "OCB3 (Reference)";
    #elif USE_OPENSSL_AES
    char infoString[] = "OCB3 (OpenSSL)";
    #endif

Generated by: LCOV version 1.14