gnutls-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[SCM] GNU gnutls branch, master, updated. gnutls_2_99_0-61-g065927b


From: Nikos Mavrogiannopoulos
Subject: [SCM] GNU gnutls branch, master, updated. gnutls_2_99_0-61-g065927b
Date: Sat, 16 Apr 2011 16:41:30 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU gnutls".

http://git.savannah.gnu.org/cgit/gnutls.git/commit/?id=065927be43c1aef5771c0b18b28b8ef9ccdc6eae

The branch, master has been updated
       via  065927be43c1aef5771c0b18b28b8ef9ccdc6eae (commit)
      from  d8aedd61d833fe71b3a0707c74b2c491d6dce14c (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 065927be43c1aef5771c0b18b28b8ef9ccdc6eae
Author: Nikos Mavrogiannopoulos <address@hidden>
Date:   Sat Apr 16 18:39:29 2011 +0200

    Added Andy Polyakov's version of AES-NI optimizations.

-----------------------------------------------------------------------

Summary of changes:
 NEWS                                         |    2 +-
 configure.ac                                 |   16 +-
 lib/accelerated/intel/Makefile.am            |   15 +-
 lib/accelerated/intel/README                 |    4 +-
 lib/accelerated/intel/aes-x86.c              |  162 +-
 lib/accelerated/intel/asm/appro-aes-x86-64.s |  782 +++++++++
 lib/accelerated/intel/asm/appro-aes-x86.s    |  798 ++++++++++
 lib/accelerated/intel/asm/x64_iaesx64.s      | 2054 ------------------------
 lib/accelerated/intel/asm/x86_iaesx86.s      | 2183 --------------------------
 lib/accelerated/intel/iaes_asm_interface.h   |  126 --
 lib/accelerated/intel/iaesni.h               |  147 --
 lib/accelerated/intel/license.txt            |   77 +-
 12 files changed, 1710 insertions(+), 4656 deletions(-)
 create mode 100644 lib/accelerated/intel/asm/appro-aes-x86-64.s
 create mode 100644 lib/accelerated/intel/asm/appro-aes-x86.s
 delete mode 100755 lib/accelerated/intel/asm/x64_iaesx64.s
 delete mode 100755 lib/accelerated/intel/asm/x86_iaesx86.s
 delete mode 100755 lib/accelerated/intel/iaes_asm_interface.h
 delete mode 100755 lib/accelerated/intel/iaesni.h

diff --git a/NEWS b/NEWS
index db71fa7..2976d3c 100644
--- a/NEWS
+++ b/NEWS
@@ -6,7 +6,7 @@ See the end for copying conditions.
 * Version 2.99.1 (unreleased)
 
 ** libgnutls: Added support for AES-NI if detected. Uses
-Intel AES-NI code.
+Andy Polyakov's AES-NI code.
 
 ** libgnutls-extra: Dropped support of LZO compression via liblzo.
 
diff --git a/configure.ac b/configure.ac
index 5dc81a1..85fa87a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -36,6 +36,7 @@ AC_MSG_RESULT([***
 
 dnl Checks for programs.
 AC_PROG_CC
+AM_PROG_AS
 AC_PROG_CXX
 gl_EARLY
 
@@ -79,18 +80,13 @@ if test "$use_accel" != "no"; then
 case $host_cpu in
   i?86 | x86_64 | amd64)
     GCC_FLAG_ADD([-maes -mpclmul],[X86])
-    AC_CHECK_PROGS(YASM, yasm)
-
-    if test "x$YASM" != "x";then
-      if test "x$X86" = "xyes";then
-        if test "$host_cpu" = "x86_64" -o "$host_cpu" = "amd64";then
-          hw_accel="x86-64"
-        else
-          hw_accel="x86"
-        fi
+
+    if test "x$X86" = "xyes";then
+      if test "$host_cpu" = "x86_64" -o "$host_cpu" = "amd64";then
+        hw_accel="x86-64"
+      else
+        hw_accel="x86"
       fi
-    else
-      AC_MSG_WARN([[yasm assembler not found. Disabling AES-NI compilation.]])
     fi
   ;;
   *)
diff --git a/lib/accelerated/intel/Makefile.am 
b/lib/accelerated/intel/Makefile.am
index f57b098..5efe894 100644
--- a/lib/accelerated/intel/Makefile.am
+++ b/lib/accelerated/intel/Makefile.am
@@ -32,25 +32,16 @@ if ENABLE_MINITASN1
 AM_CPPFLAGS += -I$(srcdir)/../../minitasn1
 endif
 
-EXTRA_DIST = aes-x86.h README license.txt iaes_asm_interface.h iaesni.h \
-       asm/x64_iaesx64.s asm/x86_iaesx86.s
+EXTRA_DIST = aes-x86.h README license.txt
 
 noinst_LTLIBRARIES = libintel.la
 
 libintel_la_SOURCES = aes-x86.c
 libintel_la_LIBADD =
 
-YASM_OPTS = -D__linux__
-
-x64_iaesx64.o: asm/x64_iaesx64.s
-       $(YASM) $(YASM_OPTS) -f elf64 $^ -o $@
-
-x86_iaesx86.o: asm/x86_iaesx86.s
-       $(YASM) $(YASM_OPTS) -f elf32 $^ -o $@
-
 if ASM_X86_64
-libintel_la_LIBADD += x64_iaesx64.o
+libintel_la_SOURCES += asm/appro-aes-x86-64.s
 else
-libintel_la_LIBADD += x86_iaesx86.o
+libintel_la_SOURCES += asm/appro-aes-x86.s
 endif
 
diff --git a/lib/accelerated/intel/README b/lib/accelerated/intel/README
index 187242b..dc1f43f 100644
--- a/lib/accelerated/intel/README
+++ b/lib/accelerated/intel/README
@@ -1,3 +1,3 @@
-The Intel AES library is not part of the GnuTLS library, but is used with
-GnuTLS. The license of Intel AES library is included in license.txt.
+The AES-NI implementation by Andy Polyakov is not part of the GnuTLS library, 
but is 
+used with GnuTLS. Its license is included in license.txt.
 
diff --git a/lib/accelerated/intel/aes-x86.c b/lib/accelerated/intel/aes-x86.c
index df4a49b..f1a24aa 100644
--- a/lib/accelerated/intel/aes-x86.c
+++ b/lib/accelerated/intel/aes-x86.c
@@ -31,33 +31,44 @@
 #include <gnutls_errors.h>
 #include <aes-x86.h>
 #include <x86.h>
-#include "iaes_asm_interface.h"
 
 #ifdef __GNUC__
-# define ALIGN16 __attribute__ ((aligned (16))) 
+# define ALIGN16 __attribute__ ((aligned (16)))
 #else
 # define ALIGN16
 #endif
 
-typedef void (*enc_func)(sAesData*);
-
-struct aes_ctx {
-       uint8_t ALIGN16 expanded_key[16*16];
-       uint8_t ALIGN16 expanded_key_dec[16*16];
-       uint8_t iv[16];
-       enc_func enc;
-       enc_func dec;
-       size_t keysize;
+#define AES_MAXNR 14
+typedef struct
+{
+  uint32_t ALIGN16 rd_key[4 * (AES_MAXNR + 1)];
+  int rounds;
+} AES_KEY;
+
+void aesni_cbc_encrypt (const unsigned char *in, unsigned char *out,
+                        size_t len, const AES_KEY * key,
+                        unsigned char *ivec, const int enc);
+int aesni_set_decrypt_key (const unsigned char *userKey, const int bits,
+                           AES_KEY * key);
+int aesni_set_encrypt_key (const unsigned char *userKey, const int bits,
+                           AES_KEY * key);
+
+struct aes_ctx
+{
+  AES_KEY expanded_key;
+  AES_KEY expanded_key_dec;
+  uint8_t iv[16];
 };
 
 static int
 aes_cipher_init (gnutls_cipher_algorithm_t algorithm, void **_ctx)
 {
   struct aes_ctx *ctx;
-  
+
   /* we use key size to distinguish */
-  if (algorithm != GNUTLS_CIPHER_AES_128_CBC && algorithm != 
GNUTLS_CIPHER_AES_192_CBC 
-    && algorithm != GNUTLS_CIPHER_AES_256_CBC)
+  if (algorithm != GNUTLS_CIPHER_AES_128_CBC
+      && algorithm != GNUTLS_CIPHER_AES_192_CBC
+      && algorithm != GNUTLS_CIPHER_AES_256_CBC)
     return GNUTLS_E_INVALID_REQUEST;
 
   *_ctx = gnutls_calloc (1, sizeof (struct aes_ctx));
@@ -75,33 +86,16 @@ aes_cipher_init (gnutls_cipher_algorithm_t algorithm, void 
**_ctx)
 static int
 aes_cipher_setkey (void *_ctx, const void *userkey, size_t keysize)
 {
-struct aes_ctx *ctx = _ctx;
+  struct aes_ctx *ctx = _ctx;
+  int ret;
 
-  if (keysize == 128/8)
-    {
-      iEncExpandKey128((void*)userkey, ctx->expanded_key);
-      iDecExpandKey128((void*)userkey, ctx->expanded_key_dec);
-      ctx->enc = iEnc128_CBC;
-      ctx->dec = iDec128_CBC;
-    }
-  else if (keysize == 192/8)
-    {
-      iEncExpandKey192((void*)userkey, ctx->expanded_key);
-      iDecExpandKey192((void*)userkey, ctx->expanded_key_dec);
-      ctx->enc = iEnc192_CBC;
-      ctx->dec = iDec192_CBC;
-    }
-  else if (keysize == 256/8)
-    {
-      iEncExpandKey256((void*)userkey, ctx->expanded_key);
-      iDecExpandKey256((void*)userkey, ctx->expanded_key_dec);
-      ctx->enc = iEnc256_CBC;
-      ctx->dec = iDec256_CBC;
-    }
-  else
-    return gnutls_assert_val(GNUTLS_E_INVALID_REQUEST);
+  ret = aesni_set_encrypt_key (userkey, keysize * 8, &ctx->expanded_key);
+  if (ret != 0)
+    return gnutls_assert_val (GNUTLS_E_ENCRYPTION_FAILED);
 
-  ctx->keysize = keysize;
+  ret = aesni_set_decrypt_key (userkey, keysize * 8, &ctx->expanded_key_dec);
+  if (ret != 0)
+    return gnutls_assert_val (GNUTLS_E_ENCRYPTION_FAILED);
 
   return 0;
 }
@@ -117,36 +111,22 @@ aes_setiv (void *_ctx, const void *iv, size_t iv_size)
 
 static int
 aes_encrypt (void *_ctx, const void *plain, size_t plainsize,
-                   void *encr, size_t length)
+             void *encr, size_t length)
 {
-struct aes_ctx *ctx = _ctx;
-sAesData aesData;
-  
-  aesData.iv = ctx->iv;
-  aesData.in_block = (void*)plain;
-  aesData.out_block = encr;
-  aesData.expanded_key = ctx->expanded_key;
-  aesData.num_blocks = (plainsize + 1) / 16;
-
-  ctx->enc(&aesData);
+  struct aes_ctx *ctx = _ctx;
 
+  aesni_cbc_encrypt (plain, encr, plainsize, &ctx->expanded_key, ctx->iv, 1);
   return 0;
 }
 
 static int
 aes_decrypt (void *_ctx, const void *encr, size_t encrsize,
-                   void *plain, size_t length)
+             void *plain, size_t length)
 {
-struct aes_ctx *ctx = _ctx;
-sAesData aesData;
-
-  aesData.iv = ctx->iv;
-  aesData.in_block = (void*)encr;
-  aesData.out_block = plain;
-  aesData.expanded_key = ctx->expanded_key_dec;
-  aesData.num_blocks = (encrsize + 1) / 16;
+  struct aes_ctx *ctx = _ctx;
 
-  ctx->dec(&aesData);
+  aesni_cbc_encrypt (encr, plain, encrsize,
+                     &ctx->expanded_key_dec, ctx->iv, 0);
 
   return 0;
 }
@@ -166,38 +146,46 @@ static const gnutls_crypto_cipher_st cipher_struct = {
   .deinit = aes_deinit,
 };
 
-static unsigned check_optimized_aes(void)
+static unsigned
+check_optimized_aes (void)
 {
-unsigned int a,b,c,d;
-  cpuid(1, a,b,c,d);
-  
+  unsigned int a, b, c, d;
+  cpuid (1, a, b, c, d);
+
   return (c & 0x2000000);
 }
 
 void
 register_x86_crypto (void)
 {
-int ret;
-       if (check_optimized_aes()) {
-               fprintf(stderr, "Intel AES accelerator was detected\n");
-               ret = gnutls_crypto_single_cipher_register 
(GNUTLS_CIPHER_AES_128_CBC, 80, &cipher_struct);
-               if (ret < 0)
-               {
-                 gnutls_assert ();
-               }
-
-               ret = gnutls_crypto_single_cipher_register 
(GNUTLS_CIPHER_AES_192_CBC, 80, &cipher_struct);
-               if (ret < 0)
-               {
-                 gnutls_assert ();
-               }
-
-               ret = gnutls_crypto_single_cipher_register 
(GNUTLS_CIPHER_AES_256_CBC, 80, &cipher_struct);
-               if (ret < 0)
-               {
-                 gnutls_assert ();
-               }
-       }
-
-       return;
+  int ret;
+  if (check_optimized_aes ())
+    {
+      fprintf (stderr, "Intel AES accelerator was detected\n");
+      ret =
+        gnutls_crypto_single_cipher_register (GNUTLS_CIPHER_AES_128_CBC, 80,
+                                              &cipher_struct);
+      if (ret < 0)
+        {
+          gnutls_assert ();
+        }
+
+      ret =
+        gnutls_crypto_single_cipher_register (GNUTLS_CIPHER_AES_192_CBC, 80,
+                                              &cipher_struct);
+      if (ret < 0)
+        {
+          gnutls_assert ();
+        }
+
+      ret =
+        gnutls_crypto_single_cipher_register (GNUTLS_CIPHER_AES_256_CBC, 80,
+                                              &cipher_struct);
+      if (ret < 0)
+        {
+          gnutls_assert ();
+        }
+    }
+
+  return;
 }
diff --git a/lib/accelerated/intel/asm/appro-aes-x86-64.s 
b/lib/accelerated/intel/asm/appro-aes-x86-64.s
new file mode 100644
index 0000000..96b7b6e
--- /dev/null
+++ b/lib/accelerated/intel/asm/appro-aes-x86-64.s
@@ -0,0 +1,782 @@
+# Copyright (c) 2006, Andy Polyakov by <address@hidden>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 
+#     *        Redistributions of source code must retain copyright notices,
+#      this list of conditions and the following disclaimer.
+#
+#     *        Redistributions in binary form must reproduce the above
+#      copyright notice, this list of conditions and the following
+#      disclaimer in the documentation and/or other materials
+#      provided with the distribution.
+#
+#     *        Neither the name of the Andy Polyakov nor the names of its
+#      copyright holder and contributors may be used to endorse or
+#      promote products derived from this software without specific
+#      prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+.text  
+.globl aesni_encrypt
+.type  aesni_encrypt,@function
+.align 16
+aesni_encrypt:
+       movups  (%rdi),%xmm0
+       movl    240(%rdx),%eax
+       movaps  (%rdx),%xmm4
+       movaps  16(%rdx),%xmm5
+       leaq    32(%rdx),%rdx
+       pxor    %xmm4,%xmm0
+.Loop_enc1_1:
+.byte  102,15,56,220,197
+       decl    %eax
+       movaps  (%rdx),%xmm5
+       leaq    16(%rdx),%rdx
+       jnz     .Loop_enc1_1    
+.byte  102,15,56,221,197
+       movups  %xmm0,(%rsi)
+       .byte   0xf3,0xc3
+.size  aesni_encrypt,.-aesni_encrypt
+
+.globl aesni_decrypt
+.type  aesni_decrypt,@function
+.align 16
+aesni_decrypt:
+       movups  (%rdi),%xmm0
+       movl    240(%rdx),%eax
+       movaps  (%rdx),%xmm4
+       movaps  16(%rdx),%xmm5
+       leaq    32(%rdx),%rdx
+       pxor    %xmm4,%xmm0
+.Loop_dec1_2:
+.byte  102,15,56,222,197
+       decl    %eax
+       movaps  (%rdx),%xmm5
+       leaq    16(%rdx),%rdx
+       jnz     .Loop_dec1_2    
+.byte  102,15,56,223,197
+       movups  %xmm0,(%rsi)
+       .byte   0xf3,0xc3
+.size  aesni_decrypt, .-aesni_decrypt
+.type  _aesni_encrypt3,@function
+.align 16
+_aesni_encrypt3:
+       movaps  (%rcx),%xmm4
+       shrl    $1,%eax
+       movaps  16(%rcx),%xmm5
+       leaq    32(%rcx),%rcx
+       pxor    %xmm4,%xmm0
+       pxor    %xmm4,%xmm1
+       pxor    %xmm4,%xmm2
+
+.Lenc_loop3:
+.byte  102,15,56,220,197
+       movaps  (%rcx),%xmm4
+.byte  102,15,56,220,205
+       decl    %eax
+.byte  102,15,56,220,213
+.byte  102,15,56,220,196
+       movaps  16(%rcx),%xmm5
+.byte  102,15,56,220,204
+       leaq    32(%rcx),%rcx
+.byte  102,15,56,220,212
+       jnz     .Lenc_loop3
+
+.byte  102,15,56,220,197
+       movaps  (%rcx),%xmm4
+.byte  102,15,56,220,205
+.byte  102,15,56,220,213
+.byte  102,15,56,221,196
+.byte  102,15,56,221,204
+.byte  102,15,56,221,212
+       .byte   0xf3,0xc3
+.size  _aesni_encrypt3,.-_aesni_encrypt3
+.type  _aesni_decrypt3,@function
+.align 16
+_aesni_decrypt3:
+       movaps  (%rcx),%xmm4
+       shrl    $1,%eax
+       movaps  16(%rcx),%xmm5
+       leaq    32(%rcx),%rcx
+       pxor    %xmm4,%xmm0
+       pxor    %xmm4,%xmm1
+       pxor    %xmm4,%xmm2
+
+.Ldec_loop3:
+.byte  102,15,56,222,197
+       movaps  (%rcx),%xmm4
+.byte  102,15,56,222,205
+       decl    %eax
+.byte  102,15,56,222,213
+.byte  102,15,56,222,196
+       movaps  16(%rcx),%xmm5
+.byte  102,15,56,222,204
+       leaq    32(%rcx),%rcx
+.byte  102,15,56,222,212
+       jnz     .Ldec_loop3
+
+.byte  102,15,56,222,197
+       movaps  (%rcx),%xmm4
+.byte  102,15,56,222,205
+.byte  102,15,56,222,213
+.byte  102,15,56,223,196
+.byte  102,15,56,223,204
+.byte  102,15,56,223,212
+       .byte   0xf3,0xc3
+.size  _aesni_decrypt3,.-_aesni_decrypt3
+.type  _aesni_encrypt4,@function
+.align 16
+_aesni_encrypt4:
+       movaps  (%rcx),%xmm4
+       shrl    $1,%eax
+       movaps  16(%rcx),%xmm5
+       leaq    32(%rcx),%rcx
+       pxor    %xmm4,%xmm0
+       pxor    %xmm4,%xmm1
+       pxor    %xmm4,%xmm2
+       pxor    %xmm4,%xmm3
+
+.Lenc_loop4:
+.byte  102,15,56,220,197
+       movaps  (%rcx),%xmm4
+.byte  102,15,56,220,205
+       decl    %eax
+.byte  102,15,56,220,213
+.byte  102,15,56,220,221
+.byte  102,15,56,220,196
+       movaps  16(%rcx),%xmm5
+.byte  102,15,56,220,204
+       leaq    32(%rcx),%rcx
+.byte  102,15,56,220,212
+.byte  102,15,56,220,220
+       jnz     .Lenc_loop4
+
+.byte  102,15,56,220,197
+       movaps  (%rcx),%xmm4
+.byte  102,15,56,220,205
+.byte  102,15,56,220,213
+.byte  102,15,56,220,221
+.byte  102,15,56,221,196
+.byte  102,15,56,221,204
+.byte  102,15,56,221,212
+.byte  102,15,56,221,220
+       .byte   0xf3,0xc3
+.size  _aesni_encrypt4,.-_aesni_encrypt4
+.type  _aesni_decrypt4,@function
+.align 16
+_aesni_decrypt4:
+       movaps  (%rcx),%xmm4
+       shrl    $1,%eax
+       movaps  16(%rcx),%xmm5
+       leaq    32(%rcx),%rcx
+       pxor    %xmm4,%xmm0
+       pxor    %xmm4,%xmm1
+       pxor    %xmm4,%xmm2
+       pxor    %xmm4,%xmm3
+
+.Ldec_loop4:
+.byte  102,15,56,222,197
+       movaps  (%rcx),%xmm4
+.byte  102,15,56,222,205
+       decl    %eax
+.byte  102,15,56,222,213
+.byte  102,15,56,222,221
+.byte  102,15,56,222,196
+       movaps  16(%rcx),%xmm5
+.byte  102,15,56,222,204
+       leaq    32(%rcx),%rcx
+.byte  102,15,56,222,212
+.byte  102,15,56,222,220
+       jnz     .Ldec_loop4
+
+.byte  102,15,56,222,197
+       movaps  (%rcx),%xmm4
+.byte  102,15,56,222,205
+.byte  102,15,56,222,213
+.byte  102,15,56,222,221
+.byte  102,15,56,223,196
+.byte  102,15,56,223,204
+.byte  102,15,56,223,212
+.byte  102,15,56,223,220
+       .byte   0xf3,0xc3
+.size  _aesni_decrypt4,.-_aesni_decrypt4
+.globl aesni_ecb_encrypt
+.type  aesni_ecb_encrypt,@function
+.align 16
+aesni_ecb_encrypt:
+       cmpq    $16,%rdx
+       jb      .Lecb_ret
+
+       movl    240(%rcx),%eax
+       andq    $-16,%rdx
+       movq    %rcx,%r11
+       testl   %r8d,%r8d
+       movl    %eax,%r10d
+       jz      .Lecb_decrypt
+
+       subq    $64,%rdx
+       jbe     .Lecb_enc_tail
+       jmp     .Lecb_enc_loop3
+.align 16
+.Lecb_enc_loop3:
+       movups  (%rdi),%xmm0
+       movups  16(%rdi),%xmm1
+       movups  32(%rdi),%xmm2
+       call    _aesni_encrypt3
+       subq    $48,%rdx
+       leaq    48(%rdi),%rdi
+       leaq    48(%rsi),%rsi
+       movups  %xmm0,-48(%rsi)
+       movl    %r10d,%eax
+       movups  %xmm1,-32(%rsi)
+       movq    %r11,%rcx
+       movups  %xmm2,-16(%rsi)
+       ja      .Lecb_enc_loop3
+
+.Lecb_enc_tail:
+       addq    $64,%rdx
+       jz      .Lecb_ret
+
+       cmpq    $16,%rdx
+       movups  (%rdi),%xmm0
+       je      .Lecb_enc_one
+       cmpq    $32,%rdx
+       movups  16(%rdi),%xmm1
+       je      .Lecb_enc_two
+       cmpq    $48,%rdx
+       movups  32(%rdi),%xmm2
+       je      .Lecb_enc_three
+       movups  48(%rdi),%xmm3
+       call    _aesni_encrypt4
+       movups  %xmm0,(%rsi)
+       movups  %xmm1,16(%rsi)
+       movups  %xmm2,32(%rsi)
+       movups  %xmm3,48(%rsi)
+       jmp     .Lecb_ret
+.align 16
+.Lecb_enc_one:
+       movaps  (%rcx),%xmm4
+       movaps  16(%rcx),%xmm5
+       leaq    32(%rcx),%rcx
+       pxor    %xmm4,%xmm0
+.Loop_enc1_3:
+.byte  102,15,56,220,197
+       decl    %eax
+       movaps  (%rcx),%xmm5
+       leaq    16(%rcx),%rcx
+       jnz     .Loop_enc1_3    
+.byte  102,15,56,221,197
+       movups  %xmm0,(%rsi)
+       jmp     .Lecb_ret
+.align 16
+.Lecb_enc_two:
+       call    _aesni_encrypt3
+       movups  %xmm0,(%rsi)
+       movups  %xmm1,16(%rsi)
+       jmp     .Lecb_ret
+.align 16
+.Lecb_enc_three:
+       call    _aesni_encrypt3
+       movups  %xmm0,(%rsi)
+       movups  %xmm1,16(%rsi)
+       movups  %xmm2,32(%rsi)
+       jmp     .Lecb_ret
+
+.align 16
+.Lecb_decrypt:
+       subq    $64,%rdx
+       jbe     .Lecb_dec_tail
+       jmp     .Lecb_dec_loop3
+.align 16
+.Lecb_dec_loop3:
+       movups  (%rdi),%xmm0
+       movups  16(%rdi),%xmm1
+       movups  32(%rdi),%xmm2
+       call    _aesni_decrypt3
+       subq    $48,%rdx
+       leaq    48(%rdi),%rdi
+       leaq    48(%rsi),%rsi
+       movups  %xmm0,-48(%rsi)
+       movl    %r10d,%eax
+       movups  %xmm1,-32(%rsi)
+       movq    %r11,%rcx
+       movups  %xmm2,-16(%rsi)
+       ja      .Lecb_dec_loop3
+
+.Lecb_dec_tail:
+       addq    $64,%rdx
+       jz      .Lecb_ret
+
+       cmpq    $16,%rdx
+       movups  (%rdi),%xmm0
+       je      .Lecb_dec_one
+       cmpq    $32,%rdx
+       movups  16(%rdi),%xmm1
+       je      .Lecb_dec_two
+       cmpq    $48,%rdx
+       movups  32(%rdi),%xmm2
+       je      .Lecb_dec_three
+       movups  48(%rdi),%xmm3
+       call    _aesni_decrypt4
+       movups  %xmm0,(%rsi)
+       movups  %xmm1,16(%rsi)
+       movups  %xmm2,32(%rsi)
+       movups  %xmm3,48(%rsi)
+       jmp     .Lecb_ret
+.align 16
+.Lecb_dec_one:
+       movaps  (%rcx),%xmm4
+       movaps  16(%rcx),%xmm5
+       leaq    32(%rcx),%rcx
+       pxor    %xmm4,%xmm0
+.Loop_dec1_4:
+.byte  102,15,56,222,197
+       decl    %eax
+       movaps  (%rcx),%xmm5
+       leaq    16(%rcx),%rcx
+       jnz     .Loop_dec1_4    
+.byte  102,15,56,223,197
+       movups  %xmm0,(%rsi)
+       jmp     .Lecb_ret
+.align 16
+.Lecb_dec_two:
+       call    _aesni_decrypt3
+       movups  %xmm0,(%rsi)
+       movups  %xmm1,16(%rsi)
+       jmp     .Lecb_ret
+.align 16
+.Lecb_dec_three:
+       call    _aesni_decrypt3
+       movups  %xmm0,(%rsi)
+       movups  %xmm1,16(%rsi)
+       movups  %xmm2,32(%rsi)
+
+.Lecb_ret:
+       .byte   0xf3,0xc3
+.size  aesni_ecb_encrypt,.-aesni_ecb_encrypt
+.globl aesni_cbc_encrypt
+.type  aesni_cbc_encrypt,@function
+.align 16
+aesni_cbc_encrypt:
+       testq   %rdx,%rdx
+       jz      .Lcbc_ret
+
+       movl    240(%rcx),%r10d
+       movq    %rcx,%r11
+       testl   %r9d,%r9d
+       jz      .Lcbc_decrypt
+
+       movups  (%r8),%xmm0
+       cmpq    $16,%rdx
+       movl    %r10d,%eax
+       jb      .Lcbc_enc_tail
+       subq    $16,%rdx
+       jmp     .Lcbc_enc_loop
+.align 16
+.Lcbc_enc_loop:
+       movups  (%rdi),%xmm1
+       leaq    16(%rdi),%rdi
+       pxor    %xmm1,%xmm0
+       movaps  (%rcx),%xmm4
+       movaps  16(%rcx),%xmm5
+       leaq    32(%rcx),%rcx
+       pxor    %xmm4,%xmm0
+.Loop_enc1_5:
+.byte  102,15,56,220,197
+       decl    %eax
+       movaps  (%rcx),%xmm5
+       leaq    16(%rcx),%rcx
+       jnz     .Loop_enc1_5    
+.byte  102,15,56,221,197
+       subq    $16,%rdx
+       leaq    16(%rsi),%rsi
+       movl    %r10d,%eax
+       movq    %r11,%rcx
+       movups  %xmm0,-16(%rsi)
+       jnc     .Lcbc_enc_loop
+       addq    $16,%rdx
+       jnz     .Lcbc_enc_tail
+       movups  %xmm0,(%r8)
+       jmp     .Lcbc_ret
+
+.Lcbc_enc_tail:
+       movq    %rdx,%rcx
+       xchgq   %rdi,%rsi
+.long  0x9066A4F3      
+       movl    $16,%ecx
+       subq    %rdx,%rcx
+       xorl    %eax,%eax
+.long  0x9066AAF3      
+       leaq    -16(%rdi),%rdi
+       movl    %r10d,%eax
+       movq    %rdi,%rsi
+       movq    %r11,%rcx
+       xorq    %rdx,%rdx
+       jmp     .Lcbc_enc_loop  
+
+.align 16
+.Lcbc_decrypt:
+       movups  (%r8),%xmm6
+       subq    $64,%rdx
+       movl    %r10d,%eax
+       jbe     .Lcbc_dec_tail
+       jmp     .Lcbc_dec_loop3
+.align 16
+.Lcbc_dec_loop3:
+       movups  (%rdi),%xmm0
+       movups  16(%rdi),%xmm1
+       movups  32(%rdi),%xmm2
+       movaps  %xmm0,%xmm7
+       movaps  %xmm1,%xmm8
+       movaps  %xmm2,%xmm9
+       call    _aesni_decrypt3
+       subq    $48,%rdx
+       leaq    48(%rdi),%rdi
+       leaq    48(%rsi),%rsi
+       pxor    %xmm6,%xmm0
+       pxor    %xmm7,%xmm1
+       movaps  %xmm9,%xmm6
+       pxor    %xmm8,%xmm2
+       movups  %xmm0,-48(%rsi)
+       movl    %r10d,%eax
+       movups  %xmm1,-32(%rsi)
+       movq    %r11,%rcx
+       movups  %xmm2,-16(%rsi)
+       ja      .Lcbc_dec_loop3
+
+.Lcbc_dec_tail:
+       addq    $64,%rdx
+       movups  %xmm6,(%r8)
+       jz      .Lcbc_dec_ret
+
+       movups  (%rdi),%xmm0
+       cmpq    $16,%rdx
+       movaps  %xmm0,%xmm7
+       jbe     .Lcbc_dec_one
+       movups  16(%rdi),%xmm1
+       cmpq    $32,%rdx
+       movaps  %xmm1,%xmm8
+       jbe     .Lcbc_dec_two
+       movups  32(%rdi),%xmm2
+       cmpq    $48,%rdx
+       movaps  %xmm2,%xmm9
+       jbe     .Lcbc_dec_three
+       movups  48(%rdi),%xmm3
+       call    _aesni_decrypt4
+       pxor    %xmm6,%xmm0
+       movups  48(%rdi),%xmm6
+       pxor    %xmm7,%xmm1
+       movups  %xmm0,(%rsi)
+       pxor    %xmm8,%xmm2
+       movups  %xmm1,16(%rsi)
+       pxor    %xmm9,%xmm3
+       movups  %xmm2,32(%rsi)
+       movaps  %xmm3,%xmm0
+       leaq    48(%rsi),%rsi
+       jmp     .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_one:
+       movaps  (%rcx),%xmm4
+       movaps  16(%rcx),%xmm5
+       leaq    32(%rcx),%rcx
+       pxor    %xmm4,%xmm0
+.Loop_dec1_6:
+.byte  102,15,56,222,197
+       decl    %eax
+       movaps  (%rcx),%xmm5
+       leaq    16(%rcx),%rcx
+       jnz     .Loop_dec1_6    
+.byte  102,15,56,223,197
+       pxor    %xmm6,%xmm0
+       movaps  %xmm7,%xmm6
+       jmp     .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_two:
+       call    _aesni_decrypt3
+       pxor    %xmm6,%xmm0
+       pxor    %xmm7,%xmm1
+       movups  %xmm0,(%rsi)
+       movaps  %xmm8,%xmm6
+       movaps  %xmm1,%xmm0
+       leaq    16(%rsi),%rsi
+       jmp     .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_three:
+       call    _aesni_decrypt3
+       pxor    %xmm6,%xmm0
+       pxor    %xmm7,%xmm1
+       movups  %xmm0,(%rsi)
+       pxor    %xmm8,%xmm2
+       movups  %xmm1,16(%rsi)
+       movaps  %xmm9,%xmm6
+       movaps  %xmm2,%xmm0
+       leaq    32(%rsi),%rsi
+       jmp     .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_tail_collected:
+       andq    $15,%rdx
+       movups  %xmm6,(%r8)
+       jnz     .Lcbc_dec_tail_partial
+       movups  %xmm0,(%rsi)
+       jmp     .Lcbc_dec_ret
+.Lcbc_dec_tail_partial:
+       movaps  %xmm0,-24(%rsp)
+       movq    %rsi,%rdi
+       movq    %rdx,%rcx
+       leaq    -24(%rsp),%rsi
+.long  0x9066A4F3      
+
+.Lcbc_dec_ret:
+.Lcbc_ret:
+       .byte   0xf3,0xc3
+.size  aesni_cbc_encrypt,.-aesni_cbc_encrypt
+.globl aesni_set_decrypt_key
+.type  aesni_set_decrypt_key,@function
+.align 16
+aesni_set_decrypt_key:
+.byte  0x48,0x83,0xEC,0x08     
+       call    _aesni_set_encrypt_key
+       shll    $4,%esi
+       testl   %eax,%eax
+       jnz     .Ldec_key_ret
+       leaq    16(%rdx,%rsi,1),%rdi
+
+       movaps  (%rdx),%xmm0
+       movaps  (%rdi),%xmm1
+       movaps  %xmm0,(%rdi)
+       movaps  %xmm1,(%rdx)
+       leaq    16(%rdx),%rdx
+       leaq    -16(%rdi),%rdi
+
+.Ldec_key_inverse:
+       movaps  (%rdx),%xmm0
+       movaps  (%rdi),%xmm1
+.byte  102,15,56,219,192
+.byte  102,15,56,219,201
+       leaq    16(%rdx),%rdx
+       leaq    -16(%rdi),%rdi
+       cmpq    %rdx,%rdi
+       movaps  %xmm0,16(%rdi)
+       movaps  %xmm1,-16(%rdx)
+       ja      .Ldec_key_inverse
+
+       movaps  (%rdx),%xmm0
+.byte  102,15,56,219,192
+       movaps  %xmm0,(%rdi)
+.Ldec_key_ret:
+       addq    $8,%rsp
+       .byte   0xf3,0xc3
+.LSEH_end_set_decrypt_key:
+.size  aesni_set_decrypt_key,.-aesni_set_decrypt_key
+.globl aesni_set_encrypt_key
+.type  aesni_set_encrypt_key,@function
+.align 16
+aesni_set_encrypt_key:
+_aesni_set_encrypt_key:
+.byte  0x48,0x83,0xEC,0x08     
+       testq   %rdi,%rdi
+       movq    $-1,%rax
+       jz      .Lenc_key_ret
+       testq   %rdx,%rdx
+       jz      .Lenc_key_ret
+
+       movups  (%rdi),%xmm0
+       pxor    %xmm4,%xmm4
+       leaq    16(%rdx),%rax
+       cmpl    $256,%esi
+       je      .L14rounds
+       cmpl    $192,%esi
+       je      .L12rounds
+       cmpl    $128,%esi
+       jne     .Lbad_keybits
+
+.L10rounds:
+       movl    $9,%esi
+       movaps  %xmm0,(%rdx)
+.byte  102,15,58,223,200,1
+       call    .Lkey_expansion_128_cold
+.byte  102,15,58,223,200,2
+       call    .Lkey_expansion_128
+.byte  102,15,58,223,200,4
+       call    .Lkey_expansion_128
+.byte  102,15,58,223,200,8
+       call    .Lkey_expansion_128
+.byte  102,15,58,223,200,16
+       call    .Lkey_expansion_128
+.byte  102,15,58,223,200,32
+       call    .Lkey_expansion_128
+.byte  102,15,58,223,200,64
+       call    .Lkey_expansion_128
+.byte  102,15,58,223,200,128
+       call    .Lkey_expansion_128
+.byte  102,15,58,223,200,27
+       call    .Lkey_expansion_128
+.byte  102,15,58,223,200,54
+       call    .Lkey_expansion_128
+       movaps  %xmm0,(%rax)
+       movl    %esi,80(%rax)
+       xorl    %eax,%eax
+       jmp     .Lenc_key_ret
+
+.align 16
+.L12rounds:
+       movq    16(%rdi),%xmm2
+       movl    $11,%esi
+       movaps  %xmm0,(%rdx)
+.byte  102,15,58,223,202,1
+       call    .Lkey_expansion_192a_cold
+.byte  102,15,58,223,202,2
+       call    .Lkey_expansion_192b
+.byte  102,15,58,223,202,4
+       call    .Lkey_expansion_192a
+.byte  102,15,58,223,202,8
+       call    .Lkey_expansion_192b
+.byte  102,15,58,223,202,16
+       call    .Lkey_expansion_192a
+.byte  102,15,58,223,202,32
+       call    .Lkey_expansion_192b
+.byte  102,15,58,223,202,64
+       call    .Lkey_expansion_192a
+.byte  102,15,58,223,202,128
+       call    .Lkey_expansion_192b
+       movaps  %xmm0,(%rax)
+       movl    %esi,48(%rax)
+       xorq    %rax,%rax
+       jmp     .Lenc_key_ret
+
+.align 16
+.L14rounds:
+       movups  16(%rdi),%xmm2
+       movl    $13,%esi
+       leaq    16(%rax),%rax
+       movaps  %xmm0,(%rdx)
+       movaps  %xmm2,16(%rdx)
+.byte  102,15,58,223,202,1
+       call    .Lkey_expansion_256a_cold
+.byte  102,15,58,223,200,1
+       call    .Lkey_expansion_256b
+.byte  102,15,58,223,202,2
+       call    .Lkey_expansion_256a
+.byte  102,15,58,223,200,2
+       call    .Lkey_expansion_256b
+.byte  102,15,58,223,202,4
+       call    .Lkey_expansion_256a
+.byte  102,15,58,223,200,4
+       call    .Lkey_expansion_256b
+.byte  102,15,58,223,202,8
+       call    .Lkey_expansion_256a
+.byte  102,15,58,223,200,8
+       call    .Lkey_expansion_256b
+.byte  102,15,58,223,202,16
+       call    .Lkey_expansion_256a
+.byte  102,15,58,223,200,16
+       call    .Lkey_expansion_256b
+.byte  102,15,58,223,202,32
+       call    .Lkey_expansion_256a
+.byte  102,15,58,223,200,32
+       call    .Lkey_expansion_256b
+.byte  102,15,58,223,202,64
+       call    .Lkey_expansion_256a
+       movaps  %xmm0,(%rax)
+       movl    %esi,16(%rax)
+       xorq    %rax,%rax
+       jmp     .Lenc_key_ret
+
+.align 16
+.Lbad_keybits:
+       movq    $-2,%rax
+.Lenc_key_ret:
+       addq    $8,%rsp
+       .byte   0xf3,0xc3
+.LSEH_end_set_encrypt_key:
+
+.align 16
+.Lkey_expansion_128:
+       movaps  %xmm0,(%rax)
+       leaq    16(%rax),%rax
+.Lkey_expansion_128_cold:
+       shufps  $16,%xmm0,%xmm4
+       pxor    %xmm4,%xmm0
+       shufps  $140,%xmm0,%xmm4
+       pxor    %xmm4,%xmm0
+       pshufd  $255,%xmm1,%xmm1
+       pxor    %xmm1,%xmm0
+       .byte   0xf3,0xc3
+
+.align 16
+.Lkey_expansion_192a:
+       movaps  %xmm0,(%rax)
+       leaq    16(%rax),%rax
+.Lkey_expansion_192a_cold:
+       movaps  %xmm2,%xmm5
+.Lkey_expansion_192b_warm:
+       shufps  $16,%xmm0,%xmm4
+       movaps  %xmm2,%xmm3
+       pxor    %xmm4,%xmm0
+       shufps  $140,%xmm0,%xmm4
+       pslldq  $4,%xmm3
+       pxor    %xmm4,%xmm0
+       pshufd  $85,%xmm1,%xmm1
+       pxor    %xmm3,%xmm2
+       pxor    %xmm1,%xmm0
+       pshufd  $255,%xmm0,%xmm3
+       pxor    %xmm3,%xmm2
+       .byte   0xf3,0xc3
+
+.align 16
+.Lkey_expansion_192b:
+       movaps  %xmm0,%xmm3
+       shufps  $68,%xmm0,%xmm5
+       movaps  %xmm5,(%rax)
+       shufps  $78,%xmm2,%xmm3
+       movaps  %xmm3,16(%rax)
+       leaq    32(%rax),%rax
+       jmp     .Lkey_expansion_192b_warm
+
+.align 16
+.Lkey_expansion_256a:
+       movaps  %xmm2,(%rax)
+       leaq    16(%rax),%rax
+.Lkey_expansion_256a_cold:
+       shufps  $16,%xmm0,%xmm4
+       pxor    %xmm4,%xmm0
+       shufps  $140,%xmm0,%xmm4
+       pxor    %xmm4,%xmm0
+       pshufd  $255,%xmm1,%xmm1
+       pxor    %xmm1,%xmm0
+       .byte   0xf3,0xc3
+
+.align 16
+.Lkey_expansion_256b:
+       movaps  %xmm0,(%rax)
+       leaq    16(%rax),%rax
+
+       shufps  $16,%xmm2,%xmm4
+       pxor    %xmm4,%xmm2
+       shufps  $140,%xmm2,%xmm4
+       pxor    %xmm4,%xmm2
+       pshufd  $170,%xmm1,%xmm1
+       pxor    %xmm1,%xmm2
+       .byte   0xf3,0xc3
+.size  aesni_set_encrypt_key,.-aesni_set_encrypt_key
+.byte  
65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 64
diff --git a/lib/accelerated/intel/asm/appro-aes-x86.s 
b/lib/accelerated/intel/asm/appro-aes-x86.s
new file mode 100644
index 0000000..981e356
--- /dev/null
+++ b/lib/accelerated/intel/asm/appro-aes-x86.s
@@ -0,0 +1,798 @@
+# Copyright (c) 2006, Andy Polyakov by <address@hidden>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 
+#     *        Redistributions of source code must retain copyright notices,
+#      this list of conditions and the following disclaimer.
+#
+#     *        Redistributions in binary form must reproduce the above
+#      copyright notice, this list of conditions and the following
+#      disclaimer in the documentation and/or other materials
+#      provided with the distribution.
+#
+#     *        Neither the name of the Andy Polyakov nor the names of its
+#      copyright holder and contributors may be used to endorse or
+#      promote products derived from this software without specific
+#      prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+.file  "aesni-x86.s"
+.text
+.globl aesni_encrypt
+.type  aesni_encrypt,@function
+.align 16
+aesni_encrypt:
+.L_aesni_encrypt_begin:
+       movl    4(%esp),%eax
+       movl    12(%esp),%edx
+       movups  (%eax),%xmm0
+       movl    240(%edx),%ecx
+       movl    8(%esp),%eax
+       movups  (%edx),%xmm3
+       movups  16(%edx),%xmm4
+       leal    32(%edx),%edx
+       pxor    %xmm3,%xmm0
+.L000enc1_loop:
+       aesenc  %xmm4,%xmm0
+       decl    %ecx
+       movups  (%edx),%xmm4
+       leal    16(%edx),%edx
+       jnz     .L000enc1_loop
+       aesenclast      %xmm4,%xmm0
+       movups  %xmm0,(%eax)
+       ret
+.size  aesni_encrypt,.-.L_aesni_encrypt_begin
+.globl aesni_decrypt
+.type  aesni_decrypt,@function
+.align 16
+aesni_decrypt:
+.L_aesni_decrypt_begin:
+       movl    4(%esp),%eax
+       movl    12(%esp),%edx
+       movups  (%eax),%xmm0
+       movl    240(%edx),%ecx
+       movl    8(%esp),%eax
+       movups  (%edx),%xmm3
+       movups  16(%edx),%xmm4
+       leal    32(%edx),%edx
+       pxor    %xmm3,%xmm0
+.L001dec1_loop:
+       aesdec  %xmm4,%xmm0
+       decl    %ecx
+       movups  (%edx),%xmm4
+       leal    16(%edx),%edx
+       jnz     .L001dec1_loop
+       aesdeclast      %xmm4,%xmm0
+       movups  %xmm0,(%eax)
+       ret
+.size  aesni_decrypt,.-.L_aesni_decrypt_begin
+.type  _aesni_encrypt3,@function
+.align 16
+_aesni_encrypt3:
+       movups  (%edx),%xmm3
+       shrl    $1,%ecx
+       movups  16(%edx),%xmm4
+       leal    32(%edx),%edx
+       pxor    %xmm3,%xmm0
+       pxor    %xmm3,%xmm1
+       pxor    %xmm3,%xmm2
+       jmp     .L002enc3_loop
+.align 16
+.L002enc3_loop:
+       aesenc  %xmm4,%xmm0
+       movups  (%edx),%xmm3
+       aesenc  %xmm4,%xmm1
+       decl    %ecx
+       aesenc  %xmm4,%xmm2
+       movups  16(%edx),%xmm4
+       aesenc  %xmm3,%xmm0
+       leal    32(%edx),%edx
+       aesenc  %xmm3,%xmm1
+       aesenc  %xmm3,%xmm2
+       jnz     .L002enc3_loop
+       aesenc  %xmm4,%xmm0
+       movups  (%edx),%xmm3
+       aesenc  %xmm4,%xmm1
+       aesenc  %xmm4,%xmm2
+       aesenclast      %xmm3,%xmm0
+       aesenclast      %xmm3,%xmm1
+       aesenclast      %xmm3,%xmm2
+       ret
+.size  _aesni_encrypt3,.-_aesni_encrypt3
+.type  _aesni_decrypt3,@function
+.align 16
+_aesni_decrypt3:
+       movups  (%edx),%xmm3
+       shrl    $1,%ecx
+       movups  16(%edx),%xmm4
+       leal    32(%edx),%edx
+       pxor    %xmm3,%xmm0
+       pxor    %xmm3,%xmm1
+       pxor    %xmm3,%xmm2
+       jmp     .L003dec3_loop
+.align 16
+.L003dec3_loop:
+       aesdec  %xmm4,%xmm0
+       movups  (%edx),%xmm3
+       aesdec  %xmm4,%xmm1
+       decl    %ecx
+       aesdec  %xmm4,%xmm2
+       movups  16(%edx),%xmm4
+       aesdec  %xmm3,%xmm0
+       leal    32(%edx),%edx
+       aesdec  %xmm3,%xmm1
+       aesdec  %xmm3,%xmm2
+       jnz     .L003dec3_loop
+       aesdec  %xmm4,%xmm0
+       movups  (%edx),%xmm3
+       aesdec  %xmm4,%xmm1
+       aesdec  %xmm4,%xmm2
+       aesdeclast      %xmm3,%xmm0
+       aesdeclast      %xmm3,%xmm1
+       aesdeclast      %xmm3,%xmm2
+       ret
+.size  _aesni_decrypt3,.-_aesni_decrypt3
+.type  _aesni_encrypt4,@function
+.align 16
+_aesni_encrypt4:
+       movups  (%edx),%xmm3
+       movups  16(%edx),%xmm4
+       shrl    $1,%ecx
+       leal    32(%edx),%edx
+       pxor    %xmm3,%xmm0
+       pxor    %xmm3,%xmm1
+       pxor    %xmm3,%xmm2
+       pxor    %xmm3,%xmm7
+       jmp     .L004enc3_loop
+.align 16
+.L004enc3_loop:
+       aesenc  %xmm4,%xmm0
+       movups  (%edx),%xmm3
+       aesenc  %xmm4,%xmm1
+       decl    %ecx
+       aesenc  %xmm4,%xmm2
+       aesenc  %xmm4,%xmm7
+       movups  16(%edx),%xmm4
+       aesenc  %xmm3,%xmm0
+       leal    32(%edx),%edx
+       aesenc  %xmm3,%xmm1
+       aesenc  %xmm3,%xmm2
+       aesenc  %xmm3,%xmm7
+       jnz     .L004enc3_loop
+       aesenc  %xmm4,%xmm0
+       movups  (%edx),%xmm3
+       aesenc  %xmm4,%xmm1
+       aesenc  %xmm4,%xmm2
+       aesenc  %xmm4,%xmm7
+       aesenclast      %xmm3,%xmm0
+       aesenclast      %xmm3,%xmm1
+       aesenclast      %xmm3,%xmm2
+       aesenclast      %xmm3,%xmm7
+       ret
+.size  _aesni_encrypt4,.-_aesni_encrypt4
+.type  _aesni_decrypt4,@function
+.align 16
+_aesni_decrypt4:
+       movups  (%edx),%xmm3
+       movups  16(%edx),%xmm4
+       shrl    $1,%ecx
+       leal    32(%edx),%edx
+       pxor    %xmm3,%xmm0
+       pxor    %xmm3,%xmm1
+       pxor    %xmm3,%xmm2
+       pxor    %xmm3,%xmm7
+       jmp     .L005dec3_loop
+.align 16
+.L005dec3_loop:
+       aesdec  %xmm4,%xmm0
+       movups  (%edx),%xmm3
+       aesdec  %xmm4,%xmm1
+       decl    %ecx
+       aesdec  %xmm4,%xmm2
+       aesdec  %xmm4,%xmm7
+       movups  16(%edx),%xmm4
+       aesdec  %xmm3,%xmm0
+       leal    32(%edx),%edx
+       aesdec  %xmm3,%xmm1
+       aesdec  %xmm3,%xmm2
+       aesdec  %xmm3,%xmm7
+       jnz     .L005dec3_loop
+       aesdec  %xmm4,%xmm0
+       movups  (%edx),%xmm3
+       aesdec  %xmm4,%xmm1
+       aesdec  %xmm4,%xmm2
+       aesdec  %xmm4,%xmm7
+       aesdeclast      %xmm3,%xmm0
+       aesdeclast      %xmm3,%xmm1
+       aesdeclast      %xmm3,%xmm2
+       aesdeclast      %xmm3,%xmm7
+       ret
+.size  _aesni_decrypt4,.-_aesni_decrypt4
+.globl aesni_ecb_encrypt
+.type  aesni_ecb_encrypt,@function
+.align 16
+aesni_ecb_encrypt:
+.L_aesni_ecb_encrypt_begin:
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+       movl    20(%esp),%esi
+       movl    24(%esp),%edi
+       movl    28(%esp),%eax
+       movl    32(%esp),%edx
+       movl    36(%esp),%ecx
+       cmpl    $16,%eax
+       jb      .L006ecb_ret
+       andl    $-16,%eax
+       testl   %ecx,%ecx
+       movl    240(%edx),%ecx
+       movl    %edx,%ebp
+       movl    %ecx,%ebx
+       jz      .L007ecb_decrypt
+       subl    $64,%eax
+       jbe     .L008ecb_enc_tail
+       jmp     .L009ecb_enc_loop3
+.align 16
+.L009ecb_enc_loop3:
+       movups  (%esi),%xmm0
+       movups  16(%esi),%xmm1
+       movups  32(%esi),%xmm2
+       call    _aesni_encrypt3
+       subl    $48,%eax
+       leal    48(%esi),%esi
+       leal    48(%edi),%edi
+       movups  %xmm0,-48(%edi)
+       movl    %ebp,%edx
+       movups  %xmm1,-32(%edi)
+       movl    %ebx,%ecx
+       movups  %xmm2,-16(%edi)
+       ja      .L009ecb_enc_loop3
+.L008ecb_enc_tail:
+       addl    $64,%eax
+       jz      .L006ecb_ret
+       cmpl    $16,%eax
+       movups  (%esi),%xmm0
+       je      .L010ecb_enc_one
+       cmpl    $32,%eax
+       movups  16(%esi),%xmm1
+       je      .L011ecb_enc_two
+       cmpl    $48,%eax
+       movups  32(%esi),%xmm2
+       je      .L012ecb_enc_three
+       movups  48(%esi),%xmm7
+       call    _aesni_encrypt4
+       movups  %xmm0,(%edi)
+       movups  %xmm1,16(%edi)
+       movups  %xmm2,32(%edi)
+       movups  %xmm7,48(%edi)
+       jmp     .L006ecb_ret
+.align 16
+.L010ecb_enc_one:
+       movups  (%edx),%xmm3
+       movups  16(%edx),%xmm4
+       leal    32(%edx),%edx
+       pxor    %xmm3,%xmm0
+.L013enc1_loop:
+       aesenc  %xmm4,%xmm0
+       decl    %ecx
+       movups  (%edx),%xmm4
+       leal    16(%edx),%edx
+       jnz     .L013enc1_loop
+       aesenclast      %xmm4,%xmm0
+       movups  %xmm0,(%edi)
+       jmp     .L006ecb_ret
+.align 16
+.L011ecb_enc_two:
+       call    _aesni_encrypt3
+       movups  %xmm0,(%edi)
+       movups  %xmm1,16(%edi)
+       jmp     .L006ecb_ret
+.align 16
+.L012ecb_enc_three:
+       call    _aesni_encrypt3
+       movups  %xmm0,(%edi)
+       movups  %xmm1,16(%edi)
+       movups  %xmm2,32(%edi)
+       jmp     .L006ecb_ret
+.align 16
+.L007ecb_decrypt:
+       subl    $64,%eax
+       jbe     .L014ecb_dec_tail
+       jmp     .L015ecb_dec_loop3
+.align 16
+.L015ecb_dec_loop3:
+       movups  (%esi),%xmm0
+       movups  16(%esi),%xmm1
+       movups  32(%esi),%xmm2
+       call    _aesni_decrypt3
+       subl    $48,%eax
+       leal    48(%esi),%esi
+       leal    48(%edi),%edi
+       movups  %xmm0,-48(%edi)
+       movl    %ebp,%edx
+       movups  %xmm1,-32(%edi)
+       movl    %ebx,%ecx
+       movups  %xmm2,-16(%edi)
+       ja      .L015ecb_dec_loop3
+.L014ecb_dec_tail:
+       addl    $64,%eax
+       jz      .L006ecb_ret
+       cmpl    $16,%eax
+       movups  (%esi),%xmm0
+       je      .L016ecb_dec_one
+       cmpl    $32,%eax
+       movups  16(%esi),%xmm1
+       je      .L017ecb_dec_two
+       cmpl    $48,%eax
+       movups  32(%esi),%xmm2
+       je      .L018ecb_dec_three
+       movups  48(%esi),%xmm7
+       call    _aesni_decrypt4
+       movups  %xmm0,(%edi)
+       movups  %xmm1,16(%edi)
+       movups  %xmm2,32(%edi)
+       movups  %xmm7,48(%edi)
+       jmp     .L006ecb_ret
+.align 16
+.L016ecb_dec_one:
+       movups  (%edx),%xmm3
+       movups  16(%edx),%xmm4
+       leal    32(%edx),%edx
+       pxor    %xmm3,%xmm0
+.L019dec1_loop:
+       aesdec  %xmm4,%xmm0
+       decl    %ecx
+       movups  (%edx),%xmm4
+       leal    16(%edx),%edx
+       jnz     .L019dec1_loop
+       aesdeclast      %xmm4,%xmm0
+       movups  %xmm0,(%edi)
+       jmp     .L006ecb_ret
+.align 16
+.L017ecb_dec_two:
+       call    _aesni_decrypt3
+       movups  %xmm0,(%edi)
+       movups  %xmm1,16(%edi)
+       jmp     .L006ecb_ret
+.align 16
+.L018ecb_dec_three:
+       call    _aesni_decrypt3
+       movups  %xmm0,(%edi)
+       movups  %xmm1,16(%edi)
+       movups  %xmm2,32(%edi)
+.L006ecb_ret:
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.size  aesni_ecb_encrypt,.-.L_aesni_ecb_encrypt_begin
+.globl aesni_cbc_encrypt
+.type  aesni_cbc_encrypt,@function
+.align 16
+aesni_cbc_encrypt:
+.L_aesni_cbc_encrypt_begin:
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+       movl    20(%esp),%esi
+       movl    24(%esp),%edi
+       movl    28(%esp),%eax
+       movl    32(%esp),%edx
+       testl   %eax,%eax
+       movl    36(%esp),%ebp
+       jz      .L020cbc_ret
+       cmpl    $0,40(%esp)
+       movups  (%ebp),%xmm5
+       movl    240(%edx),%ecx
+       movl    %edx,%ebp
+       movl    %ecx,%ebx
+       je      .L021cbc_decrypt
+       movaps  %xmm5,%xmm0
+       cmpl    $16,%eax
+       jb      .L022cbc_enc_tail
+       subl    $16,%eax
+       jmp     .L023cbc_enc_loop
+.align 16
+.L023cbc_enc_loop:
+       movups  (%esi),%xmm5
+       leal    16(%esi),%esi
+       pxor    %xmm5,%xmm0
+       movups  (%edx),%xmm3
+       movups  16(%edx),%xmm4
+       leal    32(%edx),%edx
+       pxor    %xmm3,%xmm0
+.L024enc1_loop:
+       aesenc  %xmm4,%xmm0
+       decl    %ecx
+       movups  (%edx),%xmm4
+       leal    16(%edx),%edx
+       jnz     .L024enc1_loop
+       aesenclast      %xmm4,%xmm0
+       subl    $16,%eax
+       leal    16(%edi),%edi
+       movl    %ebx,%ecx
+       movl    %ebp,%edx
+       movups  %xmm0,-16(%edi)
+       jnc     .L023cbc_enc_loop
+       addl    $16,%eax
+       jnz     .L022cbc_enc_tail
+       movaps  %xmm0,%xmm5
+       jmp     .L020cbc_ret
+.L022cbc_enc_tail:
+       movl    %eax,%ecx
+.long  2767451785
+       movl    $16,%ecx
+       subl    %eax,%ecx
+       xorl    %eax,%eax
+.long  2868115081
+       leal    -16(%edi),%edi
+       movl    %ebx,%ecx
+       movl    %edi,%esi
+       movl    %ebp,%edx
+       jmp     .L023cbc_enc_loop
+.align 16
+.L021cbc_decrypt:
+       subl    $64,%eax
+       jbe     .L025cbc_dec_tail
+       jmp     .L026cbc_dec_loop3
+.align 16
+.L026cbc_dec_loop3:
+       movups  (%esi),%xmm0
+       movups  16(%esi),%xmm1
+       movups  32(%esi),%xmm2
+       movaps  %xmm0,%xmm6
+       movaps  %xmm1,%xmm7
+       call    _aesni_decrypt3
+       subl    $48,%eax
+       leal    48(%esi),%esi
+       leal    48(%edi),%edi
+       pxor    %xmm5,%xmm0
+       pxor    %xmm6,%xmm1
+       movups  -16(%esi),%xmm5
+       pxor    %xmm7,%xmm2
+       movups  %xmm0,-48(%edi)
+       movl    %ebx,%ecx
+       movups  %xmm1,-32(%edi)
+       movl    %ebp,%edx
+       movups  %xmm2,-16(%edi)
+       ja      .L026cbc_dec_loop3
+.L025cbc_dec_tail:
+       addl    $64,%eax
+       jz      .L020cbc_ret
+       movups  (%esi),%xmm0
+       cmpl    $16,%eax
+       movaps  %xmm0,%xmm6
+       jbe     .L027cbc_dec_one
+       movups  16(%esi),%xmm1
+       cmpl    $32,%eax
+       movaps  %xmm1,%xmm7
+       jbe     .L028cbc_dec_two
+       movups  32(%esi),%xmm2
+       cmpl    $48,%eax
+       jbe     .L029cbc_dec_three
+       movups  48(%esi),%xmm7
+       call    _aesni_decrypt4
+       movups  16(%esi),%xmm3
+       movups  32(%esi),%xmm4
+       pxor    %xmm5,%xmm0
+       pxor    %xmm6,%xmm1
+       movups  48(%esi),%xmm5
+       movups  %xmm0,(%edi)
+       pxor    %xmm3,%xmm2
+       pxor    %xmm4,%xmm7
+       movups  %xmm1,16(%edi)
+       movups  %xmm2,32(%edi)
+       movaps  %xmm7,%xmm0
+       leal    48(%edi),%edi
+       jmp     .L030cbc_dec_tail_collected
+.L027cbc_dec_one:
+       movups  (%edx),%xmm3
+       movups  16(%edx),%xmm4
+       leal    32(%edx),%edx
+       pxor    %xmm3,%xmm0
+.L031dec1_loop:
+       aesdec  %xmm4,%xmm0
+       decl    %ecx
+       movups  (%edx),%xmm4
+       leal    16(%edx),%edx
+       jnz     .L031dec1_loop
+       aesdeclast      %xmm4,%xmm0
+       pxor    %xmm5,%xmm0
+       movaps  %xmm6,%xmm5
+       jmp     .L030cbc_dec_tail_collected
+.L028cbc_dec_two:
+       call    _aesni_decrypt3
+       pxor    %xmm5,%xmm0
+       pxor    %xmm6,%xmm1
+       movups  %xmm0,(%edi)
+       movaps  %xmm1,%xmm0
+       movaps  %xmm7,%xmm5
+       leal    16(%edi),%edi
+       jmp     .L030cbc_dec_tail_collected
+.L029cbc_dec_three:
+       call    _aesni_decrypt3
+       pxor    %xmm5,%xmm0
+       pxor    %xmm6,%xmm1
+       pxor    %xmm7,%xmm2
+       movups  %xmm0,(%edi)
+       movups  %xmm1,16(%edi)
+       movaps  %xmm2,%xmm0
+       movups  32(%esi),%xmm5
+       leal    32(%edi),%edi
+.L030cbc_dec_tail_collected:
+       andl    $15,%eax
+       jnz     .L032cbc_dec_tail_partial
+       movups  %xmm0,(%edi)
+       jmp     .L020cbc_ret
+.L032cbc_dec_tail_partial:
+       movl    %esp,%ebp
+       subl    $16,%esp
+       andl    $-16,%esp
+       movaps  %xmm0,(%esp)
+       movl    %esp,%esi
+       movl    %eax,%ecx
+.long  2767451785
+       movl    %ebp,%esp
+.L020cbc_ret:
+       movl    36(%esp),%ebp
+       movups  %xmm5,(%ebp)
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.size  aesni_cbc_encrypt,.-.L_aesni_cbc_encrypt_begin
+.type  _aesni_set_encrypt_key,@function
+.align 16
+_aesni_set_encrypt_key:
+       testl   %eax,%eax
+       jz      .L033bad_pointer
+       testl   %edx,%edx
+       jz      .L033bad_pointer
+       movups  (%eax),%xmm0
+       pxor    %xmm4,%xmm4
+       leal    16(%edx),%edx
+       cmpl    $256,%ecx
+       je      .L03414rounds
+       cmpl    $192,%ecx
+       je      .L03512rounds
+       cmpl    $128,%ecx
+       jne     .L036bad_keybits
+.align 16
+.L03710rounds:
+       movl    $9,%ecx
+       movups  %xmm0,-16(%edx)
+       aeskeygenassist $1,%xmm0,%xmm1
+       call    .L038key_128_cold
+       aeskeygenassist $2,%xmm0,%xmm1
+       call    .L039key_128
+       aeskeygenassist $4,%xmm0,%xmm1
+       call    .L039key_128
+       aeskeygenassist $8,%xmm0,%xmm1
+       call    .L039key_128
+       aeskeygenassist $16,%xmm0,%xmm1
+       call    .L039key_128
+       aeskeygenassist $32,%xmm0,%xmm1
+       call    .L039key_128
+       aeskeygenassist $64,%xmm0,%xmm1
+       call    .L039key_128
+       aeskeygenassist $128,%xmm0,%xmm1
+       call    .L039key_128
+       aeskeygenassist $27,%xmm0,%xmm1
+       call    .L039key_128
+       aeskeygenassist $54,%xmm0,%xmm1
+       call    .L039key_128
+       movups  %xmm0,(%edx)
+       movl    %ecx,80(%edx)
+       xorl    %eax,%eax
+       ret
+.align 16
+.L039key_128:
+       movups  %xmm0,(%edx)
+       leal    16(%edx),%edx
+.L038key_128_cold:
+       shufps  $16,%xmm0,%xmm4
+       pxor    %xmm4,%xmm0
+       shufps  $140,%xmm0,%xmm4
+       pxor    %xmm4,%xmm0
+       pshufd  $255,%xmm1,%xmm1
+       pxor    %xmm1,%xmm0
+       ret
+.align 16
+.L03512rounds:
+       movq    16(%eax),%xmm2
+       movl    $11,%ecx
+       movups  %xmm0,-16(%edx)
+       aeskeygenassist $1,%xmm2,%xmm1
+       call    .L040key_192a_cold
+       aeskeygenassist $2,%xmm2,%xmm1
+       call    .L041key_192b
+       aeskeygenassist $4,%xmm2,%xmm1
+       call    .L042key_192a
+       aeskeygenassist $8,%xmm2,%xmm1
+       call    .L041key_192b
+       aeskeygenassist $16,%xmm2,%xmm1
+       call    .L042key_192a
+       aeskeygenassist $32,%xmm2,%xmm1
+       call    .L041key_192b
+       aeskeygenassist $64,%xmm2,%xmm1
+       call    .L042key_192a
+       aeskeygenassist $128,%xmm2,%xmm1
+       call    .L041key_192b
+       movups  %xmm0,(%edx)
+       movl    %ecx,48(%edx)
+       xorl    %eax,%eax
+       ret
+.align 16
+.L042key_192a:
+       movups  %xmm0,(%edx)
+       leal    16(%edx),%edx
+.align 16
+.L040key_192a_cold:
+       movaps  %xmm2,%xmm5
+.L043key_192b_warm:
+       shufps  $16,%xmm0,%xmm4
+       movaps  %xmm2,%xmm3
+       pxor    %xmm4,%xmm0
+       shufps  $140,%xmm0,%xmm4
+       pslldq  $4,%xmm3
+       pxor    %xmm4,%xmm0
+       pshufd  $85,%xmm1,%xmm1
+       pxor    %xmm3,%xmm2
+       pxor    %xmm1,%xmm0
+       pshufd  $255,%xmm0,%xmm3
+       pxor    %xmm3,%xmm2
+       ret
+.align 16
+.L041key_192b:
+       movaps  %xmm0,%xmm3
+       shufps  $68,%xmm0,%xmm5
+       movups  %xmm5,(%edx)
+       shufps  $78,%xmm2,%xmm3
+       movups  %xmm3,16(%edx)
+       leal    32(%edx),%edx
+       jmp     .L043key_192b_warm
+.align 16
+.L03414rounds:
+       movups  16(%eax),%xmm2
+       movl    $13,%ecx
+       leal    16(%edx),%edx
+       movups  %xmm0,-32(%edx)
+       movups  %xmm2,-16(%edx)
+       aeskeygenassist $1,%xmm2,%xmm1
+       call    .L044key_256a_cold
+       aeskeygenassist $1,%xmm0,%xmm1
+       call    .L045key_256b
+       aeskeygenassist $2,%xmm2,%xmm1
+       call    .L046key_256a
+       aeskeygenassist $2,%xmm0,%xmm1
+       call    .L045key_256b
+       aeskeygenassist $4,%xmm2,%xmm1
+       call    .L046key_256a
+       aeskeygenassist $4,%xmm0,%xmm1
+       call    .L045key_256b
+       aeskeygenassist $8,%xmm2,%xmm1
+       call    .L046key_256a
+       aeskeygenassist $8,%xmm0,%xmm1
+       call    .L045key_256b
+       aeskeygenassist $16,%xmm2,%xmm1
+       call    .L046key_256a
+       aeskeygenassist $16,%xmm0,%xmm1
+       call    .L045key_256b
+       aeskeygenassist $32,%xmm2,%xmm1
+       call    .L046key_256a
+       aeskeygenassist $32,%xmm0,%xmm1
+       call    .L045key_256b
+       aeskeygenassist $64,%xmm2,%xmm1
+       call    .L046key_256a
+       movups  %xmm0,(%edx)
+       movl    %ecx,16(%edx)
+       xorl    %eax,%eax
+       ret
+.align 16
+.L046key_256a:
+       movups  %xmm2,(%edx)
+       leal    16(%edx),%edx
+.L044key_256a_cold:
+       shufps  $16,%xmm0,%xmm4
+       pxor    %xmm4,%xmm0
+       shufps  $140,%xmm0,%xmm4
+       pxor    %xmm4,%xmm0
+       pshufd  $255,%xmm1,%xmm1
+       pxor    %xmm1,%xmm0
+       ret
+.align 16
+.L045key_256b:
+       movups  %xmm0,(%edx)
+       leal    16(%edx),%edx
+       shufps  $16,%xmm2,%xmm4
+       pxor    %xmm4,%xmm2
+       shufps  $140,%xmm2,%xmm4
+       pxor    %xmm4,%xmm2
+       pshufd  $170,%xmm1,%xmm1
+       pxor    %xmm1,%xmm2
+       ret
+.align 4
+.L033bad_pointer:
+       movl    $-1,%eax
+       ret
+.align 4
+.L036bad_keybits:
+       movl    $-2,%eax
+       ret
+.size  _aesni_set_encrypt_key,.-_aesni_set_encrypt_key
+.globl aesni_set_encrypt_key
+.type  aesni_set_encrypt_key,@function
+.align 16
+aesni_set_encrypt_key:
+.L_aesni_set_encrypt_key_begin:
+       movl    4(%esp),%eax
+       movl    8(%esp),%ecx
+       movl    12(%esp),%edx
+       call    _aesni_set_encrypt_key
+       ret
+.size  aesni_set_encrypt_key,.-.L_aesni_set_encrypt_key_begin
+.globl aesni_set_decrypt_key
+.type  aesni_set_decrypt_key,@function
+.align 16
+aesni_set_decrypt_key:
+.L_aesni_set_decrypt_key_begin:
+       movl    4(%esp),%eax
+       movl    8(%esp),%ecx
+       movl    12(%esp),%edx
+       call    _aesni_set_encrypt_key
+       movl    12(%esp),%edx
+       shll    $4,%ecx
+       testl   %eax,%eax
+       jnz     .L047dec_key_ret
+       leal    16(%edx,%ecx,1),%eax
+       movups  (%edx),%xmm0
+       movups  (%eax),%xmm1
+       movups  %xmm0,(%eax)
+       movups  %xmm1,(%edx)
+       leal    16(%edx),%edx
+       leal    -16(%eax),%eax
+.L048dec_key_inverse:
+       movups  (%edx),%xmm0
+       movups  (%eax),%xmm1
+       aesimc  %xmm0,%xmm0
+       aesimc  %xmm1,%xmm1
+       leal    16(%edx),%edx
+       leal    -16(%eax),%eax
+       cmpl    %edx,%eax
+       movups  %xmm0,16(%eax)
+       movups  %xmm1,-16(%edx)
+       ja      .L048dec_key_inverse
+       movups  (%edx),%xmm0
+       aesimc  %xmm0,%xmm0
+       movups  %xmm0,(%edx)
+       xorl    %eax,%eax
+.L047dec_key_ret:
+       ret
+.size  aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin
+.byte  65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
+.byte  83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
+.byte  32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
+.byte  115,108,46,111,114,103,62,0
diff --git a/lib/accelerated/intel/asm/x64_iaesx64.s b/lib/accelerated/intel/asm/x64_iaesx64.s
deleted file mode 100755
index 433d177..0000000
--- a/lib/accelerated/intel/asm/x64_iaesx64.s
+++ /dev/null
@@ -1,2054 +0,0 @@
-[bits 64]
-[CPU intelnop]
-
-; Copyright (c) 2010, Intel Corporation
-; All rights reserved.
-; 
-; Redistribution and use in source and binary forms, with or without 
-; modification, are permitted provided that the following conditions are met:
-; 
-;     * Redistributions of source code must retain the above copyright notice, 
-;       this list of conditions and the following disclaimer.
-;     * Redistributions in binary form must reproduce the above copyright notice, 
-;       this list of conditions and the following disclaimer in the documentation 
-;       and/or other materials provided with the distribution.
-;     * Neither the name of Intel Corporation nor the names of its contributors 
-;       may be used to endorse or promote products derived from this software 
-;       without specific prior written permission.
-; 
-; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
-; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
-; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
-; IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
-; INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
-; BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
-; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
-; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 
-; OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
-; ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-%macro linux_setup 0
-%ifdef __linux__
-       mov rcx, rdi
-       mov rdx, rsi
-%endif
-%endmacro
-
-%macro inversekey 1
-       movdqu  xmm1,%1
-       aesimc  xmm0,xmm1
-       movdqu  %1,xmm0
-%endmacro
-
-%macro aesdeclast1 1
-       aesdeclast      xmm0,%1
-%endmacro
-
-%macro aesenclast1 1
-       aesenclast      xmm0,%1
-%endmacro
-
-%macro aesdec1 1
-       aesdec  xmm0,%1
-%endmacro
-
-%macro aesenc1 1
-       aesenc  xmm0,%1
-%endmacro
-
-
-%macro aesdeclast1_u 1
-       movdqu xmm4,%1
-       aesdeclast      xmm0,xmm4
-%endmacro
-
-%macro aesenclast1_u 1
-       movdqu xmm4,%1
-       aesenclast      xmm0,xmm4
-%endmacro
-
-%macro aesdec1_u 1
-       movdqu xmm4,%1
-       aesdec  xmm0,xmm4
-%endmacro
-
-%macro aesenc1_u 1
-       movdqu xmm4,%1
-       aesenc  xmm0,xmm4
-%endmacro
- 
-%macro aesdec4 1
-       movdqa  xmm4,%1
-
-       aesdec  xmm0,xmm4
-       aesdec  xmm1,xmm4
-       aesdec  xmm2,xmm4
-       aesdec  xmm3,xmm4
-
-%endmacro
-
-%macro aesdeclast4 1
-       movdqa  xmm4,%1
-
-       aesdeclast      xmm0,xmm4
-       aesdeclast      xmm1,xmm4
-       aesdeclast      xmm2,xmm4
-       aesdeclast      xmm3,xmm4
-
-%endmacro
-
-
-%macro aesenc4 1
-       movdqa  xmm4,%1
-
-       aesenc  xmm0,xmm4
-       aesenc  xmm1,xmm4
-       aesenc  xmm2,xmm4
-       aesenc  xmm3,xmm4
-
-%endmacro
-
-%macro aesenclast4 1
-       movdqa  xmm4,%1
-
-       aesenclast      xmm0,xmm4
-       aesenclast      xmm1,xmm4
-       aesenclast      xmm2,xmm4
-       aesenclast      xmm3,xmm4
-
-%endmacro
-
-
-%macro load_and_inc4 1
-       movdqa  xmm4,%1
-       movdqa  xmm0,xmm5
-       movdqa  xmm1,xmm5
-       paddq   xmm1,[counter_add_one wrt rip]
-       movdqa  xmm2,xmm5
-       paddq   xmm2,[counter_add_two wrt rip]
-       movdqa  xmm3,xmm5
-       paddq   xmm3,[counter_add_three wrt rip]
-       pxor    xmm0,xmm4
-       paddq   xmm5,[counter_add_four wrt rip]
-       pxor    xmm1,xmm4
-       pxor    xmm2,xmm4
-       pxor    xmm3,xmm4
-%endmacro
-
-%macro xor_with_input4 1
-       movdqu xmm4,[%1]
-       pxor xmm0,xmm4
-       movdqu xmm4,[%1+16]
-       pxor xmm1,xmm4
-       movdqu xmm4,[%1+32]
-       pxor xmm2,xmm4
-       movdqu xmm4,[%1+48]
-       pxor xmm3,xmm4
-%endmacro
-
-
-
-%macro load_and_xor4 2
-       movdqa  xmm4,%2
-       movdqu  xmm0,[%1 + 0*16]
-       pxor    xmm0,xmm4
-       movdqu  xmm1,[%1 + 1*16]
-       pxor    xmm1,xmm4
-       movdqu  xmm2,[%1 + 2*16]
-       pxor    xmm2,xmm4
-       movdqu  xmm3,[%1 + 3*16]
-       pxor    xmm3,xmm4
-%endmacro
-
-%macro store4 1
-       movdqu [%1 + 0*16],xmm0
-       movdqu [%1 + 1*16],xmm1
-       movdqu [%1 + 2*16],xmm2
-       movdqu [%1 + 3*16],xmm3
-%endmacro
-
-%macro copy_round_keys 3
-       movdqu xmm4,[%2 + ((%3)*16)]
-       movdqa [%1 + ((%3)*16)],xmm4
-%endmacro
-
-
-%macro key_expansion_1_192 1
-               ;; Assumes the xmm3 includes all zeros at this point. 
-        pshufd xmm2, xmm2, 11111111b        
-        shufps xmm3, xmm1, 00010000b        
-        pxor xmm1, xmm3        
-        shufps xmm3, xmm1, 10001100b
-        pxor xmm1, xmm3        
-               pxor xmm1, xmm2         
-               movdqu [rdx+%1], xmm1                   
-%endmacro
-
-; Calculate w10 and w11 using calculated w9 and known w4-w5
-%macro key_expansion_2_192 1                           
-               movdqa xmm5, xmm4
-               pslldq xmm5, 4
-               shufps xmm6, xmm1, 11110000b
-               pxor xmm6, xmm5
-               pxor xmm4, xmm6
-               pshufd xmm7, xmm4, 00001110b 
-               movdqu [rdx+%1], xmm7
-%endmacro
-
-
-section .data
-align 16
-shuffle_mask:
-DD 0FFFFFFFFh
-DD 03020100h
-DD 07060504h
-DD 0B0A0908h
-
-
-align 16
-counter_add_one:
-DD 1
-DD 0
-DD 0
-DD 0
-
-counter_add_two:
-DD 2
-DD 0
-DD 0
-DD 0
-
-counter_add_three:
-DD 3
-DD 0
-DD 0
-DD 0
-
-counter_add_four:
-DD 4
-DD 0
-DD 0
-DD 0
-
-
-
-section .text
-
-align 16
-key_expansion256:
-
-    pshufd xmm2, xmm2, 011111111b
-
-    movdqa xmm4, xmm1
-    pshufb xmm4, xmm5
-    pxor xmm1, xmm4
-    pshufb xmm4, xmm5
-    pxor xmm1, xmm4
-    pshufb xmm4, xmm5
-    pxor xmm1, xmm4
-    pxor xmm1, xmm2
-
-    movdqu [rdx], xmm1
-    add rdx, 0x10
-    
-    aeskeygenassist xmm4, xmm1, 0
-    pshufd xmm2, xmm4, 010101010b
-
-    movdqa xmm4, xmm3
-    pshufb xmm4, xmm5
-    pxor xmm3, xmm4
-    pshufb xmm4, xmm5
-    pxor xmm3, xmm4
-    pshufb xmm4, xmm5
-    pxor xmm3, xmm4
-    pxor xmm3, xmm2
-
-    movdqu [rdx], xmm3
-    add rdx, 0x10
-
-    ret
-
-
-
-align 16
-key_expansion128: 
-    pshufd xmm2, xmm2, 0xFF;
-    movdqa xmm3, xmm1
-    pshufb xmm3, xmm5
-    pxor xmm1, xmm3
-    pshufb xmm3, xmm5
-    pxor xmm1, xmm3
-    pshufb xmm3, xmm5
-    pxor xmm1, xmm3
-    pxor xmm1, xmm2
-
-    ; storing the result in the key schedule array
-    movdqu [rdx], xmm1
-    add rdx, 0x10                    
-    ret
-
-
-
-
-
-
-align 16
-global iEncExpandKey128
-iEncExpandKey128:
-
-               linux_setup
-
-        movdqu xmm1, [rcx]    ; loading the key
-
-        movdqu [rdx], xmm1
-
-        movdqa xmm5, [shuffle_mask wrt rip]
-
-        add rdx,16
-
-        aeskeygenassist xmm2, xmm1, 0x1     ; Generating round key 1
-        call key_expansion128
-        aeskeygenassist xmm2, xmm1, 0x2     ; Generating round key 2
-        call key_expansion128
-        aeskeygenassist xmm2, xmm1, 0x4     ; Generating round key 3
-        call key_expansion128
-        aeskeygenassist xmm2, xmm1, 0x8     ; Generating round key 4
-        call key_expansion128
-        aeskeygenassist xmm2, xmm1, 0x10    ; Generating round key 5
-        call key_expansion128
-        aeskeygenassist xmm2, xmm1, 0x20    ; Generating round key 6
-        call key_expansion128
-        aeskeygenassist xmm2, xmm1, 0x40    ; Generating round key 7
-        call key_expansion128
-        aeskeygenassist xmm2, xmm1, 0x80    ; Generating round key 8
-        call key_expansion128
-        aeskeygenassist xmm2, xmm1, 0x1b    ; Generating round key 9
-        call key_expansion128
-        aeskeygenassist xmm2, xmm1, 0x36    ; Generating round key 10
-        call key_expansion128
-
-               ret 
-
-
-
-align 16
-global iEncExpandKey192
-iEncExpandKey192:
-
-               linux_setup
-               sub rsp,64+8
-               movdqa  [rsp],xmm6
-               movdqa  [rsp+16],xmm7
-
-
-        movq xmm7, [rcx+16]    ; loading the AES key
-        movq [rdx+16], xmm7  ; Storing key in memory where all key expansion 
-        pshufd xmm4, xmm7, 01001111b
-        movdqu xmm1, [rcx]     ; loading the AES key
-        movdqu [rdx], xmm1  ; Storing key in memory where all key expansion 
-                       
-        pxor xmm3, xmm3                ; Set xmm3 to be all zeros. Required for the key_expansion. 
-        pxor xmm6, xmm6                ; Set xmm3 to be all zeros. Required for the key_expansion. 
-
-        aeskeygenassist xmm2, xmm4, 0x1     ; Complete round key 1 and 
generate round key 2 
-        key_expansion_1_192 24
-               key_expansion_2_192 40                          
-
-        aeskeygenassist xmm2, xmm4, 0x2     ; Generate round key 3 and part of 
round key 4
-        key_expansion_1_192 48
-               key_expansion_2_192 64                          
-
-        aeskeygenassist xmm2, xmm4, 0x4     ; Complete round key 4 and 
generate round key 5
-        key_expansion_1_192 72
-               key_expansion_2_192 88
-               
-        aeskeygenassist xmm2, xmm4, 0x8     ; Generate round key 6 and part of 
round key 7
-        key_expansion_1_192 96
-               key_expansion_2_192 112
-               
-        aeskeygenassist xmm2, xmm4, 0x10     ; Complete round key 7 and 
generate round key 8 
-        key_expansion_1_192 120
-               key_expansion_2_192 136                         
-
-        aeskeygenassist xmm2, xmm4, 0x20     ; Generate round key 9 and part 
of round key 10
-        key_expansion_1_192 144
-               key_expansion_2_192 160                         
-
-        aeskeygenassist xmm2, xmm4, 0x40     ; Complete round key 10 and 
generate round key 11
-        key_expansion_1_192 168
-               key_expansion_2_192 184                         
-
-        aeskeygenassist xmm2, xmm4, 0x80     ; Generate round key 12
-        key_expansion_1_192 192
-
-
-               movdqa  xmm6,[rsp]
-               movdqa  xmm7,[rsp+16]
-               add rsp,64+8
-
-               ret 
-
-
-
-
-align 16
-global iDecExpandKey128
-iDecExpandKey128:
-
-       linux_setup
-       push rcx
-       push rdx
-       sub rsp,16+8
-
-       call iEncExpandKey128
-
-       add rsp,16+8
-       pop rdx
-       pop rcx
-
-       inversekey [rdx + 1*16]
-       inversekey [rdx + 2*16]
-       inversekey [rdx + 3*16]
-       inversekey [rdx + 4*16]
-       inversekey [rdx + 5*16]
-       inversekey [rdx + 6*16]
-       inversekey [rdx + 7*16]
-       inversekey [rdx + 8*16]
-       inversekey [rdx + 9*16]
-
-       ret
-
-
-align 16
-global iDecExpandKey192
-iDecExpandKey192:
-
-       linux_setup
-       push rcx
-       push rdx
-       sub rsp,16+8
-
-       call iEncExpandKey192
-
-       add rsp,16+8
-       pop rdx
-       pop rcx
-
-       
-       inversekey [rdx + 1*16]
-       inversekey [rdx + 2*16]
-       inversekey [rdx + 3*16]
-       inversekey [rdx + 4*16]
-       inversekey [rdx + 5*16]
-       inversekey [rdx + 6*16]
-       inversekey [rdx + 7*16]
-       inversekey [rdx + 8*16]
-       inversekey [rdx + 9*16]
-       inversekey [rdx + 10*16]
-       inversekey [rdx + 11*16]
-
-       ret
-
-
-
-align 16
-global iDecExpandKey256
-iDecExpandKey256:
-
-       linux_setup
-       push rcx
-       push rdx
-       sub rsp,16+8
-
-       call iEncExpandKey256
-
-       add rsp,16+8
-       pop rdx
-       pop rcx
-
-       inversekey [rdx + 1*16]
-       inversekey [rdx + 2*16]
-       inversekey [rdx + 3*16]
-       inversekey [rdx + 4*16]
-       inversekey [rdx + 5*16]
-       inversekey [rdx + 6*16]
-       inversekey [rdx + 7*16]
-       inversekey [rdx + 8*16]
-       inversekey [rdx + 9*16]
-       inversekey [rdx + 10*16]
-       inversekey [rdx + 11*16]
-       inversekey [rdx + 12*16]
-       inversekey [rdx + 13*16]
-
-       ret
-       
-
-       
-       
-align 16
-global iEncExpandKey256
-iEncExpandKey256:
-
-       linux_setup
-
-    movdqu xmm1, [rcx]    ; loading the key
-    movdqu xmm3, [rcx+16]
-    movdqu [rdx], xmm1  ; Storing key in memory where all key schedule will be 
stored
-    movdqu [rdx+16], xmm3 
-    
-    add rdx,32
-
-    movdqa xmm5, [shuffle_mask wrt rip]  ; this mask is used by key_expansion
-
-    aeskeygenassist xmm2, xmm3, 0x1     ; 
-    call key_expansion256
-    aeskeygenassist xmm2, xmm3, 0x2     ; 
-    call key_expansion256
-    aeskeygenassist xmm2, xmm3, 0x4     ; 
-    call key_expansion256
-    aeskeygenassist xmm2, xmm3, 0x8     ; 
-    call key_expansion256
-    aeskeygenassist xmm2, xmm3, 0x10    ; 
-    call key_expansion256
-    aeskeygenassist xmm2, xmm3, 0x20    ; 
-    call key_expansion256
-    aeskeygenassist xmm2, xmm3, 0x40    ; 
-;    call key_expansion256 
-
-    pshufd xmm2, xmm2, 011111111b
-
-    movdqa xmm4, xmm1
-    pshufb xmm4, xmm5
-    pxor xmm1, xmm4
-    pshufb xmm4, xmm5
-    pxor xmm1, xmm4
-    pshufb xmm4, xmm5
-    pxor xmm1, xmm4
-    pxor xmm1, xmm2
-
-    movdqu [rdx], xmm1
-
-
-       ret 
-       
-       
-       
-
-
-
-align 16
-global iDec128
-iDec128:
-
-       linux_setup
-       sub rsp,16*16+8
-
-
-       mov eax,[rcx+32] ; numblocks
-       mov rdx,[rcx]
-       mov r8,[rcx+8]
-       mov rcx,[rcx+16]
-       
-       sub r8,rdx
-
-       test eax,eax
-       jz end_dec128
-
-       cmp eax,4
-       jl      lp128decsingle
-
-       test    rcx,0xf
-       jz              lp128decfour
-       
-       copy_round_keys rsp,rcx,0
-       copy_round_keys rsp,rcx,1
-       copy_round_keys rsp,rcx,2
-       copy_round_keys rsp,rcx,3
-       copy_round_keys rsp,rcx,4
-       copy_round_keys rsp,rcx,5
-       copy_round_keys rsp,rcx,6
-       copy_round_keys rsp,rcx,7
-       copy_round_keys rsp,rcx,8
-       copy_round_keys rsp,rcx,9
-       copy_round_keys rsp,rcx,10
-       mov rcx,rsp     
-       
-       
-
-align 16
-lp128decfour:
-       
-       test eax,eax
-       jz end_dec128
-
-       cmp eax,4
-       jl      lp128decsingle
-
-       load_and_xor4 rdx, [rcx+10*16]
-       add rdx,16*4
-       aesdec4 [rcx+9*16]
-       aesdec4 [rcx+8*16]
-       aesdec4 [rcx+7*16]
-       aesdec4 [rcx+6*16]
-       aesdec4 [rcx+5*16]
-       aesdec4 [rcx+4*16]
-       aesdec4 [rcx+3*16]
-       aesdec4 [rcx+2*16]
-       aesdec4 [rcx+1*16]
-       aesdeclast4 [rcx+0*16]
-       
-       sub eax,4
-       store4 r8+rdx-(16*4)
-       jmp lp128decfour
-
-
-       align 16
-lp128decsingle:
-
-       movdqu xmm0, [rdx]
-       movdqu xmm4,[rcx+10*16]
-       pxor xmm0, xmm4
-       aesdec1_u [rcx+9*16]
-       aesdec1_u [rcx+8*16]
-       aesdec1_u [rcx+7*16]
-       aesdec1_u [rcx+6*16]
-       aesdec1_u [rcx+5*16]
-       aesdec1_u [rcx+4*16]
-       aesdec1_u [rcx+3*16]
-       aesdec1_u [rcx+2*16]
-       aesdec1_u [rcx+1*16]
-       aesdeclast1_u [rcx+0*16]
-
-       add rdx, 16
-       movdqu  [r8 + rdx - 16], xmm0
-       dec eax
-       jnz lp128decsingle
-
-end_dec128:
-
-       add rsp,16*16+8
-       ret
-
-
-align 16
-global iDec128_CBC
-iDec128_CBC:
-       
-       linux_setup
-       sub rsp,16*16+8
-
-       mov r9,rcx
-       mov rax,[rcx+24]
-       movdqu  xmm5,[rax]
-       
-       mov eax,[rcx+32] ; numblocks
-       mov rdx,[rcx]
-       mov r8,[rcx+8]
-       mov rcx,[rcx+16]
-       
-       
-       sub r8,rdx
-
-
-       test eax,eax
-       jz end_dec128_CBC
-
-       cmp eax,4
-       jl      lp128decsingle_CBC
-
-       test    rcx,0xf
-       jz              lp128decfour_CBC
-       
-       copy_round_keys rsp,rcx,0
-       copy_round_keys rsp,rcx,1
-       copy_round_keys rsp,rcx,2
-       copy_round_keys rsp,rcx,3
-       copy_round_keys rsp,rcx,4
-       copy_round_keys rsp,rcx,5
-       copy_round_keys rsp,rcx,6
-       copy_round_keys rsp,rcx,7
-       copy_round_keys rsp,rcx,8
-       copy_round_keys rsp,rcx,9
-       copy_round_keys rsp,rcx,10
-       mov rcx,rsp     
-
-
-align 16
-lp128decfour_CBC:
-       
-       test eax,eax
-       jz end_dec128_CBC
-
-       cmp eax,4
-       jl      lp128decsingle_CBC
-
-       load_and_xor4 rdx, [rcx+10*16]
-       add rdx,16*4
-       aesdec4 [rcx+9*16]
-       aesdec4 [rcx+8*16]
-       aesdec4 [rcx+7*16]
-       aesdec4 [rcx+6*16]
-       aesdec4 [rcx+5*16]
-       aesdec4 [rcx+4*16]
-       aesdec4 [rcx+3*16]
-       aesdec4 [rcx+2*16]
-       aesdec4 [rcx+1*16]
-       aesdeclast4 [rcx+0*16]
-
-       pxor    xmm0,xmm5
-       movdqu  xmm4,[rdx - 16*4 + 0*16]
-       pxor    xmm1,xmm4
-       movdqu  xmm4,[rdx - 16*4 + 1*16]
-       pxor    xmm2,xmm4
-       movdqu  xmm4,[rdx - 16*4 + 2*16]
-       pxor    xmm3,xmm4
-       movdqu  xmm5,[rdx - 16*4 + 3*16]
-       
-       sub eax,4
-       store4 r8+rdx-(16*4)
-       jmp lp128decfour_CBC
-
-
-       align 16
-lp128decsingle_CBC:
-
-       movdqu xmm0, [rdx]
-       movdqa  xmm1,xmm0
-       movdqu xmm4,[rcx+10*16]
-       pxor xmm0, xmm4
-       aesdec1_u [rcx+9*16]
-       aesdec1_u [rcx+8*16]
-       aesdec1_u [rcx+7*16]
-       aesdec1_u [rcx+6*16]
-       aesdec1_u [rcx+5*16]
-       aesdec1_u [rcx+4*16]
-       aesdec1_u [rcx+3*16]
-       aesdec1_u [rcx+2*16]
-       aesdec1_u [rcx+1*16]
-       aesdeclast1_u [rcx+0*16]
-
-       pxor    xmm0,xmm5
-       movdqa  xmm5,xmm1
-       add rdx, 16
-       movdqu  [r8 + rdx - 16], xmm0
-       dec eax
-       jnz lp128decsingle_CBC
-
-end_dec128_CBC:
-
-       mov        r9,[r9+24]
-       movdqu [r9],xmm5
-       add rsp,16*16+8
-       ret
-
-
-align 16
-global iDec192_CBC
-iDec192_CBC:
-
-       linux_setup
-       sub rsp,16*16+8
-
-       mov r9,rcx
-       mov rax,[rcx+24]
-       movdqu  xmm5,[rax]
-       
-       mov eax,[rcx+32] ; numblocks
-       mov rdx,[rcx]
-       mov r8,[rcx+8]
-       mov rcx,[rcx+16]
-       
-       
-       sub r8,rdx
-
-       test eax,eax
-       jz end_dec192_CBC
-
-       cmp eax,4
-       jl      lp192decsingle_CBC
-
-       test    rcx,0xf
-       jz              lp192decfour_CBC
-       
-       copy_round_keys rsp,rcx,0
-       copy_round_keys rsp,rcx,1
-       copy_round_keys rsp,rcx,2
-       copy_round_keys rsp,rcx,3
-       copy_round_keys rsp,rcx,4
-       copy_round_keys rsp,rcx,5
-       copy_round_keys rsp,rcx,6
-       copy_round_keys rsp,rcx,7
-       copy_round_keys rsp,rcx,8
-       copy_round_keys rsp,rcx,9
-       copy_round_keys rsp,rcx,10
-       copy_round_keys rsp,rcx,11
-       copy_round_keys rsp,rcx,12
-       mov rcx,rsp     
-
-
-align 16
-lp192decfour_CBC:
-       
-       test eax,eax
-       jz end_dec192_CBC
-
-       cmp eax,4
-       jl      lp192decsingle_CBC
-
-       load_and_xor4 rdx, [rcx+12*16]
-       add rdx,16*4
-       aesdec4 [rcx+11*16]
-       aesdec4 [rcx+10*16]
-       aesdec4 [rcx+9*16]
-       aesdec4 [rcx+8*16]
-       aesdec4 [rcx+7*16]
-       aesdec4 [rcx+6*16]
-       aesdec4 [rcx+5*16]
-       aesdec4 [rcx+4*16]
-       aesdec4 [rcx+3*16]
-       aesdec4 [rcx+2*16]
-       aesdec4 [rcx+1*16]
-       aesdeclast4 [rcx+0*16]
-
-       pxor    xmm0,xmm5
-       movdqu  xmm4,[rdx - 16*4 + 0*16]
-       pxor    xmm1,xmm4
-       movdqu  xmm4,[rdx - 16*4 + 1*16]
-       pxor    xmm2,xmm4
-       movdqu  xmm4,[rdx - 16*4 + 2*16]
-       pxor    xmm3,xmm4
-       movdqu  xmm5,[rdx - 16*4 + 3*16]
-       
-       sub eax,4
-       store4 r8+rdx-(16*4)
-       jmp lp192decfour_CBC
-
-
-       align 16
-lp192decsingle_CBC:
-
-       movdqu xmm0, [rdx]
-       movdqu xmm4,[rcx+12*16]
-       movdqa  xmm1,xmm0
-       pxor xmm0, xmm4
-       aesdec1_u [rcx+11*16]
-       aesdec1_u [rcx+10*16]
-       aesdec1_u [rcx+9*16]
-       aesdec1_u [rcx+8*16]
-       aesdec1_u [rcx+7*16]
-       aesdec1_u [rcx+6*16]
-       aesdec1_u [rcx+5*16]
-       aesdec1_u [rcx+4*16]
-       aesdec1_u [rcx+3*16]
-       aesdec1_u [rcx+2*16]
-       aesdec1_u [rcx+1*16]
-       aesdeclast1_u [rcx+0*16]
-
-       pxor    xmm0,xmm5
-       movdqa  xmm5,xmm1
-       add rdx, 16
-       movdqu  [r8 + rdx - 16], xmm0
-       dec eax
-       jnz lp192decsingle_CBC
-
-end_dec192_CBC:
-
-       mov        r9,[r9+24]
-       movdqu [r9],xmm5
-       add rsp,16*16+8
-       ret
-
-
-
-
-align 16
-global iDec256_CBC
-iDec256_CBC:
-
-       linux_setup
-       sub rsp,16*16+8
-
-       mov r9,rcx
-       mov rax,[rcx+24]
-       movdqu  xmm5,[rax]
-       
-       mov eax,[rcx+32] ; numblocks
-       mov rdx,[rcx]
-       mov r8,[rcx+8]
-       mov rcx,[rcx+16]
-       
-       
-       sub r8,rdx
-
-       test eax,eax
-       jz end_dec256_CBC
-
-       cmp eax,4
-       jl      lp256decsingle_CBC
-
-       test    rcx,0xf
-       jz              lp256decfour_CBC
-       
-       copy_round_keys rsp,rcx,0
-       copy_round_keys rsp,rcx,1
-       copy_round_keys rsp,rcx,2
-       copy_round_keys rsp,rcx,3
-       copy_round_keys rsp,rcx,4
-       copy_round_keys rsp,rcx,5
-       copy_round_keys rsp,rcx,6
-       copy_round_keys rsp,rcx,7
-       copy_round_keys rsp,rcx,8
-       copy_round_keys rsp,rcx,9
-       copy_round_keys rsp,rcx,10
-       copy_round_keys rsp,rcx,11
-       copy_round_keys rsp,rcx,12
-       copy_round_keys rsp,rcx,13
-       copy_round_keys rsp,rcx,14
-       mov rcx,rsp     
-
-align 16
-lp256decfour_CBC:
-       
-       test eax,eax
-       jz end_dec256_CBC
-
-       cmp eax,4
-       jl      lp256decsingle_CBC
-
-       load_and_xor4 rdx, [rcx+14*16]
-       add rdx,16*4
-       aesdec4 [rcx+13*16]
-       aesdec4 [rcx+12*16]
-       aesdec4 [rcx+11*16]
-       aesdec4 [rcx+10*16]
-       aesdec4 [rcx+9*16]
-       aesdec4 [rcx+8*16]
-       aesdec4 [rcx+7*16]
-       aesdec4 [rcx+6*16]
-       aesdec4 [rcx+5*16]
-       aesdec4 [rcx+4*16]
-       aesdec4 [rcx+3*16]
-       aesdec4 [rcx+2*16]
-       aesdec4 [rcx+1*16]
-       aesdeclast4 [rcx+0*16]
-
-       pxor    xmm0,xmm5
-       movdqu  xmm4,[rdx - 16*4 + 0*16]
-       pxor    xmm1,xmm4
-       movdqu  xmm4,[rdx - 16*4 + 1*16]
-       pxor    xmm2,xmm4
-       movdqu  xmm4,[rdx - 16*4 + 2*16]
-       pxor    xmm3,xmm4
-       movdqu  xmm5,[rdx - 16*4 + 3*16]
-       
-       sub eax,4
-       store4 r8+rdx-(16*4)
-       jmp lp256decfour_CBC
-
-
-       align 16
-lp256decsingle_CBC:
-
-       movdqu xmm0, [rdx]
-       movdqu xmm4,[rcx+14*16]
-       movdqa  xmm1,xmm0
-       pxor xmm0, xmm4
-       aesdec1_u [rcx+13*16]
-       aesdec1_u [rcx+12*16]
-       aesdec1_u [rcx+11*16]
-       aesdec1_u [rcx+10*16]
-       aesdec1_u [rcx+9*16]
-       aesdec1_u [rcx+8*16]
-       aesdec1_u [rcx+7*16]
-       aesdec1_u [rcx+6*16]
-       aesdec1_u [rcx+5*16]
-       aesdec1_u [rcx+4*16]
-       aesdec1_u [rcx+3*16]
-       aesdec1_u [rcx+2*16]
-       aesdec1_u [rcx+1*16]
-       aesdeclast1_u [rcx+0*16]
-
-       pxor    xmm0,xmm5
-       movdqa  xmm5,xmm1
-       add rdx, 16
-       movdqu  [r8 + rdx - 16], xmm0
-       dec eax
-       jnz lp256decsingle_CBC
-
-end_dec256_CBC:
-
-       mov        r9,[r9+24]
-       movdqu [r9],xmm5
-       add rsp,16*16+8
-       ret
-
-
-
-
-
-align 16
-global iDec192
-iDec192:
-
-       linux_setup
-       sub rsp,16*16+8
-
-       mov eax,[rcx+32] ; numblocks
-       mov rdx,[rcx]
-       mov r8,[rcx+8]
-       mov rcx,[rcx+16]
-       
-       sub r8,rdx
-       
-       test eax,eax
-       jz end_dec192
-
-       cmp eax,4
-       jl      lp192decsingle
-       
-       test    rcx,0xf
-       jz              lp192decfour
-       
-       copy_round_keys rsp,rcx,0
-       copy_round_keys rsp,rcx,1
-       copy_round_keys rsp,rcx,2
-       copy_round_keys rsp,rcx,3
-       copy_round_keys rsp,rcx,4
-       copy_round_keys rsp,rcx,5
-       copy_round_keys rsp,rcx,6
-       copy_round_keys rsp,rcx,7
-       copy_round_keys rsp,rcx,8
-       copy_round_keys rsp,rcx,9
-       copy_round_keys rsp,rcx,10
-       copy_round_keys rsp,rcx,11
-       copy_round_keys rsp,rcx,12
-       mov rcx,rsp     
-
-align 16
-lp192decfour:
-       
-       test eax,eax
-       jz end_dec192
-
-       cmp eax,4
-       jl      lp192decsingle
-
-       load_and_xor4 rdx, [rcx+12*16]
-       add rdx,16*4
-       aesdec4 [rcx+11*16]
-       aesdec4 [rcx+10*16]
-       aesdec4 [rcx+9*16]
-       aesdec4 [rcx+8*16]
-       aesdec4 [rcx+7*16]
-       aesdec4 [rcx+6*16]
-       aesdec4 [rcx+5*16]
-       aesdec4 [rcx+4*16]
-       aesdec4 [rcx+3*16]
-       aesdec4 [rcx+2*16]
-       aesdec4 [rcx+1*16]
-       aesdeclast4 [rcx+0*16]
-       
-       sub eax,4
-       store4 r8+rdx-(16*4)
-       jmp lp192decfour
-
-
-       align 16
-lp192decsingle:
-
-       movdqu xmm0, [rdx]
-       movdqu xmm4,[rcx+12*16]
-       pxor xmm0, xmm4
-       aesdec1_u [rcx+11*16]
-       aesdec1_u [rcx+10*16]
-       aesdec1_u [rcx+9*16]
-       aesdec1_u [rcx+8*16]
-       aesdec1_u [rcx+7*16]
-       aesdec1_u [rcx+6*16]
-       aesdec1_u [rcx+5*16]
-       aesdec1_u [rcx+4*16]
-       aesdec1_u [rcx+3*16]
-       aesdec1_u [rcx+2*16]
-       aesdec1_u [rcx+1*16]
-       aesdeclast1_u [rcx+0*16]
-
-       add rdx, 16
-       movdqu  [r8 + rdx - 16], xmm0
-       dec eax
-       jnz lp192decsingle
-
-end_dec192:
-
-       add rsp,16*16+8
-       ret
-
-
-
-
-align 16
-global iDec256
-iDec256:
-
-       linux_setup
-       sub rsp,16*16+8
-       
-       mov eax,[rcx+32] ; numblocks
-       mov rdx,[rcx]
-       mov r8,[rcx+8]
-       mov rcx,[rcx+16]
-       
-       sub r8,rdx
-
-
-       test eax,eax
-       jz end_dec256
-       
-       cmp eax,4
-       jl lp256dec
-
-       test    rcx,0xf
-       jz              lp256dec4
-       
-       copy_round_keys rsp,rcx,0
-       copy_round_keys rsp,rcx,1
-       copy_round_keys rsp,rcx,2
-       copy_round_keys rsp,rcx,3
-       copy_round_keys rsp,rcx,4
-       copy_round_keys rsp,rcx,5
-       copy_round_keys rsp,rcx,6
-       copy_round_keys rsp,rcx,7
-       copy_round_keys rsp,rcx,8
-       copy_round_keys rsp,rcx,9
-       copy_round_keys rsp,rcx,10
-       copy_round_keys rsp,rcx,11
-       copy_round_keys rsp,rcx,12
-       copy_round_keys rsp,rcx,13
-       copy_round_keys rsp,rcx,14
-       mov rcx,rsp     
-
-       
-       align 16
-lp256dec4:     
-       test eax,eax
-       jz end_dec256
-       
-       cmp eax,4
-       jl lp256dec
-       
-       load_and_xor4 rdx,[rcx+14*16]
-       add rdx, 4*16
-       aesdec4 [rcx+13*16]
-       aesdec4 [rcx+12*16]
-       aesdec4 [rcx+11*16]
-       aesdec4 [rcx+10*16]
-       aesdec4 [rcx+9*16]
-       aesdec4 [rcx+8*16]
-       aesdec4 [rcx+7*16]
-       aesdec4 [rcx+6*16]
-       aesdec4 [rcx+5*16]
-       aesdec4 [rcx+4*16]
-       aesdec4 [rcx+3*16]
-       aesdec4 [rcx+2*16]
-       aesdec4 [rcx+1*16]
-       aesdeclast4 [rcx+0*16]
-
-       store4 r8+rdx-16*4
-       sub eax,4
-       jmp lp256dec4   
-       
-       align 16
-lp256dec:
-
-       movdqu xmm0, [rdx]
-       movdqu xmm4,[rcx+14*16]
-       add rdx, 16
-       pxor xmm0, xmm4                    ; Round 0 (only xor)
-       aesdec1_u [rcx+13*16]
-       aesdec1_u [rcx+12*16]
-       aesdec1_u [rcx+11*16]
-       aesdec1_u [rcx+10*16]
-       aesdec1_u [rcx+9*16]
-       aesdec1_u [rcx+8*16]
-       aesdec1_u [rcx+7*16]
-       aesdec1_u [rcx+6*16]
-       aesdec1_u [rcx+5*16]
-       aesdec1_u [rcx+4*16]
-       aesdec1_u [rcx+3*16]
-       aesdec1_u [rcx+2*16]
-       aesdec1_u [rcx+1*16]
-       aesdeclast1_u [rcx+0*16]
-
-               ; Store output encrypted data into CIPHERTEXT array
-       movdqu  [r8+rdx-16], xmm0
-       dec eax
-       jnz lp256dec
-
-end_dec256:
-       
-       add rsp,16*16+8
-       ret
-
-
-
-
-
-
-align 16
-global iEnc128
-iEnc128:
-
-       linux_setup
-       sub rsp,16*16+8
-       
-       mov eax,[rcx+32] ; numblocks
-       mov rdx,[rcx]
-       mov r8,[rcx+8]
-       mov rcx,[rcx+16]
-       
-       sub r8,rdx
-
-
-       test eax,eax
-       jz end_enc128
-       
-       cmp eax,4
-       jl lp128encsingle
-
-       test    rcx,0xf
-       jz              lpenc128four
-       
-       copy_round_keys rsp,rcx,0
-       copy_round_keys rsp,rcx,1
-       copy_round_keys rsp,rcx,2
-       copy_round_keys rsp,rcx,3
-       copy_round_keys rsp,rcx,4
-       copy_round_keys rsp,rcx,5
-       copy_round_keys rsp,rcx,6
-       copy_round_keys rsp,rcx,7
-       copy_round_keys rsp,rcx,8
-       copy_round_keys rsp,rcx,9
-       copy_round_keys rsp,rcx,10
-       mov rcx,rsp     
-
-
-       align 16        
-       
-lpenc128four:
-       
-       test eax,eax
-       jz end_enc128
-       
-       cmp eax,4
-       jl lp128encsingle
-
-       load_and_xor4 rdx,[rcx+0*16]
-       add rdx,4*16
-       aesenc4 [rcx+1*16]
-       aesenc4 [rcx+2*16]
-       aesenc4 [rcx+3*16]
-       aesenc4 [rcx+4*16]
-       aesenc4 [rcx+5*16]
-       aesenc4 [rcx+6*16]
-       aesenc4 [rcx+7*16]
-       aesenc4 [rcx+8*16]
-       aesenc4 [rcx+9*16]
-       aesenclast4     [rcx+10*16]
-       
-       store4 r8+rdx-16*4
-       sub eax,4
-       jmp lpenc128four
-       
-       align 16
-lp128encsingle:
-
-       movdqu xmm0, [rdx]
-       movdqu xmm4,[rcx+0*16]
-       add rdx, 16
-       pxor xmm0, xmm4
-       aesenc1_u [rcx+1*16]
-       aesenc1_u [rcx+2*16]
-       aesenc1_u [rcx+3*16]
-       aesenc1_u [rcx+4*16]     
-       aesenc1_u [rcx+5*16]
-       aesenc1_u [rcx+6*16]
-       aesenc1_u [rcx+7*16]
-       aesenc1_u [rcx+8*16]
-       aesenc1_u [rcx+9*16]
-       aesenclast1_u [rcx+10*16]
-
-               ; Store output encrypted data into CIPHERTEXT array
-       movdqu  [r8+rdx-16], xmm0
-       dec eax
-       jnz lp128encsingle
-
-end_enc128:
-
-       add rsp,16*16+8
-       ret
-
-
-align 16
-global iEnc128_CTR
-iEnc128_CTR:
-
-       linux_setup
-
-       mov r9,rcx
-       mov rax,[rcx+24]
-       movdqu xmm5,[rax]
-
-
-       sub rsp,16*16+8
-       
-       mov eax,[rcx+32] ; numblocks
-       mov rdx,[rcx]
-       mov r8,[rcx+8]
-       mov rcx,[rcx+16]
-       
-       sub r8,rdx
-
-
-       test eax,eax
-       jz end_encctr128
-       
-       cmp eax,4
-       jl lp128encctrsingle
-
-       test    rcx,0xf
-       jz              lpencctr128four
-       
-       copy_round_keys rsp,rcx,0
-       copy_round_keys rsp,rcx,1
-       copy_round_keys rsp,rcx,2
-       copy_round_keys rsp,rcx,3
-       copy_round_keys rsp,rcx,4
-       copy_round_keys rsp,rcx,5
-       copy_round_keys rsp,rcx,6
-       copy_round_keys rsp,rcx,7
-       copy_round_keys rsp,rcx,8
-       copy_round_keys rsp,rcx,9
-       copy_round_keys rsp,rcx,10
-       mov rcx,rsp     
-
-
-       align 16        
-       
-lpencctr128four:
-       
-       test eax,eax
-       jz end_encctr128
-       
-       cmp eax,4
-       jl lp128encctrsingle
-
-       load_and_inc4 [rcx+0*16]
-       add rdx,4*16
-       aesenc4 [rcx+1*16]
-       aesenc4 [rcx+2*16]
-       aesenc4 [rcx+3*16]
-       aesenc4 [rcx+4*16]
-       aesenc4 [rcx+5*16]
-       aesenc4 [rcx+6*16]
-       aesenc4 [rcx+7*16]
-       aesenc4 [rcx+8*16]
-       aesenc4 [rcx+9*16]
-       aesenclast4     [rcx+10*16]
-       xor_with_input4 rdx-(4*16)
-       
-       store4 r8+rdx-16*4
-       sub eax,4
-       jmp lpencctr128four
-       
-       align 16
-lp128encctrsingle:
-
-       movdqa xmm0,xmm5
-       paddq   xmm5,[counter_add_one wrt rip]
-       add rdx, 16
-       movdqu xmm4,[rcx+0*16]
-       pxor xmm0, xmm4
-       aesenc1_u [rcx+1*16]
-       aesenc1_u [rcx+2*16]
-       aesenc1_u [rcx+3*16]
-       aesenc1_u [rcx+4*16]     
-       aesenc1_u [rcx+5*16]
-       aesenc1_u [rcx+6*16]
-       aesenc1_u [rcx+7*16]
-       aesenc1_u [rcx+8*16]
-       aesenc1_u [rcx+9*16]
-       aesenclast1_u [rcx+10*16]
-       movdqu xmm4, [rdx-16]
-       pxor  xmm0,xmm4
-
-               ; Store output encrypted data into CIPHERTEXT array
-       movdqu  [r8+rdx-16], xmm0
-       dec eax
-       jnz lp128encctrsingle
-
-end_encctr128:
-
-       mov        r9,[r9+24]
-       movdqu [r9],xmm5
-       add rsp,16*16+8
-       ret
-
-
-
-align 16
-global iEnc192_CTR
-iEnc192_CTR:
-
-       linux_setup
-
-       mov r9,rcx
-       mov rax,[rcx+24]
-       movdqu xmm5,[rax]
-
-
-       sub rsp,16*16+8
-       
-       mov eax,[rcx+32] ; numblocks
-       mov rdx,[rcx]
-       mov r8,[rcx+8]
-       mov rcx,[rcx+16]
-       
-       sub r8,rdx
-
-
-       test eax,eax
-       jz end_encctr192
-       
-       cmp eax,4
-       jl lp192encctrsingle
-
-       test    rcx,0xf
-       jz              lpencctr192four
-       
-       copy_round_keys rsp,rcx,0
-       copy_round_keys rsp,rcx,1
-       copy_round_keys rsp,rcx,2
-       copy_round_keys rsp,rcx,3
-       copy_round_keys rsp,rcx,4
-       copy_round_keys rsp,rcx,5
-       copy_round_keys rsp,rcx,6
-       copy_round_keys rsp,rcx,7
-       copy_round_keys rsp,rcx,8
-       copy_round_keys rsp,rcx,9
-       copy_round_keys rsp,rcx,10
-       copy_round_keys rsp,rcx,11
-       copy_round_keys rsp,rcx,12
-       mov rcx,rsp     
-
-
-       align 16        
-       
-lpencctr192four:
-       
-       test eax,eax
-       jz end_encctr192
-       
-       cmp eax,4
-       jl lp192encctrsingle
-
-       load_and_inc4 [rcx+0*16]
-       add rdx,4*16
-       aesenc4 [rcx+1*16]
-       aesenc4 [rcx+2*16]
-       aesenc4 [rcx+3*16]
-       aesenc4 [rcx+4*16]
-       aesenc4 [rcx+5*16]
-       aesenc4 [rcx+6*16]
-       aesenc4 [rcx+7*16]
-       aesenc4 [rcx+8*16]
-       aesenc4 [rcx+9*16]
-       aesenc4 [rcx+10*16]
-       aesenc4 [rcx+11*16]
-       aesenclast4     [rcx+12*16]
-       xor_with_input4 rdx-(4*16)
-       
-       store4 r8+rdx-16*4
-       sub eax,4
-       jmp lpencctr192four
-       
-       align 16
-lp192encctrsingle:
-
-       movdqa xmm5,xmm0
-       movdqu xmm4,[rcx+0*16]
-       paddq   xmm5,[counter_add_one wrt rip]
-       add rdx, 16
-       pxor xmm0, xmm4
-       aesenc1_u [rcx+1*16]
-       aesenc1_u [rcx+2*16]
-       aesenc1_u [rcx+3*16]
-       aesenc1_u [rcx+4*16]     
-       aesenc1_u [rcx+5*16]
-       aesenc1_u [rcx+6*16]
-       aesenc1_u [rcx+7*16]
-       aesenc1_u [rcx+8*16]
-       aesenc1_u [rcx+9*16]
-       aesenc1_u [rcx+10*16]
-       aesenc1_u [rcx+11*16]
-       aesenclast1_u [rcx+12*16]
-       movdqu xmm4, [rdx]
-       pxor  xmm0,xmm4
-
-               ; Store output encrypted data into CIPHERTEXT array
-       movdqu  [r8+rdx-16], xmm0
-       dec eax
-       jnz lp192encctrsingle
-
-end_encctr192:
-
-       mov        r9,[r9+24]
-       movdqu [r9],xmm5
-       add rsp,16*16+8
-       ret
-
-
-align 16
-global iEnc256_CTR
-iEnc256_CTR:
-
-       linux_setup
-
-       mov r9,rcx
-       mov rax,[rcx+24]
-       movdqu xmm5,[rax]
-
-
-       sub rsp,16*16+8
-       
-       mov eax,[rcx+32] ; numblocks
-       mov rdx,[rcx]
-       mov r8,[rcx+8]
-       mov rcx,[rcx+16]
-       
-       sub r8,rdx
-
-
-       test eax,eax
-       jz end_encctr256
-       
-       cmp eax,4
-       jl lp256encctrsingle
-
-       test    rcx,0xf
-       jz              lpencctr256four
-       
-       copy_round_keys rsp,rcx,0
-       copy_round_keys rsp,rcx,1
-       copy_round_keys rsp,rcx,2
-       copy_round_keys rsp,rcx,3
-       copy_round_keys rsp,rcx,4
-       copy_round_keys rsp,rcx,5
-       copy_round_keys rsp,rcx,6
-       copy_round_keys rsp,rcx,7
-       copy_round_keys rsp,rcx,8
-       copy_round_keys rsp,rcx,9
-       copy_round_keys rsp,rcx,10
-       copy_round_keys rsp,rcx,11
-       copy_round_keys rsp,rcx,12
-       copy_round_keys rsp,rcx,13
-       copy_round_keys rsp,rcx,14
-       mov rcx,rsp     
-
-
-       align 16        
-       
-lpencctr256four:
-       
-       test eax,eax
-       jz end_encctr256
-       
-       cmp eax,4
-       jl lp256encctrsingle
-
-       load_and_inc4 [rcx+0*16]
-       add rdx,4*16
-       aesenc4 [rcx+1*16]
-       aesenc4 [rcx+2*16]
-       aesenc4 [rcx+3*16]
-       aesenc4 [rcx+4*16]
-       aesenc4 [rcx+5*16]
-       aesenc4 [rcx+6*16]
-       aesenc4 [rcx+7*16]
-       aesenc4 [rcx+8*16]
-       aesenc4 [rcx+9*16]
-       aesenc4 [rcx+10*16]
-       aesenc4 [rcx+11*16]
-       aesenc4 [rcx+12*16]
-       aesenc4 [rcx+13*16]
-       aesenclast4     [rcx+14*16]
-       xor_with_input4 rdx-(4*16)
-       
-       store4 r8+rdx-16*4
-       sub eax,4
-       jmp lpencctr256four
-       
-       align 16
-lp256encctrsingle:
-
-       movdqa xmm5,xmm0
-       movdqu xmm4,[rcx+0*16]
-       paddq   xmm5,[counter_add_one wrt rip]
-       add rdx, 16
-       pxor xmm0, xmm4
-       aesenc1_u [rcx+1*16]
-       aesenc1_u [rcx+2*16]
-       aesenc1_u [rcx+3*16]
-       aesenc1_u [rcx+4*16]     
-       aesenc1_u [rcx+5*16]
-       aesenc1_u [rcx+6*16]
-       aesenc1_u [rcx+7*16]
-       aesenc1_u [rcx+8*16]
-       aesenc1_u [rcx+9*16]
-       aesenc1_u [rcx+10*16]
-       aesenc1_u [rcx+11*16]
-       aesenc1_u [rcx+12*16]
-       aesenc1_u [rcx+13*16]
-       aesenclast1_u [rcx+14*16]
-       movdqu xmm4, [rdx]
-       pxor  xmm0,xmm4
-
-               ; Store output encrypted data into CIPHERTEXT array
-       movdqu  [r8+rdx-16], xmm0
-       dec eax
-       jnz lp256encctrsingle
-
-end_encctr256:
-
-       mov        r9,[r9+24]
-       movdqu [r9],xmm5
-       add rsp,16*16+8
-       ret
-
-
-
-
-
-
-
-align 16
-global iEnc128_CBC
-iEnc128_CBC:
-
-       linux_setup
-       sub rsp,16*16+8
-       
-       mov r9,rcx
-       mov rax,[rcx+24]
-       movdqu xmm1,[rax]
-       
-       mov eax,[rcx+32] ; numblocks
-       mov rdx,[rcx]
-       mov r8,[rcx+8]
-       mov rcx,[rcx+16]
-       
-       sub r8,rdx
-
-
-       test    rcx,0xf
-       jz              lp128encsingle_CBC
-       
-       copy_round_keys rsp,rcx,0
-       copy_round_keys rsp,rcx,1
-       copy_round_keys rsp,rcx,2
-       copy_round_keys rsp,rcx,3
-       copy_round_keys rsp,rcx,4
-       copy_round_keys rsp,rcx,5
-       copy_round_keys rsp,rcx,6
-       copy_round_keys rsp,rcx,7
-       copy_round_keys rsp,rcx,8
-       copy_round_keys rsp,rcx,9
-       copy_round_keys rsp,rcx,10
-       mov rcx,rsp     
-
-
-       align 16        
-       
-lp128encsingle_CBC:
-
-       movdqu xmm0, [rdx]
-       movdqu xmm4,[rcx+0*16]
-       add rdx, 16
-       pxor xmm0, xmm1
-       pxor xmm0, xmm4
-       aesenc1 [rcx+1*16]
-       aesenc1 [rcx+2*16]
-       aesenc1 [rcx+3*16]
-       aesenc1 [rcx+4*16]     
-       aesenc1 [rcx+5*16]
-       aesenc1 [rcx+6*16]
-       aesenc1 [rcx+7*16]
-       aesenc1 [rcx+8*16]
-       aesenc1 [rcx+9*16]
-       aesenclast1 [rcx+10*16]
-       movdqa xmm1,xmm0
-
-               ; Store output encrypted data into CIPHERTEXT array
-       movdqu  [r8+rdx-16], xmm0
-       dec eax
-       jnz lp128encsingle_CBC
-
-       mov        r9,[r9+24]
-       movdqu [r9],xmm1
-       add rsp,16*16+8
-       ret
-
-
-align 16
-global iEnc192_CBC
-iEnc192_CBC:
-
-       linux_setup
-       sub rsp,16*16+8
-       mov r9,rcx
-       mov rax,[rcx+24]
-       movdqu xmm1,[rax]
-       
-       mov eax,[rcx+32] ; numblocks
-       mov rdx,[rcx]
-       mov r8,[rcx+8]
-       mov rcx,[rcx+16]
-       
-       sub r8,rdx
-
-       test    rcx,0xf
-       jz              lp192encsingle_CBC
-       
-       copy_round_keys rsp,rcx,0
-       copy_round_keys rsp,rcx,1
-       copy_round_keys rsp,rcx,2
-       copy_round_keys rsp,rcx,3
-       copy_round_keys rsp,rcx,4
-       copy_round_keys rsp,rcx,5
-       copy_round_keys rsp,rcx,6
-       copy_round_keys rsp,rcx,7
-       copy_round_keys rsp,rcx,8
-       copy_round_keys rsp,rcx,9
-       copy_round_keys rsp,rcx,10
-       copy_round_keys rsp,rcx,11
-       copy_round_keys rsp,rcx,12
-       mov rcx,rsp     
-
-
-
-       align 16        
-       
-lp192encsingle_CBC:
-
-       movdqu xmm0, [rdx]
-       movdqu xmm4, [rcx+0*16]
-       add rdx, 16
-       pxor xmm0, xmm1
-       pxor xmm0, xmm4
-       aesenc1 [rcx+1*16]
-       aesenc1 [rcx+2*16]
-       aesenc1 [rcx+3*16]
-       aesenc1 [rcx+4*16]     
-       aesenc1 [rcx+5*16]
-       aesenc1 [rcx+6*16]
-       aesenc1 [rcx+7*16]
-       aesenc1 [rcx+8*16]
-       aesenc1 [rcx+9*16]
-       aesenc1 [rcx+10*16]
-       aesenc1 [rcx+11*16]
-       aesenclast1 [rcx+12*16]
-       movdqa xmm1,xmm0
-
-               ; Store output encrypted data into CIPHERTEXT array
-       movdqu  [r8+rdx-16], xmm0
-       dec eax
-       jnz lp192encsingle_CBC
-
-       mov        r9,[r9+24]
-       movdqu [r9],xmm1
-
-       add rsp,16*16+8
-       ret
-
-
-align 16
-global iEnc256_CBC
-iEnc256_CBC:
-
-       linux_setup
-       sub rsp,16*16+8
-       
-       mov r9,rcx
-       mov rax,[rcx+24]
-       movdqu xmm1,[rax]
-       
-       mov eax,[rcx+32] ; numblocks
-       mov rdx,[rcx]
-       mov r8,[rcx+8]
-       mov rcx,[rcx+16]
-       
-       sub r8,rdx
-
-       test    rcx,0xf
-       jz              lp256encsingle_CBC
-       
-       copy_round_keys rsp,rcx,0
-       copy_round_keys rsp,rcx,1
-       copy_round_keys rsp,rcx,2
-       copy_round_keys rsp,rcx,3
-       copy_round_keys rsp,rcx,4
-       copy_round_keys rsp,rcx,5
-       copy_round_keys rsp,rcx,6
-       copy_round_keys rsp,rcx,7
-       copy_round_keys rsp,rcx,8
-       copy_round_keys rsp,rcx,9
-       copy_round_keys rsp,rcx,10
-       copy_round_keys rsp,rcx,11
-       copy_round_keys rsp,rcx,12
-       copy_round_keys rsp,rcx,13
-       copy_round_keys rsp,rcx,14
-       mov rcx,rsp     
-
-       align 16        
-       
-lp256encsingle_CBC:
-
-       movdqu xmm0, [rdx]
-       movdqu xmm4, [rcx+0*16]
-       add rdx, 16
-       pxor xmm0, xmm1
-       pxor xmm0, xmm4
-       aesenc1 [rcx+1*16]
-       aesenc1 [rcx+2*16]
-       aesenc1 [rcx+3*16]
-       aesenc1 [rcx+4*16]     
-       aesenc1 [rcx+5*16]
-       aesenc1 [rcx+6*16]
-       aesenc1 [rcx+7*16]
-       aesenc1 [rcx+8*16]
-       aesenc1 [rcx+9*16]
-       aesenc1 [rcx+10*16]
-       aesenc1 [rcx+11*16]
-       aesenc1 [rcx+12*16]
-       aesenc1 [rcx+13*16]
-       aesenclast1 [rcx+14*16]
-       movdqa xmm1,xmm0
-
-               ; Store output encrypted data into CIPHERTEXT array
-       movdqu  [r8+rdx-16], xmm0
-       dec eax
-       jnz lp256encsingle_CBC
-
-       mov        r9,[r9+24]
-       movdqu [r9],xmm1
-       add rsp,16*16+8
-       ret
-
-
-
-
-align 16
-global iEnc192
-iEnc192:
-
-       linux_setup
-       sub rsp,16*16+8
-       
-       mov eax,[rcx+32] ; numblocks
-       mov rdx,[rcx]
-       mov r8,[rcx+8]
-       mov rcx,[rcx+16]
-       
-       sub r8,rdx
-
-       test eax,eax
-       jz end_enc192
-       
-       cmp eax,4
-       jl lp192encsingle
-
-       test    rcx,0xf
-       jz              lpenc192four
-       
-       copy_round_keys rsp,rcx,0
-       copy_round_keys rsp,rcx,1
-       copy_round_keys rsp,rcx,2
-       copy_round_keys rsp,rcx,3
-       copy_round_keys rsp,rcx,4
-       copy_round_keys rsp,rcx,5
-       copy_round_keys rsp,rcx,6
-       copy_round_keys rsp,rcx,7
-       copy_round_keys rsp,rcx,8
-       copy_round_keys rsp,rcx,9
-       copy_round_keys rsp,rcx,10
-       copy_round_keys rsp,rcx,11
-       copy_round_keys rsp,rcx,12
-       mov rcx,rsp     
-
-
-       align 16        
-       
-lpenc192four:
-       
-       test eax,eax
-       jz end_enc192
-       
-       cmp eax,4
-       jl lp192encsingle
-
-       load_and_xor4 rdx,[rcx+0*16]
-       add rdx,4*16
-       aesenc4 [rcx+1*16]
-       aesenc4 [rcx+2*16]
-       aesenc4 [rcx+3*16]
-       aesenc4 [rcx+4*16]
-       aesenc4 [rcx+5*16]
-       aesenc4 [rcx+6*16]
-       aesenc4 [rcx+7*16]
-       aesenc4 [rcx+8*16]
-       aesenc4 [rcx+9*16]
-       aesenc4 [rcx+10*16]
-       aesenc4 [rcx+11*16]
-       aesenclast4     [rcx+12*16]
-       
-       store4 r8+rdx-16*4
-       sub eax,4
-       jmp lpenc192four
-       
-       align 16
-lp192encsingle:
-
-       movdqu xmm0, [rdx]
-       movdqu xmm4, [rcx+0*16]
-       add rdx, 16
-       pxor xmm0, xmm4
-       aesenc1_u [rcx+1*16]
-       aesenc1_u [rcx+2*16]
-       aesenc1_u [rcx+3*16]
-       aesenc1_u [rcx+4*16]     
-       aesenc1_u [rcx+5*16]
-       aesenc1_u [rcx+6*16]
-       aesenc1_u [rcx+7*16]
-       aesenc1_u [rcx+8*16]
-       aesenc1_u [rcx+9*16]
-       aesenc1_u [rcx+10*16]
-       aesenc1_u [rcx+11*16]
-       aesenclast1_u [rcx+12*16]
-
-               ; Store output encrypted data into CIPHERTEXT array
-       movdqu  [r8+rdx-16], xmm0
-       dec eax
-       jnz lp192encsingle
-
-end_enc192:
-
-       add rsp,16*16+8
-       ret
-
-
-
-
-
-
-align 16
-global iEnc256
-iEnc256:
-
-       linux_setup
-       sub rsp,16*16+8
-       
-       mov eax,[rcx+32] ; numblocks
-       mov rdx,[rcx]
-       mov r8,[rcx+8]
-       mov rcx,[rcx+16]
-
-       sub r8,rdx      
-
-
-       test eax,eax
-       jz end_enc256
-
-       cmp eax,4
-       jl lp256enc
-
-       test    rcx,0xf
-       jz              lp256enc4
-       
-       copy_round_keys rsp,rcx,0
-       copy_round_keys rsp,rcx,1
-       copy_round_keys rsp,rcx,2
-       copy_round_keys rsp,rcx,3
-       copy_round_keys rsp,rcx,4
-       copy_round_keys rsp,rcx,5
-       copy_round_keys rsp,rcx,6
-       copy_round_keys rsp,rcx,7
-       copy_round_keys rsp,rcx,8
-       copy_round_keys rsp,rcx,9
-       copy_round_keys rsp,rcx,10
-       copy_round_keys rsp,rcx,11
-       copy_round_keys rsp,rcx,12
-       copy_round_keys rsp,rcx,13
-       copy_round_keys rsp,rcx,14
-       mov rcx,rsp     
-
-
-       align 16
-       
-lp256enc4:
-       test eax,eax
-       jz end_enc256
-
-       cmp eax,4
-       jl lp256enc
-
-
-       load_and_xor4 rdx,[rcx+0*16]
-       add rdx, 16*4
-       aesenc4 [rcx+1*16]
-       aesenc4 [rcx+2*16]
-       aesenc4 [rcx+3*16]
-       aesenc4 [rcx+4*16]
-       aesenc4 [rcx+5*16]
-       aesenc4 [rcx+6*16]
-       aesenc4 [rcx+7*16]
-       aesenc4 [rcx+8*16]
-       aesenc4 [rcx+9*16]
-       aesenc4 [rcx+10*16]
-       aesenc4 [rcx+11*16]
-       aesenc4 [rcx+12*16]
-       aesenc4 [rcx+13*16]
-       aesenclast4 [rcx+14*16]
-
-       store4  r8+rdx-16*4
-       sub eax,4
-       jmp lp256enc4
-       
-       align 16
-lp256enc:
-
-       movdqu xmm0, [rdx]
-       movdqu xmm4, [rcx+0*16]
-       add rdx, 16
-       pxor xmm0, xmm4
-       aesenc1_u [rcx+1*16]
-       aesenc1_u [rcx+2*16]
-       aesenc1_u [rcx+3*16]
-       aesenc1_u [rcx+4*16]
-       aesenc1_u [rcx+5*16]
-       aesenc1_u [rcx+6*16]
-       aesenc1_u [rcx+7*16]
-       aesenc1_u [rcx+8*16]
-       aesenc1_u [rcx+9*16]
-       aesenc1_u [rcx+10*16]
-       aesenc1_u [rcx+11*16]
-       aesenc1_u [rcx+12*16]
-       aesenc1_u [rcx+13*16]
-       aesenclast1_u [rcx+14*16]
-
-               ; Store output encrypted data into CIPHERTEXT array
-       movdqu  [r8+rdx-16], xmm0
-       dec eax
-       jnz lp256enc
-
-end_enc256:
-
-       add rsp,16*16+8
-       ret
diff --git a/lib/accelerated/intel/asm/x86_iaesx86.s 
b/lib/accelerated/intel/asm/x86_iaesx86.s
deleted file mode 100755
index 16fda3b..0000000
--- a/lib/accelerated/intel/asm/x86_iaesx86.s
+++ /dev/null
@@ -1,2183 +0,0 @@
-[bits 32]
-[CPU intelnop]
-
-; Copyright (c) 2010, Intel Corporation
-; All rights reserved.
-; 
-; Redistribution and use in source and binary forms, with or without 
-; modification, are permitted provided that the following conditions are met:
-; 
-;     * Redistributions of source code must retain the above copyright notice, 
-;       this list of conditions and the following disclaimer.
-;     * Redistributions in binary form must reproduce the above copyright 
notice, 
-;       this list of conditions and the following disclaimer in the 
documentation 
-;       and/or other materials provided with the distribution.
-;     * Neither the name of Intel Corporation nor the names of its 
contributors 
-;       may be used to endorse or promote products derived from this software 
-;       without specific prior written permission.
-; 
-; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
AND 
-; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
IMPLIED 
-; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
DISCLAIMED. 
-; IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY 
DIRECT, 
-; INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 
(INCLUDING, 
-; BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
USE, 
-; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
OF 
-; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
NEGLIGENCE 
-; OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
-; ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
-%macro inversekey 1
-       movdqu  xmm1,%1
-       aesimc  xmm0,xmm1
-       movdqu  %1,xmm0
-%endmacro
-
-
-%macro aesdec4 1
-       movdqa  xmm4,%1
-
-       aesdec  xmm0,xmm4
-       aesdec  xmm1,xmm4
-       aesdec  xmm2,xmm4
-       aesdec  xmm3,xmm4
-
-%endmacro
-
-
-%macro aesdeclast4 1
-       movdqa  xmm4,%1
-
-       aesdeclast      xmm0,xmm4
-       aesdeclast      xmm1,xmm4
-       aesdeclast      xmm2,xmm4
-       aesdeclast      xmm3,xmm4
-
-%endmacro
-
-
-%macro aesenc4 1
-       movdqa  xmm4,%1
-
-       aesenc  xmm0,xmm4
-       aesenc  xmm1,xmm4
-       aesenc  xmm2,xmm4
-       aesenc  xmm3,xmm4
-
-%endmacro
-
-%macro aesenclast4 1
-       movdqa  xmm4,%1
-
-       aesenclast      xmm0,xmm4
-       aesenclast      xmm1,xmm4
-       aesenclast      xmm2,xmm4
-       aesenclast      xmm3,xmm4
-
-%endmacro
-
-
-%macro aesdeclast1 1
-       aesdeclast      xmm0,%1
-%endmacro
-
-%macro aesenclast1 1
-       aesenclast      xmm0,%1
-%endmacro
-
-%macro aesdec1 1
-       aesdec  xmm0,%1
-%endmacro
-
-;abab
-%macro aesenc1 1
-       aesenc  xmm0,%1
-%endmacro
-
-
-%macro aesdeclast1_u 1
-       movdqu xmm4,%1
-       aesdeclast      xmm0,xmm4
-%endmacro
-
-%macro aesenclast1_u 1
-       movdqu xmm4,%1
-       aesenclast      xmm0,xmm4
-%endmacro
-
-%macro aesdec1_u 1
-       movdqu xmm4,%1
-       aesdec  xmm0,xmm4
-%endmacro
-
-%macro aesenc1_u 1
-       movdqu xmm4,%1
-       aesenc  xmm0,xmm4
-%endmacro
-
-
-%macro load_and_xor4 2
-       movdqa  xmm4,%2
-       movdqu  xmm0,[%1 + 0*16]
-       pxor    xmm0,xmm4
-       movdqu  xmm1,[%1 + 1*16]
-       pxor    xmm1,xmm4
-       movdqu  xmm2,[%1 + 2*16]
-       pxor    xmm2,xmm4
-       movdqu  xmm3,[%1 + 3*16]
-       pxor    xmm3,xmm4
-%endmacro
-
-
-%macro load_and_inc4 1
-       movdqa  xmm4,%1
-       movdqa  xmm0,xmm5
-       movdqa  xmm1,xmm5
-       paddq   xmm1,[counter_add_one]
-       movdqa  xmm2,xmm5
-       paddq   xmm2,[counter_add_two]
-       movdqa  xmm3,xmm5
-       paddq   xmm3,[counter_add_three]
-       pxor    xmm0,xmm4
-       paddq   xmm5,[counter_add_four]
-       pxor    xmm1,xmm4
-       pxor    xmm2,xmm4
-       pxor    xmm3,xmm4
-%endmacro
-
-%macro xor_with_input4 1
-       movdqu xmm4,[%1]
-       pxor xmm0,xmm4
-       movdqu xmm4,[%1+16]
-       pxor xmm1,xmm4
-       movdqu xmm4,[%1+32]
-       pxor xmm2,xmm4
-       movdqu xmm4,[%1+48]
-       pxor xmm3,xmm4
-%endmacro
-
-%macro store4 1
-       movdqu [%1 + 0*16],xmm0
-       movdqu [%1 + 1*16],xmm1
-       movdqu [%1 + 2*16],xmm2
-       movdqu [%1 + 3*16],xmm3
-%endmacro
-
-
-%macro copy_round_keys 3
-       movdqu xmm4,[%2 + ((%3)*16)]
-       movdqa [%1 + ((%3)*16)],xmm4
-%endmacro
-
-;abab
-%macro copy_round_keyx 3
-       movdqu xmm4,[%2 + ((%3)*16)]
-       movdqa %1,xmm4
-%endmacro
-
-
-
-%macro key_expansion_1_192 1
-               ;; Assumes the xmm3 includes all zeros at this point. 
-        pshufd xmm2, xmm2, 11111111b        
-        shufps xmm3, xmm1, 00010000b        
-        pxor xmm1, xmm3        
-        shufps xmm3, xmm1, 10001100b
-        pxor xmm1, xmm3        
-               pxor xmm1, xmm2         
-               movdqu [edx+%1], xmm1                   
-%endmacro
-
-; Calculate w10 and w11 using calculated w9 and known w4-w5
-%macro key_expansion_2_192 1                           
-               movdqa xmm5, xmm4
-               pslldq xmm5, 4
-               shufps xmm6, xmm1, 11110000b
-               pxor xmm6, xmm5
-               pxor xmm4, xmm6
-               pshufd xmm7, xmm4, 00001110b 
-               movdqu [edx+%1], xmm7
-%endmacro
-
-
-
-
-
-section .data
-align 16
-shuffle_mask:
-DD 0FFFFFFFFh
-DD 03020100h
-DD 07060504h
-DD 0B0A0908h
-
-align 16
-counter_add_one:
-DD 1
-DD 0
-DD 0
-DD 0
-
-counter_add_two:
-DD 2
-DD 0
-DD 0
-DD 0
-
-counter_add_three:
-DD 3
-DD 0
-DD 0
-DD 0
-
-counter_add_four:
-DD 4
-DD 0
-DD 0
-DD 0
-
-
-section .text
-
-
-
-align 16
-key_expansion256:
-
-    pshufd xmm2, xmm2, 011111111b
-
-    movdqu xmm4, xmm1
-    pshufb xmm4, xmm5
-    pxor xmm1, xmm4
-    pshufb xmm4, xmm5
-    pxor xmm1, xmm4
-    pshufb xmm4, xmm5
-    pxor xmm1, xmm4
-    pxor xmm1, xmm2
-
-    movdqu [edx], xmm1
-    add edx, 0x10
-    
-    aeskeygenassist xmm4, xmm1, 0
-    pshufd xmm2, xmm4, 010101010b
-
-    movdqu xmm4, xmm3
-    pshufb xmm4, xmm5
-    pxor xmm3, xmm4
-    pshufb xmm4, xmm5
-    pxor xmm3, xmm4
-    pshufb xmm4, xmm5
-    pxor xmm3, xmm4
-    pxor xmm3, xmm2
-
-    movdqu [edx], xmm3
-    add edx, 0x10
-
-    ret
-
-
-
-align 16
-key_expansion128: 
-    pshufd xmm2, xmm2, 0xFF;
-    movdqu xmm3, xmm1
-    pshufb xmm3, xmm5
-    pxor xmm1, xmm3
-    pshufb xmm3, xmm5
-    pxor xmm1, xmm3
-    pshufb xmm3, xmm5
-    pxor xmm1, xmm3
-    pxor xmm1, xmm2
-
-    ; storing the result in the key schedule array
-    movdqu [edx], xmm1
-    add edx, 0x10                    
-    ret
-
-
-
-align 16
-global _iEncExpandKey128
-_iEncExpandKey128:
-
-       mov ecx,[esp-4+8]               ;input
-       mov edx,[esp-4+12]              ;ctx
-
-        movdqu xmm1, [ecx]    ; loading the key
-
-        movdqu [edx], xmm1
-
-        movdqa xmm5, [shuffle_mask]
-
-        add edx,16
-
-        aeskeygenassist xmm2, xmm1, 0x1     ; Generating round key 1
-        call key_expansion128
-        aeskeygenassist xmm2, xmm1, 0x2     ; Generating round key 2
-        call key_expansion128
-        aeskeygenassist xmm2, xmm1, 0x4     ; Generating round key 3
-        call key_expansion128
-        aeskeygenassist xmm2, xmm1, 0x8     ; Generating round key 4
-        call key_expansion128
-        aeskeygenassist xmm2, xmm1, 0x10    ; Generating round key 5
-        call key_expansion128
-        aeskeygenassist xmm2, xmm1, 0x20    ; Generating round key 6
-        call key_expansion128
-        aeskeygenassist xmm2, xmm1, 0x40    ; Generating round key 7
-        call key_expansion128
-        aeskeygenassist xmm2, xmm1, 0x80    ; Generating round key 8
-        call key_expansion128
-        aeskeygenassist xmm2, xmm1, 0x1b    ; Generating round key 9
-        call key_expansion128
-        aeskeygenassist xmm2, xmm1, 0x36    ; Generating round key 10
-        call key_expansion128
-
-       ret
-
-
-align 16
-global _iEncExpandKey192
-_iEncExpandKey192:
-
-       mov ecx,[esp-4+8]               ;input
-       mov edx,[esp-4+12]              ;ctx
-
-        movq xmm7, [ecx+16]    ; loading the AES key
-        movq [edx+16], xmm7  ; Storing key in memory where all key expansion 
-        pshufd xmm4, xmm7, 01001111b
-        movdqu xmm1, [ecx]     ; loading the AES key
-        movdqu [edx], xmm1  ; Storing key in memory where all key expansion 
-                       
-        pxor xmm3, xmm3                ; Set xmm3 to be all zeros. Required 
for the key_expansion. 
-        pxor xmm6, xmm6                ; Set xmm3 to be all zeros. Required 
for the key_expansion. 
-
-        aeskeygenassist xmm2, xmm4, 0x1     ; Complete round key 1 and 
generate round key 2 
-        key_expansion_1_192 24
-       key_expansion_2_192 40                          
-
-        aeskeygenassist xmm2, xmm4, 0x2     ; Generate round key 3 and part of 
round key 4
-        key_expansion_1_192 48
-       key_expansion_2_192 64                          
-
-        aeskeygenassist xmm2, xmm4, 0x4     ; Complete round key 4 and 
generate round key 5
-        key_expansion_1_192 72
-       key_expansion_2_192 88
-               
-        aeskeygenassist xmm2, xmm4, 0x8     ; Generate round key 6 and part of 
round key 7
-        key_expansion_1_192 96
-       key_expansion_2_192 112
-               
-        aeskeygenassist xmm2, xmm4, 0x10     ; Complete round key 7 and 
generate round key 8 
-        key_expansion_1_192 120
-       key_expansion_2_192 136                         
-
-        aeskeygenassist xmm2, xmm4, 0x20     ; Generate round key 9 and part 
of round key 10
-        key_expansion_1_192 144
-       key_expansion_2_192 160                         
-
-        aeskeygenassist xmm2, xmm4, 0x40     ; Complete round key 10 and 
generate round key 11
-        key_expansion_1_192 168
-       key_expansion_2_192 184                         
-
-        aeskeygenassist xmm2, xmm4, 0x80     ; Generate round key 12
-        key_expansion_1_192 192
-
-       ret
-
-
-
-
-
-
-align 16
-global _iDecExpandKey128
-_iDecExpandKey128:
-       push DWORD [esp+8]
-       push DWORD [esp+8]
-       
-       call _iEncExpandKey128
-       add esp,8
-
-       mov edx,[esp-4+12]              ;ctx
-       
-       inversekey      [edx + 1*16]
-       inversekey      [edx + 2*16]
-       inversekey      [edx + 3*16]
-       inversekey      [edx + 4*16]
-       inversekey      [edx + 5*16]
-       inversekey      [edx + 6*16]
-       inversekey      [edx + 7*16]
-       inversekey      [edx + 8*16]
-       inversekey      [edx + 9*16]
-
-       ret
-
-
-
-
-align 16
-global _iDecExpandKey192
-_iDecExpandKey192:
-       push DWORD [esp+8]
-       push DWORD [esp+8]
-       
-       call _iEncExpandKey192
-       add esp,8
-
-       mov edx,[esp-4+12]              ;ctx
-       
-       inversekey      [edx + 1*16]
-       inversekey      [edx + 2*16]
-       inversekey      [edx + 3*16]
-       inversekey      [edx + 4*16]
-       inversekey      [edx + 5*16]
-       inversekey      [edx + 6*16]
-       inversekey      [edx + 7*16]
-       inversekey      [edx + 8*16]
-       inversekey      [edx + 9*16]
-       inversekey      [edx + 10*16]
-       inversekey      [edx + 11*16]
-
-       ret
-
-
-
-
-align 16
-global _iDecExpandKey256
-_iDecExpandKey256:
-       push DWORD [esp+8]
-       push DWORD [esp+8]
-       
-       call _iEncExpandKey256
-       add esp, 8
-
-       mov edx, [esp-4+12]             ;expanded key
-       
-       inversekey      [edx + 1*16]
-       inversekey      [edx + 2*16]
-       inversekey      [edx + 3*16]
-       inversekey      [edx + 4*16]
-       inversekey      [edx + 5*16]
-       inversekey      [edx + 6*16]
-       inversekey      [edx + 7*16]
-       inversekey      [edx + 8*16]
-       inversekey      [edx + 9*16]
-       inversekey      [edx + 10*16]
-       inversekey      [edx + 11*16]
-       inversekey      [edx + 12*16]
-       inversekey      [edx + 13*16]
-
-       ret
-       
-
-       
-       
-align 16
-global _iEncExpandKey256
-_iEncExpandKey256:
-       mov ecx, [esp-4+8]              ;input
-       mov edx, [esp-4+12]             ;expanded key
-
-
-    movdqu xmm1, [ecx]    ; loading the key
-    movdqu xmm3, [ecx+16]
-    movdqu [edx], xmm1  ; Storing key in memory where all key schedule will be 
stored
-    movdqu [edx+16], xmm3 
-    
-    add edx,32
-
-    movdqa xmm5, [shuffle_mask]  ; this mask is used by key_expansion
-
-    aeskeygenassist xmm2, xmm3, 0x1     ; 
-    call key_expansion256
-    aeskeygenassist xmm2, xmm3, 0x2     ; 
-    call key_expansion256
-    aeskeygenassist xmm2, xmm3, 0x4     ; 
-    call key_expansion256
-    aeskeygenassist xmm2, xmm3, 0x8     ; 
-    call key_expansion256
-    aeskeygenassist xmm2, xmm3, 0x10    ; 
-    call key_expansion256
-    aeskeygenassist xmm2, xmm3, 0x20    ; 
-    call key_expansion256
-    aeskeygenassist xmm2, xmm3, 0x40    ; 
-;    call key_expansion256 
-
-    pshufd xmm2, xmm2, 011111111b
-
-    movdqu xmm4, xmm1
-    pshufb xmm4, xmm5
-    pxor xmm1, xmm4
-    pshufb xmm4, xmm5
-    pxor xmm1, xmm4
-    pshufb xmm4, xmm5
-    pxor xmm1, xmm4
-    pxor xmm1, xmm2
-
-    movdqu [edx], xmm1
-
-
-       ret
-       
-       
-       
-       
-       
-
-align 16
-global _iDec128
-_iDec128:
-       mov ecx,[esp-4+8]
-       
-       push esi
-       push edi
-       push ebp
-       mov ebp,esp
-       
-       sub esp,16*16
-       and esp,0xfffffff0
-       
-       mov eax,[ecx+16] ; numblocks
-       mov esi,[ecx]
-       mov edi,[ecx+4]
-       mov ecx,[ecx+8]
-
-       sub edi,esi
-       
-       test eax,eax
-       jz end_dec128
-
-       cmp eax,4
-       jl      lp128decsingle
-
-       test    ecx,0xf
-       jz              lp128decfour
-       
-       copy_round_keys esp,ecx,0
-       copy_round_keys esp,ecx,1
-       copy_round_keys esp,ecx,2
-       copy_round_keys esp,ecx,3
-       copy_round_keys esp,ecx,4
-       copy_round_keys esp,ecx,5
-       copy_round_keys esp,ecx,6
-       copy_round_keys esp,ecx,7
-       copy_round_keys esp,ecx,8
-       copy_round_keys esp,ecx,9
-       copy_round_keys esp,ecx,10
-       mov ecx,esp     
-       
-
-align 16
-lp128decfour:
-       
-       test eax,eax
-       jz end_dec128
-
-       cmp eax,4
-       jl      lp128decsingle
-
-       load_and_xor4 esi, [ecx+10*16]
-       add esi,16*4
-       aesdec4 [ecx+9*16]
-       aesdec4 [ecx+8*16]
-       aesdec4 [ecx+7*16]
-       aesdec4 [ecx+6*16]
-       aesdec4 [ecx+5*16]
-       aesdec4 [ecx+4*16]
-       aesdec4 [ecx+3*16]
-       aesdec4 [ecx+2*16]
-       aesdec4 [ecx+1*16]
-       aesdeclast4 [ecx+0*16]
-       
-       sub eax,4
-       store4 esi+edi-(16*4)
-       jmp lp128decfour
-
-
-       align 16
-lp128decsingle:
-
-       movdqu xmm0, [esi]
-       movdqu xmm4,[ecx+10*16]
-       pxor xmm0, xmm4
-       aesdec1_u  [ecx+9*16]
-       aesdec1_u  [ecx+8*16]
-       aesdec1_u  [ecx+7*16]
-       aesdec1_u  [ecx+6*16]
-       aesdec1_u  [ecx+5*16]
-       aesdec1_u  [ecx+4*16]
-       aesdec1_u  [ecx+3*16]
-       aesdec1_u  [ecx+2*16]
-       aesdec1_u  [ecx+1*16]
-       aesdeclast1_u [ecx+0*16]
-
-       add esi, 16
-       movdqu  [edi+esi - 16], xmm0
-       dec eax
-       jnz lp128decsingle
-
-end_dec128:
-
-       mov esp,ebp
-       pop ebp
-       pop edi
-       pop esi
-       
-       ret
-
-
-
-align 16
-global _iDec128_CBC
-_iDec128_CBC:
-       mov ecx,[esp-4+8]
-       
-       push esi
-       push edi
-       push ebp
-       mov ebp,esp
-       sub esp,16*16
-       and esp,0xfffffff0
-       
-       mov eax,[ecx+12]
-       movdqu xmm5,[eax]       ;iv
-       
-       mov eax,[ecx+16] ; numblocks
-       mov esi,[ecx]
-       mov edi,[ecx+4]
-       mov ecx,[ecx+8]
-       
-       sub edi,esi
-
-       test eax,eax
-       jz end_dec128_CBC
-
-       cmp eax,4
-       jl      lp128decsingle_CBC
-
-       test    ecx,0xf
-       jz              lp128decfour_CBC
-       
-       copy_round_keys esp,ecx,0
-       copy_round_keys esp,ecx,1
-       copy_round_keys esp,ecx,2
-       copy_round_keys esp,ecx,3
-       copy_round_keys esp,ecx,4
-       copy_round_keys esp,ecx,5
-       copy_round_keys esp,ecx,6
-       copy_round_keys esp,ecx,7
-       copy_round_keys esp,ecx,8
-       copy_round_keys esp,ecx,9
-       copy_round_keys esp,ecx,10
-       mov ecx,esp     
-
-
-align 16
-lp128decfour_CBC:
-       
-       test eax,eax
-       jz end_dec128_CBC
-
-       cmp eax,4
-       jl      lp128decsingle_CBC
-
-       load_and_xor4 esi, [ecx+10*16]
-       add esi,16*4
-       aesdec4 [ecx+9*16]
-       aesdec4 [ecx+8*16]
-       aesdec4 [ecx+7*16]
-       aesdec4 [ecx+6*16]
-       aesdec4 [ecx+5*16]
-       aesdec4 [ecx+4*16]
-       aesdec4 [ecx+3*16]
-       aesdec4 [ecx+2*16]
-       aesdec4 [ecx+1*16]
-       aesdeclast4 [ecx+0*16]
-       
-       pxor    xmm0,xmm5
-       movdqu  xmm4,[esi- 16*4 + 0*16]
-       pxor    xmm1,xmm4
-       movdqu  xmm4,[esi- 16*4 + 1*16]
-       pxor    xmm2,xmm4
-       movdqu  xmm4,[esi- 16*4 + 2*16]
-       pxor    xmm3,xmm4
-       movdqu  xmm5,[esi- 16*4 + 3*16]
-       
-       sub eax,4
-       store4 esi+edi-(16*4)
-       jmp lp128decfour_CBC
-
-
-       align 16
-lp128decsingle_CBC:
-
-       movdqu xmm0, [esi]
-       movdqa xmm1,xmm0
-       movdqu xmm4,[ecx+10*16]
-       pxor xmm0, xmm4
-       aesdec1_u  [ecx+9*16]
-       aesdec1_u  [ecx+8*16]
-       aesdec1_u  [ecx+7*16]
-       aesdec1_u  [ecx+6*16]
-       aesdec1_u  [ecx+5*16]
-       aesdec1_u  [ecx+4*16]
-       aesdec1_u  [ecx+3*16]
-       aesdec1_u  [ecx+2*16]
-       aesdec1_u  [ecx+1*16]
-       aesdeclast1_u [ecx+0*16]
-       
-       pxor    xmm0,xmm5
-       movdqa  xmm5,xmm1
-       
-       add esi, 16
-       movdqu  [edi+esi - 16], xmm0
-       dec eax
-       jnz lp128decsingle_CBC
-
-end_dec128_CBC:
-
-       mov esp,ebp
-       pop ebp
-       pop edi
-       pop esi
-
-       mov ecx,[esp-4+8]   ; first arg
-       mov ecx,[ecx+12]
-       movdqu  [ecx],xmm5 ; store last iv for chaining
-       
-       ret
-
-
-
-
-
-
-align 16
-global _iDec192
-_iDec192:
-       mov ecx,[esp-4+8]
-       
-       push esi
-       push edi
-       push ebp
-       mov ebp,esp
-       
-       sub esp,16*16
-       and esp,0xfffffff0
-       
-       mov eax,[ecx+16] ; numblocks
-       mov esi,[ecx]
-       mov edi,[ecx+4]
-       mov ecx,[ecx+8]
-       
-       sub edi,esi
-
-       test eax,eax
-       jz end_dec192
-
-       cmp eax,4
-       jl      lp192decsingle
-
-       test    ecx,0xf
-       jz              lp192decfour
-       
-       copy_round_keys esp,ecx,0
-       copy_round_keys esp,ecx,1
-       copy_round_keys esp,ecx,2
-       copy_round_keys esp,ecx,3
-       copy_round_keys esp,ecx,4
-       copy_round_keys esp,ecx,5
-       copy_round_keys esp,ecx,6
-       copy_round_keys esp,ecx,7
-       copy_round_keys esp,ecx,8
-       copy_round_keys esp,ecx,9
-       copy_round_keys esp,ecx,10
-       copy_round_keys esp,ecx,11
-       copy_round_keys esp,ecx,12
-       mov ecx,esp     
-
-
-align 16
-lp192decfour:
-       
-       test eax,eax
-       jz end_dec192
-
-       cmp eax,4
-       jl      lp192decsingle
-
-       load_and_xor4 esi, [ecx+12*16]
-       add esi,16*4
-       aesdec4 [ecx+11*16]
-       aesdec4 [ecx+10*16]
-       aesdec4 [ecx+9*16]
-       aesdec4 [ecx+8*16]
-       aesdec4 [ecx+7*16]
-       aesdec4 [ecx+6*16]
-       aesdec4 [ecx+5*16]
-       aesdec4 [ecx+4*16]
-       aesdec4 [ecx+3*16]
-       aesdec4 [ecx+2*16]
-       aesdec4 [ecx+1*16]
-       aesdeclast4 [ecx+0*16]
-       
-       sub eax,4
-       store4 esi+edi-(16*4)
-       jmp lp192decfour
-
-
-       align 16
-lp192decsingle:
-
-       movdqu xmm0, [esi]
-       movdqu xmm4,[ecx+12*16]
-       pxor xmm0, xmm4
-       aesdec1_u [ecx+11*16]
-       aesdec1_u  [ecx+10*16]
-       aesdec1_u  [ecx+9*16]
-       aesdec1_u  [ecx+8*16]
-       aesdec1_u  [ecx+7*16]
-       aesdec1_u  [ecx+6*16]
-       aesdec1_u  [ecx+5*16]
-       aesdec1_u  [ecx+4*16]
-       aesdec1_u  [ecx+3*16]
-       aesdec1_u  [ecx+2*16]
-       aesdec1_u  [ecx+1*16]
-       aesdeclast1_u  [ecx+0*16]
-
-       add esi, 16
-       movdqu  [edi+esi - 16], xmm0
-       dec eax
-       jnz lp192decsingle
-
-end_dec192:
-
-
-       mov esp,ebp
-       pop ebp
-       pop edi
-       pop esi
-       
-       ret
-
-
-align 16
-global _iDec192_CBC
-_iDec192_CBC:
-       mov ecx,[esp-4+8]
-       
-       push esi
-       push edi
-       push ebp
-       mov ebp,esp
-       
-       sub esp,16*16
-       and esp,0xfffffff0
-
-       mov eax,[ecx+12]
-       movdqu xmm5,[eax]       ;iv
-       
-       mov eax,[ecx+16] ; numblocks
-       mov esi,[ecx]
-       mov edi,[ecx+4]
-       mov ecx,[ecx+8]
-       
-       sub edi,esi
-
-       test eax,eax
-       jz end_dec192_CBC
-
-       cmp eax,4
-       jl      lp192decsingle_CBC
-
-       test    ecx,0xf
-       jz              lp192decfour_CBC
-       
-       copy_round_keys esp,ecx,0
-       copy_round_keys esp,ecx,1
-       copy_round_keys esp,ecx,2
-       copy_round_keys esp,ecx,3
-       copy_round_keys esp,ecx,4
-       copy_round_keys esp,ecx,5
-       copy_round_keys esp,ecx,6
-       copy_round_keys esp,ecx,7
-       copy_round_keys esp,ecx,8
-       copy_round_keys esp,ecx,9
-       copy_round_keys esp,ecx,10
-       copy_round_keys esp,ecx,11
-       copy_round_keys esp,ecx,12
-       mov ecx,esp     
-
-align 16
-lp192decfour_CBC:
-       
-       test eax,eax
-       jz end_dec192_CBC
-
-       cmp eax,4
-       jl      lp192decsingle_CBC
-
-       load_and_xor4 esi, [ecx+12*16]
-       add esi,16*4
-       aesdec4 [ecx+11*16]
-       aesdec4 [ecx+10*16]
-       aesdec4 [ecx+9*16]
-       aesdec4 [ecx+8*16]
-       aesdec4 [ecx+7*16]
-       aesdec4 [ecx+6*16]
-       aesdec4 [ecx+5*16]
-       aesdec4 [ecx+4*16]
-       aesdec4 [ecx+3*16]
-       aesdec4 [ecx+2*16]
-       aesdec4 [ecx+1*16]
-       aesdeclast4 [ecx+0*16]
-       
-       pxor    xmm0,xmm5
-       movdqu  xmm4,[esi- 16*4 + 0*16]
-       pxor    xmm1,xmm4
-       movdqu  xmm4,[esi- 16*4 + 1*16]
-       pxor    xmm2,xmm4
-       movdqu  xmm4,[esi- 16*4 + 2*16]
-       pxor    xmm3,xmm4
-       movdqu  xmm5,[esi- 16*4 + 3*16]
-       
-       sub eax,4
-       store4 esi+edi-(16*4)
-       jmp lp192decfour_CBC
-
-
-       align 16
-lp192decsingle_CBC:
-
-       movdqu xmm0, [esi]
-       movdqu xmm4,[ecx+12*16]
-       movdqa xmm1,xmm0
-       pxor xmm0, xmm4
-       aesdec1_u [ecx+11*16]
-       aesdec1_u [ecx+10*16]
-       aesdec1_u [ecx+9*16]
-       aesdec1_u [ecx+8*16]
-       aesdec1_u [ecx+7*16]
-       aesdec1_u [ecx+6*16]
-       aesdec1_u [ecx+5*16]
-       aesdec1_u [ecx+4*16]
-       aesdec1_u [ecx+3*16]
-       aesdec1_u [ecx+2*16]
-       aesdec1_u [ecx+1*16]
-       aesdeclast1_u [ecx+0*16]
-       
-       pxor    xmm0,xmm5
-       movdqa  xmm5,xmm1
-       
-       add esi, 16
-       movdqu  [edi+esi - 16], xmm0
-       dec eax
-       jnz lp192decsingle_CBC
-
-end_dec192_CBC:
-
-
-       mov esp,ebp
-       pop ebp
-       pop edi
-       pop esi
-
-       mov ecx,[esp-4+8]
-       mov ecx,[ecx+12]
-       movdqu  [ecx],xmm5 ; store last iv for chaining
-       
-       ret
-
-
-
-
-
-align 16
-global _iDec256
-_iDec256:
-       mov ecx, [esp-4+8]
-       
-       push esi
-       push edi
-       push ebp
-       mov ebp,esp
-       
-       sub esp,16*16
-       and esp,0xfffffff0
-
-       mov eax,[ecx+16] ; numblocks
-       mov esi,[ecx]
-       mov edi,[ecx+4]
-       mov ecx,[ecx+8]
-       
-       sub edi,esi
-
-
-       test eax,eax
-       jz end_dec256
-       
-       cmp eax,4
-       jl lp256dec
-
-       test    ecx,0xf
-       jz      lp256dec4
-       
-       copy_round_keys esp,ecx,0
-       copy_round_keys esp,ecx,1
-       copy_round_keys esp,ecx,2
-       copy_round_keys esp,ecx,3
-       copy_round_keys esp,ecx,4
-       copy_round_keys esp,ecx,5
-       copy_round_keys esp,ecx,6
-       copy_round_keys esp,ecx,7
-       copy_round_keys esp,ecx,8
-       copy_round_keys esp,ecx,9
-       copy_round_keys esp,ecx,10
-       copy_round_keys esp,ecx,11
-       copy_round_keys esp,ecx,12
-       copy_round_keys esp,ecx,13
-       copy_round_keys esp,ecx,14
-       mov ecx,esp     
-       
-       align 16
-lp256dec4:
-       test eax,eax
-       jz end_dec256
-       
-       cmp eax,4
-       jl lp256dec
-       
-       load_and_xor4 esi,[ecx+14*16]
-       add esi, 4*16
-       aesdec4 [ecx+13*16]
-       aesdec4 [ecx+12*16]
-       aesdec4 [ecx+11*16]
-       aesdec4 [ecx+10*16]
-       aesdec4 [ecx+9*16]
-       aesdec4 [ecx+8*16]
-       aesdec4 [ecx+7*16]
-       aesdec4 [ecx+6*16]
-       aesdec4 [ecx+5*16]
-       aesdec4 [ecx+4*16]
-       aesdec4 [ecx+3*16]
-       aesdec4 [ecx+2*16]
-       aesdec4 [ecx+1*16]
-       aesdeclast4 [ecx+0*16]
-
-       store4 esi+edi-16*4
-       sub eax,4
-       jmp lp256dec4   
-       
-       align 16
-lp256dec:
-
-       movdqu xmm0, [esi]
-       movdqu xmm4,[ecx+14*16]
-       add esi, 16
-       pxor xmm0, xmm4                     ; Round 0 (only xor)
-       aesdec1_u  [ecx+13*16]
-       aesdec1_u  [ecx+12*16]
-       aesdec1_u  [ecx+11*16]
-       aesdec1_u  [ecx+10*16]
-       aesdec1_u  [ecx+9*16]
-       aesdec1_u  [ecx+8*16]
-       aesdec1_u  [ecx+7*16]
-       aesdec1_u  [ecx+6*16]
-       aesdec1_u  [ecx+5*16]
-       aesdec1_u  [ecx+4*16]
-       aesdec1_u  [ecx+3*16]
-       aesdec1_u  [ecx+2*16]
-       aesdec1_u  [ecx+1*16]
-       aesdeclast1_u  [ecx+0*16]
-
-               ; Store output encrypted data into CIPHERTEXT array
-       movdqu  [esi+edi-16], xmm0
-       dec eax
-       jnz lp256dec
-
-end_dec256:
-
-
-       mov esp,ebp
-       pop ebp
-       pop edi
-       pop esi
-       
-       ret
-
-
-
-
-align 16
-global _iDec256_CBC
-_iDec256_CBC:
-       mov ecx,[esp-4+8]
-       
-       push esi
-       push edi
-       push ebp
-       mov ebp,esp
-       
-       sub esp,16*16
-       and esp,0xfffffff0
-
-       mov eax,[ecx+12]
-       movdqu xmm5,[eax]       ;iv
-       
-       mov eax,[ecx+16] ; numblocks
-       mov esi,[ecx]
-       mov edi,[ecx+4]
-       mov ecx,[ecx+8]
-       
-       sub edi,esi
-
-       test eax,eax
-       jz end_dec256_CBC
-
-       cmp eax,4
-       jl      lp256decsingle_CBC
-
-       test    ecx,0xf
-       jz      lp256decfour_CBC
-       
-       copy_round_keys esp,ecx,0
-       copy_round_keys esp,ecx,1
-       copy_round_keys esp,ecx,2
-       copy_round_keys esp,ecx,3
-       copy_round_keys esp,ecx,4
-       copy_round_keys esp,ecx,5
-       copy_round_keys esp,ecx,6
-       copy_round_keys esp,ecx,7
-       copy_round_keys esp,ecx,8
-       copy_round_keys esp,ecx,9
-       copy_round_keys esp,ecx,10
-       copy_round_keys esp,ecx,11
-       copy_round_keys esp,ecx,12
-       copy_round_keys esp,ecx,13
-       copy_round_keys esp,ecx,14
-       mov ecx,esp     
-
-align 16
-lp256decfour_CBC:
-       
-       test eax,eax
-       jz end_dec256_CBC
-
-       cmp eax,4
-       jl      lp256decsingle_CBC
-
-       load_and_xor4 esi, [ecx+14*16]
-       add esi,16*4
-       aesdec4 [ecx+13*16]
-       aesdec4 [ecx+12*16]
-       aesdec4 [ecx+11*16]
-       aesdec4 [ecx+10*16]
-       aesdec4 [ecx+9*16]
-       aesdec4 [ecx+8*16]
-       aesdec4 [ecx+7*16]
-       aesdec4 [ecx+6*16]
-       aesdec4 [ecx+5*16]
-       aesdec4 [ecx+4*16]
-       aesdec4 [ecx+3*16]
-       aesdec4 [ecx+2*16]
-       aesdec4 [ecx+1*16]
-       aesdeclast4 [ecx+0*16]
-       
-       pxor    xmm0,xmm5
-       movdqu  xmm4,[esi- 16*4 + 0*16]
-       pxor    xmm1,xmm4
-       movdqu  xmm4,[esi- 16*4 + 1*16]
-       pxor    xmm2,xmm4
-       movdqu  xmm4,[esi- 16*4 + 2*16]
-       pxor    xmm3,xmm4
-       movdqu  xmm5,[esi- 16*4 + 3*16]
-       
-       sub eax,4
-       store4 esi+edi-(16*4)
-       jmp lp256decfour_CBC
-
-
-       align 16
-lp256decsingle_CBC:
-
-       movdqu xmm0, [esi]
-       movdqa xmm1,xmm0
-       movdqu xmm4, [ecx+14*16]
-       pxor xmm0, xmm4
-       aesdec1_u  [ecx+13*16]
-       aesdec1_u  [ecx+12*16]
-       aesdec1_u  [ecx+11*16]
-       aesdec1_u  [ecx+10*16]
-       aesdec1_u  [ecx+9*16]
-       aesdec1_u  [ecx+8*16]
-       aesdec1_u  [ecx+7*16]
-       aesdec1_u  [ecx+6*16]
-       aesdec1_u  [ecx+5*16]
-       aesdec1_u  [ecx+4*16]
-       aesdec1_u  [ecx+3*16]
-       aesdec1_u  [ecx+2*16]
-       aesdec1_u  [ecx+1*16]
-       aesdeclast1_u  [ecx+0*16]
-       
-       pxor    xmm0,xmm5
-       movdqa  xmm5,xmm1
-       
-       add esi, 16
-       movdqu  [edi+esi - 16], xmm0
-       dec eax
-       jnz lp256decsingle_CBC
-
-end_dec256_CBC:
-
-
-       mov esp,ebp
-       pop ebp
-       pop edi
-       pop esi
-
-       mov ecx,[esp-4+8]  ; first arg
-       mov ecx,[ecx+12]
-       movdqu  [ecx],xmm5 ; store last iv for chaining
-       
-       ret
-
-
-
-
-
-
-
-
-
-align 16
-global _iEnc128
-_iEnc128:
-       mov ecx,[esp-4+8]
-       
-       push esi
-       push edi
-       push ebp
-       mov ebp,esp
-       
-       sub esp,16*16
-       and esp,0xfffffff0
-
-       mov eax,[ecx+16] ; numblocks
-       mov esi,[ecx]
-       mov edi,[ecx+4]
-       mov ecx,[ecx+8]
-       
-       sub edi,esi
-
-       test eax,eax
-       jz end_enc128
-       
-       cmp eax,4
-       jl lp128encsingle
-
-       test    ecx,0xf
-       jz              lpenc128four
-       
-       copy_round_keys esp,ecx,0
-       copy_round_keys esp,ecx,1
-       copy_round_keys esp,ecx,2
-       copy_round_keys esp,ecx,3
-       copy_round_keys esp,ecx,4
-       copy_round_keys esp,ecx,5
-       copy_round_keys esp,ecx,6
-       copy_round_keys esp,ecx,7
-       copy_round_keys esp,ecx,8
-       copy_round_keys esp,ecx,9
-       copy_round_keys esp,ecx,10
-       mov ecx,esp     
-
-
-       align 16        
-       
-lpenc128four:
-       
-       test eax,eax
-       jz end_enc128
-       
-       cmp eax,4
-       jl lp128encsingle
-
-       load_and_xor4 esi,[ecx+0*16]
-       add esi,4*16
-       aesenc4 [ecx+1*16]
-       aesenc4 [ecx+2*16]
-       aesenc4 [ecx+3*16]
-       aesenc4 [ecx+4*16]
-       aesenc4 [ecx+5*16]
-       aesenc4 [ecx+6*16]
-       aesenc4 [ecx+7*16]
-       aesenc4 [ecx+8*16]
-       aesenc4 [ecx+9*16]
-       aesenclast4     [ecx+10*16]
-       
-       store4 esi+edi-16*4
-       sub eax,4
-       jmp lpenc128four
-       
-       align 16
-lp128encsingle:
-
-       movdqu xmm0, [esi]
-       add esi, 16
-       movdqu xmm4,[ecx+0*16]
-       pxor xmm0, xmm4
-       aesenc1_u  [ecx+1*16]
-       aesenc1_u  [ecx+2*16]
-       aesenc1_u  [ecx+3*16]
-       aesenc1_u  [ecx+4*16]     
-       aesenc1_u  [ecx+5*16]
-       aesenc1_u  [ecx+6*16]
-       aesenc1_u  [ecx+7*16]
-       aesenc1_u  [ecx+8*16]
-       aesenc1_u  [ecx+9*16]
-       aesenclast1_u  [ecx+10*16]
-               ; Store output encrypted data into CIPHERTEXT array
-       movdqu  [esi+edi-16], xmm0
-       dec eax
-       jnz lp128encsingle
-
-end_enc128:
-
-
-       mov esp,ebp
-       pop ebp
-       pop edi
-       pop esi
-       
-       ret
-
-
-align 16
-global _iEnc128_CTR
-_iEnc128_CTR:
-       mov ecx,[esp-4+8]
-       
-       push esi
-       push edi
-       push ebp
-       mov ebp,esp
-       
-       sub esp,16*16
-       and esp,0xfffffff0
-
-       mov     eax,[ecx+12]
-       movdqu xmm5,[eax]       ;initial counter
-
-       mov eax,[ecx+16] ; numblocks
-       mov esi,[ecx]
-       mov edi,[ecx+4]
-       mov ecx,[ecx+8]
-       
-       sub edi,esi
-
-       test eax,eax
-       jz end_encctr128
-       
-       cmp eax,4
-       jl lp128encctrsingle
-
-       test    ecx,0xf
-       jz              lpencctr128four
-       
-       copy_round_keys esp,ecx,0
-       copy_round_keys esp,ecx,1
-       copy_round_keys esp,ecx,2
-       copy_round_keys esp,ecx,3
-       copy_round_keys esp,ecx,4
-       copy_round_keys esp,ecx,5
-       copy_round_keys esp,ecx,6
-       copy_round_keys esp,ecx,7
-       copy_round_keys esp,ecx,8
-       copy_round_keys esp,ecx,9
-       copy_round_keys esp,ecx,10
-       mov ecx,esp     
-
-
-       align 16        
-       
-lpencctr128four:
-       
-       test eax,eax
-       jz end_encctr128
-       
-       cmp eax,4
-       jl lp128encsingle
-
-       load_and_inc4 [ecx+0*16]
-       add esi,4*16
-       aesenc4 [ecx+1*16]
-       aesenc4 [ecx+2*16]
-       aesenc4 [ecx+3*16]
-       aesenc4 [ecx+4*16]
-       aesenc4 [ecx+5*16]
-       aesenc4 [ecx+6*16]
-       aesenc4 [ecx+7*16]
-       aesenc4 [ecx+8*16]
-       aesenc4 [ecx+9*16]
-       aesenclast4     [ecx+10*16]
-       xor_with_input4 esi-(4*16)
-       
-       store4 esi+edi-16*4
-       sub eax,4
-       jmp lpencctr128four
-       
-       align 16
-lp128encctrsingle:
-
-       movdqa  xmm0,xmm5
-       paddq   xmm5,[counter_add_one]
-       add esi, 16
-       movdqu xmm4,[ecx+0*16]
-       pxor xmm0, xmm4
-       aesenc1_u [ecx+1*16]
-       aesenc1_u [ecx+2*16]
-       aesenc1_u [ecx+3*16]
-       aesenc1_u [ecx+4*16]     
-       aesenc1_u [ecx+5*16]
-       aesenc1_u [ecx+6*16]
-       aesenc1_u [ecx+7*16]
-       aesenc1_u [ecx+8*16]
-       aesenc1_u [ecx+9*16]
-       aesenclast1_u [ecx+10*16]
-       movdqu xmm4, [esi-16]
-       pxor    xmm0,xmm4
-               ; Store output encrypted data into CIPHERTEXT array
-       movdqu  [esi+edi-16], xmm0
-       dec eax
-       jnz lp128encctrsingle
-
-end_encctr128:
-
-       mov esp,ebp
-       pop ebp
-       pop edi
-       pop esi
-
-       mov ecx,[esp-4+8]  ; first arg
-       mov ecx,[ecx+12]
-       movdqu  [ecx],xmm5 ; store last counter for chaining
-       
-       ret
-
-
-align 16
-global _iEnc192_CTR
-_iEnc192_CTR:
-       mov ecx,[esp-4+8]
-       
-       push esi
-       push edi
-       push ebp
-       mov ebp,esp
-       
-       sub esp,16*16
-       and esp,0xfffffff0
-
-       mov     eax,[ecx+12]
-       movdqu xmm5,[eax]       ;initial counter
-
-       mov eax,[ecx+16] ; numblocks
-       mov esi,[ecx]
-       mov edi,[ecx+4]
-       mov ecx,[ecx+8]
-       
-       sub edi,esi
-
-       test eax,eax
-       jz end_encctr192
-       
-       cmp eax,4
-       jl lp192encctrsingle
-
-       test    ecx,0xf
-       jz              lpencctr128four
-       
-       copy_round_keys esp,ecx,0
-       copy_round_keys esp,ecx,1
-       copy_round_keys esp,ecx,2
-       copy_round_keys esp,ecx,3
-       copy_round_keys esp,ecx,4
-       copy_round_keys esp,ecx,5
-       copy_round_keys esp,ecx,6
-       copy_round_keys esp,ecx,7
-       copy_round_keys esp,ecx,8
-       copy_round_keys esp,ecx,9
-       copy_round_keys esp,ecx,10
-       copy_round_keys esp,ecx,11
-       copy_round_keys esp,ecx,12
-       mov ecx,esp     
-
-
-       align 16        
-       
-lpencctr192four:
-       
-       test eax,eax
-       jz end_encctr192
-       
-       cmp eax,4
-       jl lp192encsingle
-
-       load_and_inc4 [ecx+0*16]
-       add esi,4*16
-       aesenc4 [ecx+1*16]
-       aesenc4 [ecx+2*16]
-       aesenc4 [ecx+3*16]
-       aesenc4 [ecx+4*16]
-       aesenc4 [ecx+5*16]
-       aesenc4 [ecx+6*16]
-       aesenc4 [ecx+7*16]
-       aesenc4 [ecx+8*16]
-       aesenc4 [ecx+9*16]
-       aesenc4 [ecx+10*16]
-       aesenc4 [ecx+11*16]
-       aesenclast4     [ecx+12*16]
-       xor_with_input4 esi-(4*16)
-       
-       store4 esi+edi-16*4
-       sub eax,4
-       jmp lpencctr192four
-       
-       align 16
-lp192encctrsingle:
-
-       movdqa  xmm0,xmm5
-       paddq   xmm5,[counter_add_one]
-       add esi, 16
-       movdqu xmm4,[ecx+0*16]
-       pxor xmm0, xmm4
-       aesenc1_u  [ecx+1*16]
-       aesenc1_u  [ecx+2*16]
-       aesenc1_u  [ecx+3*16]
-       aesenc1_u  [ecx+4*16]     
-       aesenc1_u  [ecx+5*16]
-       aesenc1_u  [ecx+6*16]
-       aesenc1_u  [ecx+7*16]
-       aesenc1_u  [ecx+8*16]
-       aesenc1_u  [ecx+9*16]
-       aesenc1_u  [ecx+10*16]
-       aesenc1_u  [ecx+11*16]
-       aesenclast1_u  [ecx+12*16]
-       movdqu xmm4, [esi-16]
-       pxor    xmm0,xmm4
-               ; Store output encrypted data into CIPHERTEXT array
-       movdqu  [esi+edi-16], xmm0
-       dec eax
-       jnz lp192encctrsingle
-
-end_encctr192:
-
-       mov esp,ebp
-       pop ebp
-       pop edi
-       pop esi
-
-       mov ecx,[esp-4+8]  ; first arg
-       mov ecx,[ecx+12]
-       movdqu  [ecx],xmm5 ; store last counter for chaining
-       
-       ret
-
-
-align 16
-global _iEnc256_CTR
-_iEnc256_CTR:
-       mov ecx,[esp-4+8]
-       
-       push esi
-       push edi
-       push ebp
-       mov ebp,esp
-       
-       sub esp,16*16
-       and esp,0xfffffff0
-
-       mov     eax,[ecx+12]
-       movdqu xmm5,[eax]       ;initial counter
-
-       mov eax,[ecx+16] ; numblocks
-       mov esi,[ecx]
-       mov edi,[ecx+4]
-       mov ecx,[ecx+8]
-       
-       sub edi,esi
-
-       test eax,eax
-       jz end_encctr256
-       
-       cmp eax,4
-       jl lp256encctrsingle
-
-       test    ecx,0xf
-       jz              lpencctr128four
-       
-       copy_round_keys esp,ecx,0
-       copy_round_keys esp,ecx,1
-       copy_round_keys esp,ecx,2
-       copy_round_keys esp,ecx,3
-       copy_round_keys esp,ecx,4
-       copy_round_keys esp,ecx,5
-       copy_round_keys esp,ecx,6
-       copy_round_keys esp,ecx,7
-       copy_round_keys esp,ecx,8
-       copy_round_keys esp,ecx,9
-       copy_round_keys esp,ecx,10
-       copy_round_keys esp,ecx,11
-       copy_round_keys esp,ecx,12
-       copy_round_keys esp,ecx,13
-       copy_round_keys esp,ecx,14
-       mov ecx,esp     
-
-
-       align 16        
-       
-lpencctr256four:
-       
-       test eax,eax
-       jz end_encctr256
-       
-       cmp eax,4
-       jl lp256encctrsingle
-
-       load_and_inc4 [ecx+0*16]
-       add esi,4*16
-       aesenc4 [ecx+1*16]
-       aesenc4 [ecx+2*16]
-       aesenc4 [ecx+3*16]
-       aesenc4 [ecx+4*16]
-       aesenc4 [ecx+5*16]
-       aesenc4 [ecx+6*16]
-       aesenc4 [ecx+7*16]
-       aesenc4 [ecx+8*16]
-       aesenc4 [ecx+9*16]
-       aesenc4 [ecx+10*16]
-       aesenc4 [ecx+11*16]
-       aesenc4 [ecx+12*16]
-       aesenc4 [ecx+13*16]
-       aesenclast4     [ecx+14*16]
-       xor_with_input4 esi-(4*16)
-       
-       store4 esi+edi-16*4
-       sub eax,4
-       jmp lpencctr256four
-       
-       align 16
-       
-lp256encctrsingle:
-
-       movdqa  xmm0,xmm5
-       paddq   xmm5,[counter_add_one]
-       add esi, 16
-       movdqu xmm4,[ecx+0*16]
-       pxor xmm0, xmm4
-       aesenc1_u  [ecx+1*16]
-       aesenc1_u  [ecx+2*16]
-       aesenc1_u  [ecx+3*16]
-       aesenc1_u  [ecx+4*16]     
-       aesenc1_u  [ecx+5*16]
-       aesenc1_u  [ecx+6*16]
-       aesenc1_u  [ecx+7*16]
-       aesenc1_u  [ecx+8*16]
-       aesenc1_u  [ecx+9*16]
-       aesenc1_u  [ecx+10*16]
-       aesenc1_u  [ecx+11*16]
-       aesenc1_u  [ecx+12*16]
-       aesenc1_u  [ecx+13*16]
-       aesenclast1_u  [ecx+14*16]
-       movdqu xmm4, [esi-16]
-       pxor    xmm0,xmm4
-               ; Store output encrypted data into CIPHERTEXT array
-       movdqu  [esi+edi-16], xmm0
-       dec eax
-       jnz lp256encctrsingle
-
-end_encctr256:
-
-       mov esp,ebp
-       pop ebp
-       pop edi
-       pop esi
-
-       mov ecx,[esp-4+8]  ; first arg
-       mov ecx,[ecx+12]
-       movdqu  [ecx],xmm5 ; store last counter for chaining
-       
-       ret
-
-
-
-
-
-
-align 16
-global _iEnc128_CBC
-_iEnc128_CBC:
-       mov ecx,[esp-4+8]
-       
-       push esi
-       push edi
-       push ebp
-       mov ebp,esp
-       
-       sub esp,16*16
-       and esp,0xfffffff0
-
-       mov     eax,[ecx+12]
-       movdqu xmm1,[eax]       ;iv     
-       
-       mov eax,[ecx+16] ; numblocks
-       mov esi,[ecx]
-       mov edi,[ecx+4]
-       mov ecx,[ecx+8]
-       sub edi,esi
-
-       test    ecx,0xf
-       jz              lp128encsingle_CBC
-       
-       copy_round_keys esp,ecx,0
-       copy_round_keys esp,ecx,1
-       copy_round_keys esp,ecx,2
-       copy_round_keys esp,ecx,3
-       copy_round_keys esp,ecx,4
-       copy_round_keys esp,ecx,5
-       copy_round_keys esp,ecx,6
-       copy_round_keys esp,ecx,7
-       copy_round_keys esp,ecx,8
-       copy_round_keys esp,ecx,9
-       copy_round_keys esp,ecx,10
-       mov ecx,esp     
-
-       align 16        
-       
-lp128encsingle_CBC:
-
-       movdqu xmm0, [esi]
-       add esi, 16
-       pxor xmm0, xmm1
-       movdqu xmm4,[ecx+0*16]
-       pxor xmm0, xmm4
-       aesenc1  [ecx+1*16]
-       aesenc1  [ecx+2*16]
-       aesenc1  [ecx+3*16]
-       aesenc1  [ecx+4*16]     
-       aesenc1  [ecx+5*16]
-       aesenc1  [ecx+6*16]
-       aesenc1  [ecx+7*16]
-       aesenc1  [ecx+8*16]
-       aesenc1  [ecx+9*16]
-       aesenclast1  [ecx+10*16]
-               ; Store output encrypted data into CIPHERTEXT array
-       movdqu  [esi+edi-16], xmm0
-       movdqa xmm1,xmm0
-       dec eax
-       jnz lp128encsingle_CBC
-
-
-       mov esp,ebp
-       pop ebp
-       pop edi
-       pop esi
-       mov ecx,[esp-4+8]  ; first arg
-       mov ecx,[ecx+12]
-       movdqu  [ecx],xmm1 ; store last iv for chaining
-       
-       ret
-
-
-align 16
-global _iEnc192_CBC
-_iEnc192_CBC:
-       mov ecx,[esp-4+8]  ; first arg
-       
-       push esi
-       push edi
-       push ebp
-       mov ebp,esp
-       
-       sub esp,16*16
-       and esp,0xfffffff0
-
-       mov     eax,[ecx+12]
-       movdqu xmm1,[eax]       ;iv     
-       
-       mov eax,[ecx+16] ; numblocks
-       mov esi,[ecx]
-       mov edi,[ecx+4]
-       mov ecx,[ecx+8]
-       sub edi,esi
-
-       test    ecx,0xf
-       jz              lp192encsingle_CBC
-       
-       copy_round_keys esp,ecx,0
-       copy_round_keys esp,ecx,1
-       copy_round_keys esp,ecx,2
-       copy_round_keys esp,ecx,3
-       copy_round_keys esp,ecx,4
-       copy_round_keys esp,ecx,5
-       copy_round_keys esp,ecx,6
-       copy_round_keys esp,ecx,7
-       copy_round_keys esp,ecx,8
-       copy_round_keys esp,ecx,9
-       copy_round_keys esp,ecx,10
-       copy_round_keys esp,ecx,11
-       copy_round_keys esp,ecx,12
-       mov ecx,esp     
-
-       align 16        
-       
-lp192encsingle_CBC:
-
-       movdqu xmm0, [esi]
-       add esi, 16
-       pxor xmm0, xmm1
-       movdqu xmm4,[ecx+0*16]
-       pxor xmm0, xmm4
-       aesenc1  [ecx+1*16]
-       aesenc1  [ecx+2*16]
-       aesenc1  [ecx+3*16]
-       aesenc1  [ecx+4*16]     
-       aesenc1  [ecx+5*16]
-       aesenc1  [ecx+6*16]
-       aesenc1  [ecx+7*16]
-       aesenc1  [ecx+8*16]
-       aesenc1  [ecx+9*16]
-       aesenc1  [ecx+10*16]
-       aesenc1  [ecx+11*16]
-       aesenclast1  [ecx+12*16]
-               ; Store output encrypted data into CIPHERTEXT array
-       movdqu  [esi+edi-16], xmm0
-       movdqa xmm1,xmm0
-       dec eax
-       jnz lp192encsingle_CBC
-
-
-       mov esp,ebp
-       pop ebp
-       pop edi
-       pop esi
-       mov ecx,[esp-4+8]  ; first arg
-       mov ecx,[ecx+12]
-       movdqu  [ecx],xmm1 ; store last iv for chaining
-       
-       ret
-
-align 16
-global _iEnc256_CBC
-_iEnc256_CBC:
-       mov ecx,[esp-4+8]  ; first arg
-       
-       push esi
-       push edi
-       push ebp
-       mov ebp,esp
-       
-       sub esp,16*16
-       and esp,0xfffffff0
-
-       mov     eax,[ecx+12]
-       movdqu xmm1,[eax]       ;iv     
-       
-       mov eax,[ecx+16] ; numblocks
-       mov esi,[ecx]
-       mov edi,[ecx+4]
-       mov ecx,[ecx+8]
-       sub edi,esi
-
-       test    ecx,0xf
-       jz              lp256encsingle_CBC
-       
-       copy_round_keys esp,ecx,0
-       copy_round_keys esp,ecx,1
-       copy_round_keys esp,ecx,2
-       copy_round_keys esp,ecx,3
-       copy_round_keys esp,ecx,4
-       copy_round_keys esp,ecx,5
-       copy_round_keys esp,ecx,6
-       copy_round_keys esp,ecx,7
-       copy_round_keys esp,ecx,8
-       copy_round_keys esp,ecx,9
-       copy_round_keys esp,ecx,10
-       copy_round_keys esp,ecx,11
-       copy_round_keys esp,ecx,12
-       copy_round_keys esp,ecx,13
-       copy_round_keys esp,ecx,14
-       mov ecx,esp     
-
-       align 16        
-       
-lp256encsingle_CBC:
-
-;abab
-       movdqu xmm0, [esi]
-       add esi, 16
-       pxor xmm0, xmm1
-       movdqu xmm4,[ecx+0*16]
-       pxor xmm0, xmm4
-       aesenc1 [ecx+1*16]
-       aesenc1 [ecx+2*16]
-       aesenc1 [ecx+3*16]
-       aesenc1 [ecx+4*16]     
-       aesenc1 [ecx+5*16]
-       aesenc1 [ecx+6*16]
-       aesenc1 [ecx+7*16]
-       aesenc1 [ecx+8*16]
-       aesenc1 [ecx+9*16]
-       aesenc1 [ecx+10*16]
-       aesenc1 [ecx+11*16]
-       aesenc1 [ecx+12*16]
-       aesenc1 [ecx+13*16]
-       aesenclast1 [ecx+14*16]
-               ; Store output encrypted data into CIPHERTEXT array
-       movdqu  [esi+edi-16], xmm0
-       movdqa xmm1,xmm0
-       dec eax
-       jnz lp256encsingle_CBC
-
-
-       mov esp,ebp
-       pop ebp
-       pop edi
-       pop esi
-       mov ecx,[esp-4+8]
-       mov ecx,[ecx+12]
-       movdqu  [ecx],xmm1 ; store last iv for chaining
-       
-       ret
-
-
-
-
-
-align 16
-global _iEnc192
-_iEnc192:
-       mov ecx,[esp-4+8]
-       
-       push esi
-       push edi
-       push ebp
-       mov ebp,esp
-       
-       sub esp,16*16
-       and esp,0xfffffff0
-
-       mov eax,[ecx+16] ; numblocks
-       mov esi,[ecx]
-       mov edi,[ecx+4]
-       mov ecx,[ecx+8]
-       
-       sub edi,esi
-
-       test eax,eax
-       jz end_enc192
-       
-       cmp eax,4
-       jl lp192encsingle
-
-       test    ecx,0xf
-       jz              lpenc192four
-       
-       copy_round_keys esp,ecx,0
-       copy_round_keys esp,ecx,1
-       copy_round_keys esp,ecx,2
-       copy_round_keys esp,ecx,3
-       copy_round_keys esp,ecx,4
-       copy_round_keys esp,ecx,5
-       copy_round_keys esp,ecx,6
-       copy_round_keys esp,ecx,7
-       copy_round_keys esp,ecx,8
-       copy_round_keys esp,ecx,9
-       copy_round_keys esp,ecx,10
-       copy_round_keys esp,ecx,11
-       copy_round_keys esp,ecx,12
-       mov ecx,esp     
-
-       align 16        
-       
-lpenc192four:
-       
-       test eax,eax
-       jz end_enc192
-       
-       cmp eax,4
-       jl lp192encsingle
-
-       load_and_xor4 esi,[ecx+0*16]
-       add esi,4*16
-       aesenc4 [ecx+1*16]
-       aesenc4 [ecx+2*16]
-       aesenc4 [ecx+3*16]
-       aesenc4 [ecx+4*16]
-       aesenc4 [ecx+5*16]
-       aesenc4 [ecx+6*16]
-       aesenc4 [ecx+7*16]
-       aesenc4 [ecx+8*16]
-       aesenc4 [ecx+9*16]
-       aesenc4 [ecx+10*16]
-       aesenc4 [ecx+11*16]
-       aesenclast4     [ecx+12*16]
-       
-       store4 esi+edi-16*4
-       sub eax,4
-       jmp lpenc192four
-       
-       align 16
-lp192encsingle:
-
-       movdqu xmm0, [esi]
-       add esi, 16
-       movdqu xmm4,[ecx+0*16]
-       pxor xmm0, xmm4
-       aesenc1_u [ecx+1*16]
-       aesenc1_u [ecx+2*16]
-       aesenc1_u [ecx+3*16]
-       aesenc1_u [ecx+4*16]     
-       aesenc1_u [ecx+5*16]
-       aesenc1_u [ecx+6*16]
-       aesenc1_u [ecx+7*16]
-       aesenc1_u [ecx+8*16]
-       aesenc1_u [ecx+9*16]
-       aesenc1_u [ecx+10*16]
-       aesenc1_u [ecx+11*16]
-       aesenclast1_u [ecx+12*16]
-               ; Store output encrypted data into CIPHERTEXT array
-       movdqu  [esi+edi-16], xmm0
-       dec eax
-       jnz lp192encsingle
-
-end_enc192:
-
-
-       mov esp,ebp
-       pop ebp
-       pop edi
-       pop esi
-       
-       ret
-
-
-
-
-align 16
-global _iEnc256
-_iEnc256:
-       mov ecx,[esp-4+8]
-       
-       push esi
-       push edi
-       push ebp
-       mov ebp,esp
-       
-       sub esp,16*16
-       and esp,0xfffffff0
-
-       mov eax,[ecx+16] ; numblocks
-       mov esi,[ecx]
-       mov edi,[ecx+4]
-       mov ecx,[ecx+8]
-       
-       sub edi,esi     
-
-       test eax,eax
-       jz end_enc256
-
-       cmp eax,4
-       jl lp256enc
-
-       test    ecx,0xf
-       jz      lp256enc4
-       
-       copy_round_keys esp,ecx,0
-       copy_round_keys esp,ecx,1
-       copy_round_keys esp,ecx,2
-       copy_round_keys esp,ecx,3
-       copy_round_keys esp,ecx,4
-       copy_round_keys esp,ecx,5
-       copy_round_keys esp,ecx,6
-       copy_round_keys esp,ecx,7
-       copy_round_keys esp,ecx,8
-       copy_round_keys esp,ecx,9
-       copy_round_keys esp,ecx,10
-       copy_round_keys esp,ecx,11
-       copy_round_keys esp,ecx,12
-       copy_round_keys esp,ecx,13
-       copy_round_keys esp,ecx,14
-       mov ecx,esp     
-
-
-
-       align 16
-       
-lp256enc4:
-       test eax,eax
-       jz end_enc256
-
-       cmp eax,4
-       jl lp256enc
-
-
-       load_and_xor4 esi,[ecx+0*16]
-       add esi, 16*4
-       aesenc4 [ecx+1*16]
-       aesenc4 [ecx+2*16]
-       aesenc4 [ecx+3*16]
-       aesenc4 [ecx+4*16]
-       aesenc4 [ecx+5*16]
-       aesenc4 [ecx+6*16]
-       aesenc4 [ecx+7*16]
-       aesenc4 [ecx+8*16]
-       aesenc4 [ecx+9*16]
-       aesenc4 [ecx+10*16]
-       aesenc4 [ecx+11*16]
-       aesenc4 [ecx+12*16]
-       aesenc4 [ecx+13*16]
-       aesenclast4 [ecx+14*16]
-
-       store4  esi+edi-16*4
-       sub eax,4
-       jmp lp256enc4
-       
-       align 16
-lp256enc:
-
-       movdqu xmm0, [esi]
-       add esi, 16
-       movdqu xmm4,[ecx+0*16]
-       pxor xmm0, xmm4
-       aesenc1_u [ecx+1*16]
-       aesenc1_u [ecx+2*16]
-       aesenc1_u [ecx+3*16]
-       aesenc1_u [ecx+4*16]
-       aesenc1_u [ecx+5*16]
-       aesenc1_u [ecx+6*16]
-       aesenc1_u [ecx+7*16]
-       aesenc1_u [ecx+8*16]
-       aesenc1_u [ecx+9*16]
-       aesenc1_u [ecx+10*16]
-       aesenc1_u [ecx+11*16]
-       aesenc1_u [ecx+12*16]
-       aesenc1_u [ecx+13*16]
-       aesenclast1_u [ecx+14*16]
-
-               ; Store output encrypted data into CIPHERTEXT array
-       movdqu  [esi+edi-16], xmm0
-       dec eax
-       jnz lp256enc
-
-end_enc256:
-
-
-       mov esp,ebp
-       pop ebp
-       pop edi
-       pop esi
-       
-       ret
diff --git a/lib/accelerated/intel/iaes_asm_interface.h 
b/lib/accelerated/intel/iaes_asm_interface.h
deleted file mode 100755
index 093d2a5..0000000
--- a/lib/accelerated/intel/iaes_asm_interface.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/* 
- * Copyright (c) 2010, Intel Corporation
- * All rights reserved.
- * 
- * Redistribution and use in source and binary forms, with or without 
- * modification, are permitted provided that the following conditions are met:
- * 
- *     * Redistributions of source code must retain the above copyright 
notice, 
- *       this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright 
notice, 
- *       this list of conditions and the following disclaimer in the 
documentation 
- *       and/or other materials provided with the distribution.
- *     * Neither the name of Intel Corporation nor the names of its 
contributors 
- *       may be used to endorse or promote products derived from this software 
- *       without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
AND 
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
IMPLIED 
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
DISCLAIMED. 
- * IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY 
DIRECT, 
- * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 
(INCLUDING, 
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
USE, 
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
THEORY OF 
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
NEGLIGENCE 
- * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- * 
-*/
-
-#ifndef _INTEL_AES_ASM_INTERFACE_H__
-#define _INTEL_AES_ASM_INTERFACE_H__
-
-
-#include "iaesni.h"
-
-
-
-//structure to pass aes processing data to asm level functions
-typedef struct _sAesData
-{
-       _AES_IN         UCHAR   *in_block;
-       _AES_OUT        UCHAR   *out_block;
-       _AES_IN         UCHAR   *expanded_key;          
-       _AES_INOUT      UCHAR   *iv;                                    // for 
CBC mode
-       _AES_IN         size_t  num_blocks;
-} sAesData;
-
-#if (__cplusplus)
-extern "C"
-{
-#endif
-#if 0
-#define MYSTDCALL __stdcall
-#else
-#define MYSTDCALL 
-#endif
-
-#ifdef __linux__
-#ifndef __LP64__
-#define iEncExpandKey256 _iEncExpandKey256
-#define iEncExpandKey192 _iEncExpandKey192
-#define iEncExpandKey128 _iEncExpandKey128
-#define iDecExpandKey256 _iDecExpandKey256
-#define iDecExpandKey192 _iDecExpandKey192
-#define iDecExpandKey128 _iDecExpandKey128
-#define iEnc128 _iEnc128
-#define iDec128 _iDec128
-#define iEnc256 _iEnc256
-#define iDec256 _iDec256
-#define iEnc192 _iEnc192
-#define iDec192 _iDec192
-#define iEnc128_CBC _iEnc128_CBC
-#define iDec128_CBC _iDec128_CBC
-#define iEnc256_CBC _iEnc256_CBC
-#define iDec256_CBC _iDec256_CBC
-#define iEnc192_CBC _iEnc192_CBC
-#define iDec192_CBC _iDec192_CBC
-#define iEnc128_CTR _iEnc128_CTR
-#define iEnc192_CTR _iEnc192_CTR
-#define iEnc256_CTR _iEnc256_CTR
-#define do_rdtsc    _do_rdtsc
-#endif
-#endif
-       // prepearing the different key rounds, for enc/dec in asm
-       // expnaded key should be 16-byte aligned
-       // expanded key should have enough space to hold all key rounds (16 
bytes per round) - 256 bytes would cover all cases (AES256 has 14 rounds + 1 
xor)
-       void MYSTDCALL iEncExpandKey256(_AES_IN UCHAR *key, _AES_OUT UCHAR 
*expanded_key);
-       void MYSTDCALL iEncExpandKey192(_AES_IN UCHAR *key, _AES_OUT UCHAR 
*expanded_key);
-       void MYSTDCALL iEncExpandKey128(_AES_IN UCHAR *key, _AES_OUT UCHAR 
*expanded_key);
-
-       void MYSTDCALL iDecExpandKey256(UCHAR *key, _AES_OUT UCHAR 
*expanded_key);
-       void MYSTDCALL iDecExpandKey192(UCHAR *key, _AES_OUT UCHAR 
*expanded_key);
-       void MYSTDCALL iDecExpandKey128(UCHAR *key, _AES_OUT UCHAR 
*expanded_key);
-
-
-       //enc/dec asm functions
-       void MYSTDCALL iEnc128(sAesData *data);
-       void MYSTDCALL iDec128(sAesData *data);
-       void MYSTDCALL iEnc256(sAesData *data);
-       void MYSTDCALL iDec256(sAesData *data);
-       void MYSTDCALL iEnc192(sAesData *data);
-       void MYSTDCALL iDec192(sAesData *data);
-
-       void MYSTDCALL iEnc128_CBC(sAesData *data);
-       void MYSTDCALL iDec128_CBC(sAesData *data);
-       void MYSTDCALL iEnc256_CBC(sAesData *data);
-       void MYSTDCALL iDec256_CBC(sAesData *data);
-       void MYSTDCALL iEnc192_CBC(sAesData *data);
-       void MYSTDCALL iDec192_CBC(sAesData *data);
-
-
-       void MYSTDCALL iEnc128_CTR(sAesData *data);
-       void MYSTDCALL iEnc256_CTR(sAesData *data);
-       void MYSTDCALL iEnc192_CTR(sAesData *data);
-
-       // rdtsc function
-       unsigned long long do_rdtsc(void);
-
-
-#if (__cplusplus)
-}
-#endif
-
-
-#endif
-
diff --git a/lib/accelerated/intel/iaesni.h b/lib/accelerated/intel/iaesni.h
deleted file mode 100755
index b358faa..0000000
--- a/lib/accelerated/intel/iaesni.h
+++ /dev/null
@@ -1,147 +0,0 @@
-/* 
- * Copyright (c) 2010, Intel Corporation
- * All rights reserved.
- * 
- * Redistribution and use in source and binary forms, with or without 
- * modification, are permitted provided that the following conditions are met:
- * 
- *     * Redistributions of source code must retain the above copyright 
notice, 
- *       this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright 
notice, 
- *       this list of conditions and the following disclaimer in the 
documentation 
- *       and/or other materials provided with the distribution.
- *     * Neither the name of Intel Corporation nor the names of its 
contributors 
- *       may be used to endorse or promote products derived from this software 
- *       without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
AND 
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
IMPLIED 
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
DISCLAIMED. 
- * IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY 
DIRECT, 
- * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 
(INCLUDING, 
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
USE, 
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
THEORY OF 
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
NEGLIGENCE 
- * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- * 
-*/
-
-
-#ifndef _IAESNI_H__
-#define _IAESNI_H__
-
-#include <stdlib.h>
-
-#define AES_INSTRCTIONS_CPUID_BIT (1<<25)
-
-//indicates input param
-#define _AES_IN        
-
-//indicates output param
-#define _AES_OUT
-
-//indicates input/output param - based on context
-#define _AES_INOUT
-
-typedef unsigned char UCHAR;
-
-
-#ifndef bool
-#define bool BOOL
-#endif
-//test if the processor actually supports the above functions
-//executing one of the functions below without processor support will cause UD 
fault
-//bool check_for_aes_instructions(void);
-#if (__cplusplus)
-extern "C" {
-#endif
-int check_for_aes_instructions(void);
-
-#define ROUND_KEYS_UNALIGNED_TESTING
-
-#ifdef __linux__
-
-#ifdef ROUND_KEYS_UNALIGNED_TESTING
-
-#define DEFINE_ROUND_KEYS \
-       UCHAR __attribute__ ((aligned (16))) _expandedKey[16*16];       \
-       UCHAR *expandedKey = _expandedKey + 4;  \
-
-
-#else
-
-
-
-#define DEFINE_ROUND_KEYS \
-       UCHAR __attribute__ ((aligned (16))) _expandedKey[16*16];       \
-       UCHAR *expandedKey = _expandedKey;      \
-
-#endif
-
-#else // if not __linux__
-
-#ifdef ROUND_KEYS_UNALIGNED_TESTING
-
-#define DEFINE_ROUND_KEYS \
-       __declspec(align(16)) UCHAR _expandedKey[16*16];        \
-       UCHAR *expandedKey = _expandedKey + 4;  \
-
-
-#else
-
-
-
-#define DEFINE_ROUND_KEYS \
-       __declspec(align(16)) UCHAR _expandedKey[16*16];        \
-       UCHAR *expandedKey = _expandedKey;      \
-
-
-#endif
-
-#endif
-
-
-
-// encryption functions
-// plainText is pointer to input stream
-// cipherText is pointer to buffer to be filled with encrypted (cipher text) 
data
-// key is pointer to enc key (sizes are 16 bytes for AES-128, 24 bytes for 
AES-192, 32 for AES-256)
-// numBlocks is number of 16 bytes blocks to process - note that encryption is 
done of full 16 byte blocks
-void intel_AES_enc128(_AES_IN UCHAR *plainText, _AES_OUT UCHAR *cipherText, 
_AES_IN UCHAR *key, _AES_IN size_t numBlocks);
-void intel_AES_enc192(_AES_IN UCHAR *plainText, _AES_OUT UCHAR *cipherText, 
_AES_IN UCHAR *key, _AES_IN size_t numBlocks);
-void intel_AES_enc256(_AES_IN UCHAR *plainText, _AES_OUT UCHAR *cipherText, 
_AES_IN UCHAR *key, _AES_IN size_t numBlocks);
-
-
-void intel_AES_enc128_CBC(_AES_IN UCHAR *plainText, _AES_OUT UCHAR 
*cipherText, _AES_IN UCHAR *key, _AES_IN size_t numBlocks, _AES_IN UCHAR *iv);
-void intel_AES_enc192_CBC(_AES_IN UCHAR *plainText, _AES_OUT UCHAR 
*cipherText, _AES_IN UCHAR *key, _AES_IN size_t numBlocks, _AES_IN UCHAR *iv);
-void intel_AES_enc256_CBC(_AES_IN UCHAR *plainText, _AES_OUT UCHAR 
*cipherText, _AES_IN UCHAR *key, _AES_IN size_t numBlocks, _AES_IN UCHAR *iv);
-
-
-// decryption functions
-// cipherText is pointer to encrypted stream
-// plainText is pointer to buffer to be filled with original (plain text) data
-// key is pointer to enc key (sizes are 16 bytes for AES-128, 24 bytes for 
AES-192, 32 for AES-256)
-// numBlocks is number of 16 bytes blocks to process - note that decryption is 
done of full 16 byte blocks
-void intel_AES_dec128(_AES_IN UCHAR *cipherText, _AES_OUT UCHAR *plainText, 
_AES_IN UCHAR *key, _AES_IN size_t numBlocks);
-void intel_AES_dec192(_AES_IN UCHAR *cipherText, _AES_OUT UCHAR *plainText, 
_AES_IN UCHAR *key, _AES_IN size_t numBlocks);
-void intel_AES_dec256(_AES_IN UCHAR *cipherText, _AES_OUT UCHAR *plainText, 
_AES_IN UCHAR *key, _AES_IN size_t numBlocks);
-
-void intel_AES_dec128_CBC(_AES_IN UCHAR *cipherText, _AES_OUT UCHAR 
*plainText, _AES_IN UCHAR *key, _AES_IN size_t numBlocks, _AES_IN UCHAR *iv);
-void intel_AES_dec192_CBC(_AES_IN UCHAR *cipherText, _AES_OUT UCHAR 
*plainText, _AES_IN UCHAR *key, _AES_IN size_t numBlocks, _AES_IN UCHAR *iv);
-void intel_AES_dec256_CBC(_AES_IN UCHAR *cipherText, _AES_OUT UCHAR 
*plainText, _AES_IN UCHAR *key, _AES_IN size_t numBlocks, _AES_IN UCHAR *iv);
-
-void intel_AES_encdec128_CTR(_AES_IN UCHAR *input, _AES_OUT UCHAR *output, 
_AES_IN UCHAR *key, _AES_IN size_t numBlocks, _AES_IN UCHAR *initial_counter);
-void intel_AES_encdec192_CTR(_AES_IN UCHAR *input, _AES_OUT UCHAR *output, 
_AES_IN UCHAR *key, _AES_IN size_t numBlocks, _AES_IN UCHAR *initial_counter);
-void intel_AES_encdec256_CTR(_AES_IN UCHAR *input, _AES_OUT UCHAR *output, 
_AES_IN UCHAR *key, _AES_IN size_t numBlocks, _AES_IN UCHAR *initial_counter);
-
-
-#if (__cplusplus)
-}
-#endif
-
-
-#endif
-
-
-
diff --git a/lib/accelerated/intel/license.txt 
b/lib/accelerated/intel/license.txt
index a93c323..c87ba42 100755
--- a/lib/accelerated/intel/license.txt
+++ b/lib/accelerated/intel/license.txt
@@ -1,34 +1,43 @@
-/* intel_aes_lib source files come from Intel.
- * Modified by Patrick Fay
- *
-Copyright (c) 2010, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without 
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice, 
-      this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright 
notice, 
-      this list of conditions and the following disclaimer in the 
documentation 
-      and/or other materials provided with the distribution.
-    * Neither the name of Intel Corporation nor the names of its contributors 
-      may be used to endorse or promote products derived from this software 
-      without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
AND 
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
DISCLAIMED. 
-IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY 
DIRECT, 
-INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
-BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
OF 
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
NEGLIGENCE 
-OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
-ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- ---------------------------------------------------------------------------
- Issue Date: Aug 6, 2010
- */
-
-Other source code files use the license shown in the source code file.
+====================================================================
+Written by Andy Polyakov <address@hidden> for the OpenSSL
+project. The module is, however, dual licensed under OpenSSL and
+CRYPTOGAMS licenses depending on where you obtain it. For further
+details see http://www.openssl.org/~appro/cryptogams/.
+====================================================================
+
+Copyright (c) 2006, CRYPTOGAMS by <address@hidden>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+      *        Redistributions of source code must retain copyright notices,
+       this list of conditions and the following disclaimer.
+
+      *        Redistributions in binary form must reproduce the above
+       copyright notice, this list of conditions and the following
+       disclaimer in the documentation and/or other materials
+       provided with the distribution.
+
+      *        Neither the name of the CRYPTOGAMS nor the names of its
+       copyright holder and contributors may be used to endorse or
+       promote products derived from this software without specific
+       prior written permission.
+
+ALTERNATIVELY, provided that this notice is retained in full, this
+product may be distributed under the terms of the GNU General Public
+License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+those given above.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


hooks/post-receive
-- 
GNU gnutls



reply via email to

[Prev in Thread] Current Thread [Next in Thread]