From: Tom Musta
Subject: [Qemu-devel] [PATCH 26/28] target-ppc: Altivec 2.07: AES Instructions
Date: Wed, 12 Feb 2014 15:23:17 -0600

This patch adds the Vector AES instructions introduced in Power ISA
Version 2.07:

   - Vector AES Cipher (vcipher)
   - Vector AES Cipher Last (vcipherlast)
   - Vector AES Inverse Cipher (vncipher)
   - Vector AES Inverse Cipher Last (vncipherlast)
   - Vector AES SubBytes (vsbox)

Note that the implementation of vncipher deviates from the RTL in
ISA V2.07.  However, it does match the verbal description in the
third paragraph.  The RTL will be fixed in ISA V2.07B.  The
implementation here has been tested against actual P8 hardware.
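
For reference, the round ordering described above composes into a full
AES-128 decryption roughly as follows.  This is only a sketch, not part
of the patch: key expansion, loading the ciphertext into the register,
and getting the round keys into the byte order the instructions expect
are all assumed to happen elsewhere.

    ppc_avr_t state, rk[11];   /* rk[0..10]: expanded AES-128 round keys */
    int round;

    /* ... load ciphertext into state, expand the key into rk[] ... */

    state.u64[0] ^= rk[10].u64[0];              /* initial AddRoundKey */
    state.u64[1] ^= rk[10].u64[1];
    for (round = 9; round >= 1; round--) {
        /* InvShiftRows, InvSubBytes, AddRoundKey, then InvMixColumns */
        helper_vncipher(&state, &state, &rk[round]);
    }
    helper_vncipherlast(&state, &state, &rk[0]);  /* no InvMixColumns */

With this ordering the expanded encryption round keys are simply
consumed in reverse; no InvMixColumns transform of the key schedule
is needed.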

Signed-off-by: Tom Musta <address@hidden>
---
 target-ppc/helper.h     |    6 +
 target-ppc/int_helper.c |  280 +++++++++++++++++++++++++++++++++++++++++++++++
 target-ppc/translate.c  |   29 +++++
 3 files changed, 315 insertions(+), 0 deletions(-)

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index ef6aa58..93e549e 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -316,6 +316,12 @@ DEF_HELPER_3(vpmsumh, void, avr, avr, avr)
 DEF_HELPER_3(vpmsumw, void, avr, avr, avr)
 DEF_HELPER_3(vpmsumd, void, avr, avr, avr)
 
+DEF_HELPER_2(vsbox, void, avr, avr)
+DEF_HELPER_3(vcipher, void, avr, avr, avr)
+DEF_HELPER_3(vcipherlast, void, avr, avr, avr)
+DEF_HELPER_3(vncipher, void, avr, avr, avr)
+DEF_HELPER_3(vncipherlast, void, avr, avr, avr)
+
 DEF_HELPER_4(bcdadd, i32, avr, avr, avr, i32)
 DEF_HELPER_4(bcdsub, i32, avr, avr, avr, i32)
 
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index ce7c6a0..cd04e8a 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -2338,6 +2338,286 @@ uint32_t helper_bcdsub(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
     return helper_bcdadd(r, a, &bcopy, ps);
 }
 
+static uint8_t SBOX[256] = {
+0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5,
+0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
+0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0,
+0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
+0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC,
+0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
+0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A,
+0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
+0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0,
+0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
+0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B,
+0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
+0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85,
+0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
+0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5,
+0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
+0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17,
+0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
+0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88,
+0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
+0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C,
+0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
+0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9,
+0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
+0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6,
+0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
+0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E,
+0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
+0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94,
+0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
+0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68,
+0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16,
+};
+
+static void SubBytes(ppc_avr_t *r, ppc_avr_t *a)
+{
+    int i;
+    VECTOR_FOR_INORDER_I(i, u8) {
+        r->u8[i] = SBOX[a->u8[i]];
+    }
+}
+
+static uint8_t InvSBOX[256] = {
+0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38,
+0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB,
+0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87,
+0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB,
+0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D,
+0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
+0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2,
+0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25,
+0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16,
+0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92,
+0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA,
+0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
+0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A,
+0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06,
+0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02,
+0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B,
+0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA,
+0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
+0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85,
+0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E,
+0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89,
+0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B,
+0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20,
+0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
+0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31,
+0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F,
+0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D,
+0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF,
+0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0,
+0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
+0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26,
+0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D,
+};
+
+static void InvSubBytes(ppc_avr_t *r, ppc_avr_t *a)
+{
+    int i;
+    VECTOR_FOR_INORDER_I(i, u8) {
+        r->u8[i] = InvSBOX[a->u8[i]];
+    }
+}
+
+static uint8_t ROTL8(uint8_t x, int n)
+{
+    return (x << n) | (x >> (8-n));
+}
+
+static inline int BIT8(uint8_t x, int n)
+{
+    return (x & (0x80 >> n)) != 0;
+}
+
+static uint8_t GFx02(uint8_t x)
+{
+    return ROTL8(x, 1) ^ (BIT8(x, 0) ? 0x1A : 0);
+}
+
+static uint8_t GFx03(uint8_t x)
+{
+    return x ^ ROTL8(x, 1) ^ (BIT8(x, 0) ? 0x1A : 0);
+}
+
+static uint8_t GFx09(uint8_t x)
+{
+    uint8_t term2 = ROTL8(x, 3);
+    uint8_t term3 = (BIT8(x, 0) ? 0x68 : 0) | (BIT8(x, 1) ? 0x14 : 0) |
+                    (BIT8(x, 2) ? 0x02 : 0);
+    uint8_t term4 = (BIT8(x, 1) ? 0x20 : 0) | (BIT8(x, 2) ? 0x18 : 0);
+    return x ^ term2 ^ term3 ^ term4;
+}
+
+static uint8_t GFx0B(uint8_t x)
+{
+    uint8_t term2 = ROTL8(x, 1);
+    uint8_t term3 = (x << 3) | (BIT8(x, 0) ? 0x06 : 0) |
+                    (BIT8(x, 2) ? 0x01 : 0);
+    uint8_t term4 = (BIT8(x, 0) ? 0x70 : 0) | (BIT8(x, 1) ? 0x06 : 0) |
+                    (BIT8(x, 2) ? 0x08 : 0);
+    uint8_t term5 = (BIT8(x, 1) ? 0x30 : 0) | (BIT8(x, 2) ? 0x02 : 0);
+    uint8_t term6 = BIT8(x, 2) ? 0x10 : 0;
+    return x ^ term2 ^ term3 ^ term4 ^ term5 ^ term6;
+}
+
+static uint8_t GFx0D(uint8_t x)
+{
+    uint8_t term2 = ROTL8(x, 2);
+    uint8_t term3 = (x << 3) | (BIT8(x, 1) ? 0x04 : 0) |
+                    (BIT8(x, 2) ? 0x03 : 0);
+    uint8_t term4 = (BIT8(x, 0) ? 0x58 : 0) | (BIT8(x, 1) ? 0x20 : 0);
+    uint8_t term5 = (BIT8(x, 1) ? 0x08 : 0) | (BIT8(x, 2) ? 0x10 : 0);
+    uint8_t term6 = BIT8(x, 2) ? 0x08 : 0;
+    return x ^ term2 ^ term3 ^ term4 ^ term5 ^ term6;
+}
+
+static uint8_t GFx0E(uint8_t x)
+{
+    uint8_t term1 = ROTL8(x, 1);
+    uint8_t term2 = (x << 2) | (BIT8(x, 2) ? 0x02 : 0) |
+                    (BIT8(x, 1) ? 0x01 : 0);
+    uint8_t term3 = (x << 3) | (BIT8(x, 1) ? 0x04 : 0) |
+                    (BIT8(x, 2) ? 0x01 : 0);
+    uint8_t term4 = (BIT8(x, 0) ? 0x40 : 0) | (BIT8(x, 1) ? 0x28 : 0) |
+                    (BIT8(x, 2) ? 0x10 : 0);
+    uint8_t term5 = (BIT8(x, 2) ? 0x08 : 0);
+    return term1 ^ term2 ^ term3 ^ term4 ^ term5;
+}
+
+#if defined(HOST_WORDS_BIGENDIAN)
+#define MCB(x, i, b) ((x)->u8[(i)*4 + (b)])
+#else
+#define MCB(x, i, b) ((x)->u8[15 - ((i)*4 + (b))])
+#endif
+
+static void MixColumns(ppc_avr_t *r, ppc_avr_t *x)
+{
+    int i;
+    for (i = 0; i < 4; i++) {
+        MCB(r, i, 0) = GFx02(MCB(x, i, 0)) ^ GFx03(MCB(x, i, 1)) ^
+                       MCB(x, i, 2) ^ MCB(x, i, 3);
+        MCB(r, i, 1) = MCB(x, i, 0) ^ GFx02(MCB(x, i, 1)) ^
+                       GFx03(MCB(x, i, 2)) ^ MCB(x, i, 3);
+        MCB(r, i, 2) = MCB(x, i, 0) ^ MCB(x, i, 1) ^
+                       GFx02(MCB(x, i, 2)) ^ GFx03(MCB(x, i, 3));
+        MCB(r, i, 3) = GFx03(MCB(x, i, 0)) ^ MCB(x, i, 1) ^
+                       MCB(x, i, 2) ^ GFx02(MCB(x, i, 3));
+    }
+}
+
+static void InvMixColumns(ppc_avr_t *r, ppc_avr_t *x)
+{
+    int i;
+    for (i = 0; i < 4; i++) {
+        MCB(r, i, 0) = GFx0E(MCB(x, i, 0)) ^ GFx0B(MCB(x, i, 1)) ^
+                       GFx0D(MCB(x, i, 2)) ^ GFx09(MCB(x, i, 3));
+        MCB(r, i, 1) = GFx09(MCB(x, i, 0)) ^ GFx0E(MCB(x, i, 1)) ^
+                       GFx0B(MCB(x, i, 2)) ^ GFx0D(MCB(x, i, 3));
+        MCB(r, i, 2) = GFx0D(MCB(x, i, 0)) ^ GFx09(MCB(x, i, 1)) ^
+                       GFx0E(MCB(x, i, 2)) ^ GFx0B(MCB(x, i, 3));
+        MCB(r, i, 3) = GFx0B(MCB(x, i, 0)) ^ GFx0D(MCB(x, i, 1)) ^
+                       GFx09(MCB(x, i, 2)) ^ GFx0E(MCB(x, i, 3));
+    }
+}
+
+static void ShiftRows(ppc_avr_t *r, ppc_avr_t *x)
+{
+    MCB(r, 0, 0) = MCB(x, 0, 0);
+    MCB(r, 1, 0) = MCB(x, 1, 0);
+    MCB(r, 2, 0) = MCB(x, 2, 0);
+    MCB(r, 3, 0) = MCB(x, 3, 0);
+
+    MCB(r, 0, 1) = MCB(x, 1, 1);
+    MCB(r, 1, 1) = MCB(x, 2, 1);
+    MCB(r, 2, 1) = MCB(x, 3, 1);
+    MCB(r, 3, 1) = MCB(x, 0, 1);
+
+    MCB(r, 0, 2) = MCB(x, 2, 2);
+    MCB(r, 1, 2) = MCB(x, 3, 2);
+    MCB(r, 2, 2) = MCB(x, 0, 2);
+    MCB(r, 3, 2) = MCB(x, 1, 2);
+
+    MCB(r, 0, 3) = MCB(x, 3, 3);
+    MCB(r, 1, 3) = MCB(x, 0, 3);
+    MCB(r, 2, 3) = MCB(x, 1, 3);
+    MCB(r, 3, 3) = MCB(x, 2, 3);
+}
+
+static void InvShiftRows(ppc_avr_t *r, ppc_avr_t *x)
+{
+    MCB(r, 0, 0) = MCB(x, 0, 0);
+    MCB(r, 1, 0) = MCB(x, 1, 0);
+    MCB(r, 2, 0) = MCB(x, 2, 0);
+    MCB(r, 3, 0) = MCB(x, 3, 0);
+
+    MCB(r, 0, 1) = MCB(x, 3, 1);
+    MCB(r, 1, 1) = MCB(x, 0, 1);
+    MCB(r, 2, 1) = MCB(x, 1, 1);
+    MCB(r, 3, 1) = MCB(x, 2, 1);
+
+    MCB(r, 0, 2) = MCB(x, 2, 2);
+    MCB(r, 1, 2) = MCB(x, 3, 2);
+    MCB(r, 2, 2) = MCB(x, 0, 2);
+    MCB(r, 3, 2) = MCB(x, 1, 2);
+
+    MCB(r, 0, 3) = MCB(x, 1, 3);
+    MCB(r, 1, 3) = MCB(x, 2, 3);
+    MCB(r, 2, 3) = MCB(x, 3, 3);
+    MCB(r, 3, 3) = MCB(x, 0, 3);
+}
+
+#undef MCB
+
+void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
+{
+    SubBytes(r, a);
+}
+
+void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+    ppc_avr_t vtemp1, vtemp2, vtemp3;
+    SubBytes(&vtemp1, a);
+    ShiftRows(&vtemp2, &vtemp1);
+    MixColumns(&vtemp3, &vtemp2);
+    r->u64[0] = vtemp3.u64[0] ^ b->u64[0];
+    r->u64[1] = vtemp3.u64[1] ^ b->u64[1];
+}
+
+void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+    ppc_avr_t vtemp1, vtemp2;
+    SubBytes(&vtemp1, a);
+    ShiftRows(&vtemp2, &vtemp1);
+    r->u64[0] = vtemp2.u64[0] ^ b->u64[0];
+    r->u64[1] = vtemp2.u64[1] ^ b->u64[1];
+}
+
+void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+    /* This differs from what is written in ISA V2.07.  The RTL is */
+    /* incorrect and will be fixed in V2.07B.                      */
+    ppc_avr_t vtemp1, vtemp2, vtemp3;
+    InvShiftRows(&vtemp1, a);
+    InvSubBytes(&vtemp2, &vtemp1);
+    vtemp3.u64[0] = vtemp2.u64[0] ^ b->u64[0];
+    vtemp3.u64[1] = vtemp2.u64[1] ^ b->u64[1];
+    InvMixColumns(r, &vtemp3);
+}
+
+void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+    ppc_avr_t vtemp1, vtemp2;
+    InvShiftRows(&vtemp1, a);
+    InvSubBytes(&vtemp2, &vtemp1);
+    r->u64[0] = vtemp2.u64[0] ^ b->u64[0];
+    r->u64[1] = vtemp2.u64[1] ^ b->u64[1];
+}
+
 #undef VECTOR_FOR_INORDER_I
 #undef HI_IDX
 #undef LO_IDX
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 9186bb6..fe98367 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7412,6 +7412,30 @@ GEN_VXFORM_DUAL(vsubuhm, PPC_ALTIVEC, PPC_NONE, \
 GEN_VXFORM_DUAL(vsubuhs, PPC_ALTIVEC, PPC_NONE, \
                 bcdsub, PPC_NONE, PPC2_ALTIVEC_207)
 
+static void gen_vsbox(DisasContext *ctx)
+{
+    TCGv_ptr ra, rd;
+    if (unlikely(!ctx->altivec_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_VPU);
+        return;
+    }
+    ra = gen_avr_ptr(rA(ctx->opcode));
+    rd = gen_avr_ptr(rD(ctx->opcode));
+    gen_helper_vsbox(rd, ra);
+    tcg_temp_free_ptr(ra);
+    tcg_temp_free_ptr(rd);
+}
+
+GEN_VXFORM(vcipher, 4, 20)
+GEN_VXFORM(vcipherlast, 4, 20)
+GEN_VXFORM(vncipher, 4, 21)
+GEN_VXFORM(vncipherlast, 4, 21)
+
+GEN_VXFORM_DUAL(vcipher, PPC_NONE, PPC2_ALTIVEC_207,
+                vcipherlast, PPC_NONE, PPC2_ALTIVEC_207)
+GEN_VXFORM_DUAL(vncipher, PPC_NONE, PPC2_ALTIVEC_207,
+                vncipherlast, PPC_NONE, PPC2_ALTIVEC_207)
+
 /***                           VSX extension                               ***/
 
 static inline TCGv_i64 cpu_vsrh(int n)
@@ -10642,6 +10666,11 @@ GEN_VXFORM_207(vpmsumh, 4, 17),
 GEN_VXFORM_207(vpmsumw, 4, 18),
 GEN_VXFORM_207(vpmsumd, 4, 19),
 
+GEN_VXFORM_207(vsbox, 4, 23),
+
+GEN_VXFORM_DUAL(vcipher, vcipherlast, 4, 20, PPC_NONE, PPC2_ALTIVEC_207),
+GEN_VXFORM_DUAL(vncipher, vncipherlast, 4, 21, PPC_NONE, PPC2_ALTIVEC_207),
+
 GEN_HANDLER_E(lxsdx, 0x1F, 0x0C, 0x12, 0, PPC_NONE, PPC2_VSX),
 GEN_HANDLER_E(lxsiwax, 0x1F, 0x0C, 0x02, 0, PPC_NONE, PPC2_VSX207),
 GEN_HANDLER_E(lxsiwzx, 0x1F, 0x0C, 0x00, 0, PPC_NONE, PPC2_VSX207),
-- 
1.7.1