qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH 16/25] tcg/i386: Handle ctz and clz opcodes


From: Richard Henderson
Subject: [Qemu-devel] [PATCH 16/25] tcg/i386: Handle ctz and clz opcodes
Date: Wed, 16 Nov 2016 20:25:26 +0100

Signed-off-by: Richard Henderson <address@hidden>
---
 tcg/i386/tcg-target.h     |  8 ++---
 tcg/i386/tcg-target.inc.c | 83 ++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 78 insertions(+), 13 deletions(-)

diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index f2d9955..8fff287 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -93,8 +93,8 @@ extern bool have_bmi1;
 #define TCG_TARGET_HAS_eqv_i32          0
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
-#define TCG_TARGET_HAS_clz_i32          0
-#define TCG_TARGET_HAS_ctz_i32          0
+#define TCG_TARGET_HAS_clz_i32          1
+#define TCG_TARGET_HAS_ctz_i32          1
 #define TCG_TARGET_HAS_deposit_i32      1
 #define TCG_TARGET_HAS_extract_i32      1
 #define TCG_TARGET_HAS_sextract_i32     1
@@ -127,8 +127,8 @@ extern bool have_bmi1;
 #define TCG_TARGET_HAS_eqv_i64          0
 #define TCG_TARGET_HAS_nand_i64         0
 #define TCG_TARGET_HAS_nor_i64          0
-#define TCG_TARGET_HAS_clz_i64          0
-#define TCG_TARGET_HAS_ctz_i64          0
+#define TCG_TARGET_HAS_clz_i64          1
+#define TCG_TARGET_HAS_ctz_i64          1
 #define TCG_TARGET_HAS_deposit_i64      1
 #define TCG_TARGET_HAS_extract_i64      1
 #define TCG_TARGET_HAS_sextract_i64     0
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index 39f62bd..3eeb58f 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -92,6 +92,7 @@ static const int tcg_target_call_oarg_regs[] = {
 #define TCG_CT_CONST_S32 0x100
 #define TCG_CT_CONST_U32 0x200
 #define TCG_CT_CONST_I32 0x400
+#define TCG_CT_CONST_WSZ 0x800
 
 /* Registers used with L constraint, which are the first argument 
    registers on x86_64, and two random call clobbered registers on
@@ -225,6 +226,12 @@ static int target_parse_constraint(TCGArgConstraint *ct, 
const char **pct_str)
         } else {
             goto case_c;
         }
+    case 'W':
+        /* With TZCNT/LZCNT, we can have operand-size as an input.  */
+        if (have_bmi1) {
+            ct->ct |= TCG_CT_CONST_WSZ;
+        }
+        break;
 
         /* qemu_ld/st address constraint */
     case 'L':
@@ -273,6 +280,9 @@ static inline int tcg_target_const_match(tcg_target_long 
val, TCGType type,
     if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
         return 1;
     }
+    if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
+        return 1;
+    }
     return 0;
 }
 
@@ -306,6 +316,8 @@ static inline int tcg_target_const_match(tcg_target_long 
val, TCGType type,
 #define OPC_ARITH_GvEv (0x03)          /* ... plus (ARITH_FOO << 3) */
 #define OPC_ANDN        (0xf2 | P_EXT38)
 #define OPC_ADD_GvEv   (OPC_ARITH_GvEv | (ARITH_ADD << 3))
+#define OPC_BSF         (0xbc | P_EXT)
+#define OPC_BSR         (0xbd | P_EXT)
 #define OPC_BSWAP      (0xc8 | P_EXT)
 #define OPC_CALL_Jz    (0xe8)
 #define OPC_CMOVCC      (0x40 | P_EXT)  /* ... plus condition code */
@@ -320,6 +332,7 @@ static inline int tcg_target_const_match(tcg_target_long 
val, TCGType type,
 #define OPC_JMP_long   (0xe9)
 #define OPC_JMP_short  (0xeb)
 #define OPC_LEA         (0x8d)
+#define OPC_LZCNT       (0xbd | P_EXT | P_SIMDF3)
 #define OPC_MOVB_EvGv  (0x88)          /* stores, more or less */
 #define OPC_MOVL_EvGv  (0x89)          /* stores, more or less */
 #define OPC_MOVL_GvEv  (0x8b)          /* loads, more or less */
@@ -346,6 +359,7 @@ static inline int tcg_target_const_match(tcg_target_long 
val, TCGType type,
 #define OPC_SHLX        (0xf7 | P_EXT38 | P_DATA16)
 #define OPC_SHRX        (0xf7 | P_EXT38 | P_SIMDF2)
 #define OPC_TESTL      (0x85)
+#define OPC_TZCNT       (0xbc | P_EXT | P_SIMDF3)
 #define OPC_XCHG_ax_r32        (0x90)
 
 #define OPC_GRP3_Ev    (0xf7)
@@ -431,6 +445,11 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int 
rm, int x)
     if (opc & P_ADDR32) {
         tcg_out8(s, 0x67);
     }
+    if (opc & P_SIMDF3) {
+        tcg_out8(s, 0xf3);
+    } else if (opc & P_SIMDF2) {
+        tcg_out8(s, 0xf2);
+    }
 
     rex = 0;
     rex |= (opc & P_REXW) ? 0x8 : 0x0;  /* REX.W */
@@ -465,6 +484,11 @@ static void tcg_out_opc(TCGContext *s, int opc)
     if (opc & P_DATA16) {
         tcg_out8(s, 0x66);
     }
+    if (opc & P_SIMDF3) {
+        tcg_out8(s, 0xf3);
+    } else if (opc & P_SIMDF2) {
+        tcg_out8(s, 0xf2);
+    }
     if (opc & (P_EXT | P_EXT38)) {
         tcg_out8(s, 0x0f);
         if (opc & P_EXT38) {
@@ -1093,13 +1117,11 @@ static void tcg_out_setcond2(TCGContext *s, const 
TCGArg *args,
 }
 #endif
 
-static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
-                              TCGArg c1, TCGArg c2, int const_c2,
-                              TCGArg v1)
+static void tcg_out_cmov(TCGContext *s, TCGCond cond, int rexw,
+                         TCGReg dest, TCGReg v1)
 {
-    tcg_out_cmp(s, c1, c2, const_c2, 0);
     if (have_cmov) {
-        tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
+        tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | rexw, dest, v1);
     } else {
         TCGLabel *over = gen_new_label();
         tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
@@ -1108,13 +1130,21 @@ static void tcg_out_movcond32(TCGContext *s, TCGCond 
cond, TCGArg dest,
     }
 }
 
+static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGReg dest,
+                              TCGReg c1, TCGArg c2, int const_c2,
+                              TCGReg v1)
+{
+    tcg_out_cmp(s, c1, c2, const_c2, 0);
+    tcg_out_cmov(s, cond, 0, dest, v1);
+}
+
 #if TCG_TARGET_REG_BITS == 64
-static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
-                              TCGArg c1, TCGArg c2, int const_c2,
-                              TCGArg v1)
+static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGReg dest,
+                              TCGReg c1, TCGArg c2, int const_c2,
+                              TCGReg v1)
 {
     tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
-    tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
+    tcg_out_cmov(s, cond, P_REXW, dest, v1);
 }
 #endif
 
@@ -1993,6 +2023,37 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode 
opc,
         }
         break;
 
+    OP_32_64(ctz):
+        if (const_args[2]) {
+            tcg_debug_assert(have_bmi1);
+            tcg_debug_assert(args[2] == (rexw ? 64 : 32));
+            tcg_out_modrm(s, OPC_TZCNT + rexw, args[0], args[1]);
+        } else {
+            /* ??? The manual says that the output is undefined when the
+               input is zero, but real hardware leaves it unchanged.  As
+               noted in target-i386/translate.c, real programs depend on
+               this -- now we are one more of those.  */
+            tcg_out_modrm(s, OPC_BSF + rexw, args[0], args[1]);
+            if (args[0] != args[2]) {
+                tcg_out_cmov(s, TCG_COND_EQ, rexw, args[0], args[2]);
+            }
+        }
+        break;
+
+    OP_32_64(clz):
+        if (const_args[2]) {
+            tcg_debug_assert(have_bmi1);
+            tcg_debug_assert(args[2] == (rexw ? 64 : 32));
+            tcg_out_modrm(s, OPC_LZCNT + rexw, args[0], args[1]);
+        } else {
+            /* ??? See above.  */
+            tcg_out_modrm(s, OPC_BSR + rexw, args[0], args[1]);
+            if (args[0] != args[2]) {
+                tcg_out_cmov(s, TCG_COND_EQ, rexw, args[0], args[2]);
+            }
+        }
+        break;
+
     case INDEX_op_brcond_i32:
         tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
                          arg_label(args[3]), 0);
@@ -2220,6 +2281,8 @@ static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_sar_i32, { "r", "0", "Ci" } },
     { INDEX_op_rotl_i32, { "r", "0", "ci" } },
     { INDEX_op_rotr_i32, { "r", "0", "ci" } },
+    { INDEX_op_clz_i32, { "r", "r", "rW" } },
+    { INDEX_op_ctz_i32, { "r", "r", "rW" } },
 
     { INDEX_op_brcond_i32, { "r", "ri" } },
 
@@ -2281,6 +2344,8 @@ static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_sar_i64, { "r", "0", "Ci" } },
     { INDEX_op_rotl_i64, { "r", "0", "ci" } },
     { INDEX_op_rotr_i64, { "r", "0", "ci" } },
+    { INDEX_op_clz_i64, { "r", "r", "rW" } },
+    { INDEX_op_ctz_i64, { "r", "r", "rW" } },
 
     { INDEX_op_brcond_i64, { "r", "re" } },
     { INDEX_op_setcond_i64, { "r", "r", "re" } },
-- 
2.7.4




reply via email to

[Prev in Thread] Current Thread [Next in Thread]