qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH 4/5] tcg/i386: Use ANDN instruction


From: Richard Henderson
Subject: [Qemu-devel] [PATCH 4/5] tcg/i386: Use ANDN instruction
Date: Fri, 31 Jan 2014 08:43:37 -0600

Note that the optimizer cannot simplify ANDC X,Y,C to AND X,Y,~C
so we must handle constants in the implementation of andc.

Signed-off-by: Richard Henderson <address@hidden>
---
 tcg/i386/tcg-target.c | 52 ++++++++++++++++++++++++++++++++++++++++-----------
 tcg/i386/tcg-target.h |  6 ++++--
 2 files changed, 45 insertions(+), 13 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 00dbc3b..4f6b9c1 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -91,6 +91,7 @@ static const int tcg_target_call_oarg_regs[] = {
 /* Constants we accept.  */
 #define TCG_CT_CONST_S32 0x100
 #define TCG_CT_CONST_U32 0x200
+#define TCG_CT_CONST_I32 0x400
 
 /* Registers used with L constraint, which are the first argument 
    registers on x86_64, and two random call clobbered registers on
@@ -128,6 +129,10 @@ static bool have_movbe;
 # define have_movbe 0
 #endif
 
+/* We need this symbol in tcg-target.h, and we can't properly conditionalize
+   it there.  Therefore we always define the variable.  */
+bool have_bmi1;
+
 static uint8_t *tb_ret_addr;
 
 static void patch_reloc(uint8_t *code_ptr, int type,
@@ -224,6 +229,9 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
     case 'Z':
         ct->ct |= TCG_CT_CONST_U32;
         break;
+    case 'I':
+        ct->ct |= TCG_CT_CONST_I32;
+        break;
 
     default:
         return -1;
@@ -247,6 +255,9 @@ static inline int tcg_target_const_match(tcg_target_long val,
     if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
         return 1;
     }
+    if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
+        return 1;
+    }
     return 0;
 }
 
@@ -276,6 +287,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define OPC_ARITH_EvIz (0x81)
 #define OPC_ARITH_EvIb (0x83)
 #define OPC_ARITH_GvEv (0x03)          /* ... plus (ARITH_FOO << 3) */
+#define OPC_ANDN        (0xf2 | P_EXT38)
 #define OPC_ADD_GvEv   (OPC_ARITH_GvEv | (ARITH_ADD << 3))
 #define OPC_BSWAP      (0xc8 | P_EXT)
 #define OPC_CALL_Jz    (0xe8)
@@ -1813,6 +1825,16 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         }
         break;
 
+    OP_32_64(andc):
+        if (const_args[2]) {
+            tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32,
+                        args[0], args[1]);
+            tgen_arithi(s, ARITH_AND + rexw, args[0], ~args[2], 0);
+        } else {
+            tcg_out_vex_modrm(s, OPC_ANDN + rexw, args[0], args[2], args[1]);
+        }
+        break;
+
     OP_32_64(mul):
         if (const_args[2]) {
             int32_t val;
@@ -2041,6 +2063,7 @@ static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_and_i32, { "r", "0", "ri" } },
     { INDEX_op_or_i32, { "r", "0", "ri" } },
     { INDEX_op_xor_i32, { "r", "0", "ri" } },
+    { INDEX_op_andc_i32, { "r", "r", "ri" } },
 
     { INDEX_op_shl_i32, { "r", "0", "ci" } },
     { INDEX_op_shr_i32, { "r", "0", "ci" } },
@@ -2098,6 +2121,7 @@ static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_and_i64, { "r", "0", "reZ" } },
     { INDEX_op_or_i64, { "r", "0", "re" } },
     { INDEX_op_xor_i64, { "r", "0", "re" } },
+    { INDEX_op_andc_i64, { "r", "r", "rI" } },
 
     { INDEX_op_shl_i64, { "r", "0", "ci" } },
     { INDEX_op_shr_i64, { "r", "0", "ci" } },
@@ -2235,25 +2259,31 @@ static void tcg_target_qemu_prologue(TCGContext *s)
 
 static void tcg_target_init(TCGContext *s)
 {
-#if !(defined(have_cmov) && defined(have_movbe))
-    {
-        unsigned a, b, c, d;
-        int ret = __get_cpuid(1, &a, &b, &c, &d);
+    unsigned a, b, c, d;
+    int max = __get_cpuid_max(0, 0);
 
-# ifndef have_cmov
+    if (max >= 1) {
+        __cpuid(1, a, b, c, d);
+#ifndef have_cmov
         /* For 32-bit, 99% certainty that we're running on hardware that
            supports cmov, but we still need to check.  In case cmov is not
            available, we'll use a small forward branch.  */
-        have_cmov = ret && (d & bit_CMOV);
-# endif
-
-# ifndef have_movbe
+        have_cmov = (d & bit_CMOV) != 0;
+#endif
+#ifndef have_movbe
         /* MOVBE is only available on Intel Atom and Haswell CPUs, so we
            need to probe for it.  */
-        have_movbe = ret && (c & bit_MOVBE);
-# endif
+        have_movbe = (c & bit_MOVBE) != 0;
+#endif
     }
+
+    if (max >= 7) {
+        /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs.  */
+        __cpuid_count(7, 0, a, b, c, d);
+#ifdef bit_BMI
+        have_bmi1 = (b & bit_BMI) != 0;
 #endif
+    }
 
     if (TCG_TARGET_REG_BITS == 64) {
         tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 747b797..bdf2222 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -73,6 +73,8 @@ typedef enum {
 #define TCG_TARGET_CALL_STACK_OFFSET 0
 #endif
 
+extern bool have_bmi1;
+
 /* optional instructions */
 #define TCG_TARGET_HAS_div2_i32         1
 #define TCG_TARGET_HAS_rot_i32          1
@@ -84,7 +86,7 @@ typedef enum {
 #define TCG_TARGET_HAS_bswap32_i32      1
 #define TCG_TARGET_HAS_neg_i32          1
 #define TCG_TARGET_HAS_not_i32          1
-#define TCG_TARGET_HAS_andc_i32         0
+#define TCG_TARGET_HAS_andc_i32         have_bmi1
 #define TCG_TARGET_HAS_orc_i32          0
 #define TCG_TARGET_HAS_eqv_i32          0
 #define TCG_TARGET_HAS_nand_i32         0
@@ -112,7 +114,7 @@ typedef enum {
 #define TCG_TARGET_HAS_bswap64_i64      1
 #define TCG_TARGET_HAS_neg_i64          1
 #define TCG_TARGET_HAS_not_i64          1
-#define TCG_TARGET_HAS_andc_i64         0
+#define TCG_TARGET_HAS_andc_i64         have_bmi1
 #define TCG_TARGET_HAS_orc_i64          0
 #define TCG_TARGET_HAS_eqv_i64          0
 #define TCG_TARGET_HAS_nand_i64         0
-- 
1.8.5.3




reply via email to

[Prev in Thread] Current Thread [Next in Thread]