[Qemu-devel] [PATCH v4 39/64] target-i386: Use clz and ctz opcodes

qemu-devel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH v4 39/64] target-i386: Use clz and ctz opcodes

From:	Richard Henderson
Subject:	[Qemu-devel] [PATCH v4 39/64] target-i386: Use clz and ctz opcodes
Date:	Wed, 23 Nov 2016 14:01:36 +0100

Signed-off-by: Richard Henderson <address@hidden>
---
 target-i386/helper.h     |  2 --
 target-i386/int_helper.c | 11 -----------
 target-i386/translate.c  | 31 ++++++++++++++-----------------
 3 files changed, 14 insertions(+), 30 deletions(-)

diff --git a/target-i386/helper.h b/target-i386/helper.h
index 4e859eb..1e76b09 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -201,8 +201,6 @@ DEF_HELPER_FLAGS_3(xsetbv, TCG_CALL_NO_WG, void, env, i32, 
i64)
 DEF_HELPER_FLAGS_2(rdpkru, TCG_CALL_NO_WG, i64, env, i32)
 DEF_HELPER_FLAGS_3(wrpkru, TCG_CALL_NO_WG, void, env, i32, i64)
 
-DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl)
-DEF_HELPER_FLAGS_1(ctz, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_2(pdep, TCG_CALL_NO_RWG_SE, tl, tl, tl)
 DEF_HELPER_FLAGS_2(pext, TCG_CALL_NO_RWG_SE, tl, tl, tl)
 
diff --git a/target-i386/int_helper.c b/target-i386/int_helper.c
index 9e873ac..4dc5c65 100644
--- a/target-i386/int_helper.c
+++ b/target-i386/int_helper.c
@@ -417,17 +417,6 @@ void helper_idivq_EAX(CPUX86State *env, target_ulong t0)
 # define clztl  clz64
 #endif
 
-/* bit operations */
-target_ulong helper_ctz(target_ulong t0)
-{
-    return ctztl(t0);
-}
-
-target_ulong helper_clz(target_ulong t0)
-{
-    return clztl(t0);
-}
-
 target_ulong helper_pdep(target_ulong src, target_ulong mask)
 {
     target_ulong dest = 0;
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 4d6d36f..0eac334 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -6792,21 +6792,18 @@ static target_ulong disas_insn(CPUX86State *env, 
DisasContext *s,
                 ? s->cpuid_ext3_features & CPUID_EXT3_ABM
                 : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
             int size = 8 << ot;
+            /* For lzcnt/tzcnt, C bit is defined related to the input. */
             tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
             if (b & 1) {
                 /* For lzcnt, reduce the target_ulong result by the
                    number of zeros that we expect to find at the top.  */
-                gen_helper_clz(cpu_T0, cpu_T0);
+                tcg_gen_clzi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS);
                 tcg_gen_subi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - size);
             } else {
-                /* For tzcnt, a zero input must return the operand size:
-                   force all bits outside the operand size to 1.  */
-                target_ulong mask = (target_ulong)-2 << (size - 1);
-                tcg_gen_ori_tl(cpu_T0, cpu_T0, mask);
-                gen_helper_ctz(cpu_T0, cpu_T0);
-            }
-            /* For lzcnt/tzcnt, C and Z bits are defined and are
-               related to the result.  */
+                /* For tzcnt, a zero input must return the operand size.  */
+                tcg_gen_ctzi_tl(cpu_T0, cpu_T0, size);
+            }
+            /* For lzcnt/tzcnt, Z bit is defined related to the result.  */
             gen_op_update1_cc();
             set_cc_op(s, CC_OP_BMILGB + ot);
         } else {
@@ -6814,20 +6811,20 @@ static target_ulong disas_insn(CPUX86State *env, 
DisasContext *s,
                to the input and not the result.  */
             tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
             set_cc_op(s, CC_OP_LOGICB + ot);
+
+            /* ??? The manual says that the output is undefined when the
+               input is zero, but real hardware leaves it unchanged, and
+               real programs appear to depend on that.  Accomplish this
+               by passing the output as the value to return upon zero.  */
             if (b & 1) {
                 /* For bsr, return the bit index of the first 1 bit,
                    not the count of leading zeros.  */
-                gen_helper_clz(cpu_T0, cpu_T0);
+                tcg_gen_xori_tl(cpu_T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
+                tcg_gen_clz_tl(cpu_T0, cpu_T0, cpu_T1);
                 tcg_gen_xori_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - 1);
             } else {
-                gen_helper_ctz(cpu_T0, cpu_T0);
+                tcg_gen_ctz_tl(cpu_T0, cpu_T0, cpu_regs[reg]);
             }
-            /* ??? The manual says that the output is undefined when the
-               input is zero, but real hardware leaves it unchanged, and
-               real programs appear to depend on that.  */
-            tcg_gen_movi_tl(cpu_tmp0, 0);
-            tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T0, cpu_cc_dst, cpu_tmp0,
-                               cpu_regs[reg], cpu_T0);
         }
         gen_op_mov_reg_v(ot, reg, cpu_T0);
         break;
-- 
2.7.4

[Prev in Thread]

Current Thread

[Next in Thread]

[Qemu-devel] [PATCH v4 28/64] target-cris: Use clz opcode, (continued)
- [Qemu-devel] [PATCH v4 28/64] target-cris: Use clz opcode, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 31/64] target-openrisc: Use clz and ctz opcodes, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 30/64] target-mips: Use clz opcode, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 32/64] target-ppc: Use clz and ctz opcodes, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 33/64] target-s390x: Use clz opcode, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 34/64] target-tilegx: Use clz and ctz opcodes, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 36/64] target-unicore32: Use clz opcode, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 35/64] target-tricore: Use clz opcode, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 38/64] target-arm: Use clz opcode, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 37/64] target-xtensa: Use clz opcode, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 39/64] target-i386: Use clz and ctz opcodes, Richard Henderson <=
- [Qemu-devel] [PATCH v4 41/64] tcg/aarch64: Handle ctz and clz opcodes, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 40/64] tcg/ppc: Handle ctz and clz opcodes, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 43/64] tcg/mips: Handle clz opcode, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 42/64] tcg/arm: Handle ctz and clz opcodes, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 44/64] tcg/s390: Handle clz opcode, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 46/64] tcg/i386: Hoist common arguments in tcg_out_op, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 47/64] tcg/i386: Allow bmi2 shiftx to have non-matching operands, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 45/64] tcg/i386: Fuly convert tcg_target_op_def, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 51/64] target-arm: Use clrsb helper, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 49/64] tcg/i386: Rely on undefined/undocumented behaviour of BSF/BSR, Richard Henderson, 2016/11/23

Prev by Date: [Qemu-devel] [PATCH v4 37/64] target-xtensa: Use clz opcode
Next by Date: [Qemu-devel] [PATCH v4 41/64] tcg/aarch64: Handle ctz and clz opcodes
Previous by thread: [Qemu-devel] [PATCH v4 37/64] target-xtensa: Use clz opcode
Next by thread: [Qemu-devel] [PATCH v4 41/64] tcg/aarch64: Handle ctz and clz opcodes
Index(es):
- Date
- Thread