[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 4/5] tcg/i386: Use ANDN instruction
From: |
Aurelien Jarno |
Subject: |
Re: [Qemu-devel] [PATCH 4/5] tcg/i386: Use ANDN instruction |
Date: |
Sun, 16 Feb 2014 19:12:05 +0100 |
User-agent: |
Mutt/1.5.21 (2010-09-15) |
On Fri, Jan 31, 2014 at 08:43:37AM -0600, Richard Henderson wrote:
> Note that the optimizer cannot simplify ANDC X,Y,C to AND X,Y,~C
> so we must handle constants in the implementation of andc.
I do wonder if it wouldn't actually be a better idea to add this
simplification to the optimizer instead of adding it to the backend.
The best way to do that would be to check with tcg_target_const_match to
see if ANDC would accept such a constraint and to convert it to AND
if not.
The same can probably be done for ORC.
> Signed-off-by: Richard Henderson <address@hidden>
> ---
> tcg/i386/tcg-target.c | 52
> ++++++++++++++++++++++++++++++++++++++++-----------
> tcg/i386/tcg-target.h | 6 ++++--
> 2 files changed, 45 insertions(+), 13 deletions(-)
>
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index 00dbc3b..4f6b9c1 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -91,6 +91,7 @@ static const int tcg_target_call_oarg_regs[] = {
> /* Constants we accept. */
> #define TCG_CT_CONST_S32 0x100
> #define TCG_CT_CONST_U32 0x200
> +#define TCG_CT_CONST_I32 0x400
>
> /* Registers used with L constraint, which are the first argument
> registers on x86_64, and two random call clobbered registers on
> @@ -128,6 +129,10 @@ static bool have_movbe;
> # define have_movbe 0
> #endif
>
> +/* We need this symbol in tcg-target.h, and we can't properly conditionalize
> + it there. Therefore we always define the variable. */
> +bool have_bmi1;
> +
> static uint8_t *tb_ret_addr;
>
> static void patch_reloc(uint8_t *code_ptr, int type,
> @@ -224,6 +229,9 @@ static int target_parse_constraint(TCGArgConstraint *ct,
> const char **pct_str)
> case 'Z':
> ct->ct |= TCG_CT_CONST_U32;
> break;
> + case 'I':
> + ct->ct |= TCG_CT_CONST_I32;
> + break;
>
> default:
> return -1;
> @@ -247,6 +255,9 @@ static inline int tcg_target_const_match(tcg_target_long
> val,
> if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
> return 1;
> }
> + if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
> + return 1;
> + }
> return 0;
> }
>
> @@ -276,6 +287,7 @@ static inline int tcg_target_const_match(tcg_target_long
> val,
> #define OPC_ARITH_EvIz (0x81)
> #define OPC_ARITH_EvIb (0x83)
> #define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
> +#define OPC_ANDN (0xf2 | P_EXT38)
> #define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
> #define OPC_BSWAP (0xc8 | P_EXT)
> #define OPC_CALL_Jz (0xe8)
> @@ -1813,6 +1825,16 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode
> opc,
> }
> break;
>
> + OP_32_64(andc):
> + if (const_args[2]) {
> + tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32,
> + args[0], args[1]);
> + tgen_arithi(s, ARITH_AND + rexw, args[0], ~args[2], 0);
> + } else {
> + tcg_out_vex_modrm(s, OPC_ANDN + rexw, args[0], args[2], args[1]);
> + }
> + break;
> +
> OP_32_64(mul):
> if (const_args[2]) {
> int32_t val;
> @@ -2041,6 +2063,7 @@ static const TCGTargetOpDef x86_op_defs[] = {
> { INDEX_op_and_i32, { "r", "0", "ri" } },
> { INDEX_op_or_i32, { "r", "0", "ri" } },
> { INDEX_op_xor_i32, { "r", "0", "ri" } },
> + { INDEX_op_andc_i32, { "r", "r", "ri" } },
>
> { INDEX_op_shl_i32, { "r", "0", "ci" } },
> { INDEX_op_shr_i32, { "r", "0", "ci" } },
> @@ -2098,6 +2121,7 @@ static const TCGTargetOpDef x86_op_defs[] = {
> { INDEX_op_and_i64, { "r", "0", "reZ" } },
> { INDEX_op_or_i64, { "r", "0", "re" } },
> { INDEX_op_xor_i64, { "r", "0", "re" } },
> + { INDEX_op_andc_i64, { "r", "r", "rI" } },
>
> { INDEX_op_shl_i64, { "r", "0", "ci" } },
> { INDEX_op_shr_i64, { "r", "0", "ci" } },
> @@ -2235,25 +2259,31 @@ static void tcg_target_qemu_prologue(TCGContext *s)
>
> static void tcg_target_init(TCGContext *s)
> {
> -#if !(defined(have_cmov) && defined(have_movbe))
> - {
> - unsigned a, b, c, d;
> - int ret = __get_cpuid(1, &a, &b, &c, &d);
> + unsigned a, b, c, d;
> + int max = __get_cpuid_max(0, 0);
>
> -# ifndef have_cmov
> + if (max >= 1) {
> + __cpuid(1, a, b, c, d);
> +#ifndef have_cmov
> /* For 32-bit, 99% certainty that we're running on hardware that
> supports cmov, but we still need to check. In case cmov is not
> available, we'll use a small forward branch. */
> - have_cmov = ret && (d & bit_CMOV);
> -# endif
> -
> -# ifndef have_movbe
> + have_cmov = (d & bit_CMOV) != 0;
> +#endif
> +#ifndef have_movbe
> /* MOVBE is only available on Intel Atom and Haswell CPUs, so we
> need to probe for it. */
> - have_movbe = ret && (c & bit_MOVBE);
> -# endif
> + have_movbe = (c & bit_MOVBE) != 0;
> +#endif
> }
> +
> + if (max >= 7) {
> + /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs. */
> + __cpuid_count(7, 0, a, b, c, d);
> +#ifdef bit_BMI
> + have_bmi1 = (b & bit_BMI) != 0;
> #endif
> + }
>
> if (TCG_TARGET_REG_BITS == 64) {
> tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
> diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
> index 747b797..bdf2222 100644
> --- a/tcg/i386/tcg-target.h
> +++ b/tcg/i386/tcg-target.h
> @@ -73,6 +73,8 @@ typedef enum {
> #define TCG_TARGET_CALL_STACK_OFFSET 0
> #endif
>
> +extern bool have_bmi1;
> +
> /* optional instructions */
> #define TCG_TARGET_HAS_div2_i32 1
> #define TCG_TARGET_HAS_rot_i32 1
> @@ -84,7 +86,7 @@ typedef enum {
> #define TCG_TARGET_HAS_bswap32_i32 1
> #define TCG_TARGET_HAS_neg_i32 1
> #define TCG_TARGET_HAS_not_i32 1
> -#define TCG_TARGET_HAS_andc_i32 0
> +#define TCG_TARGET_HAS_andc_i32 have_bmi1
> #define TCG_TARGET_HAS_orc_i32 0
> #define TCG_TARGET_HAS_eqv_i32 0
> #define TCG_TARGET_HAS_nand_i32 0
> @@ -112,7 +114,7 @@ typedef enum {
> #define TCG_TARGET_HAS_bswap64_i64 1
> #define TCG_TARGET_HAS_neg_i64 1
> #define TCG_TARGET_HAS_not_i64 1
> -#define TCG_TARGET_HAS_andc_i64 0
> +#define TCG_TARGET_HAS_andc_i64 have_bmi1
> #define TCG_TARGET_HAS_orc_i64 0
> #define TCG_TARGET_HAS_eqv_i64 0
> #define TCG_TARGET_HAS_nand_i64 0
Otherwise the patch looks good to me.
--
Aurelien Jarno GPG: 1024D/F1BCDB73
address@hidden http://www.aurel32.net
- Re: [Qemu-devel] [PATCH 4/5] tcg/i386: Use ANDN instruction,
Aurelien Jarno <=