qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 1/4] tcg: Add muluh and mulsh opcodes


From: Aurelien Jarno
Subject: Re: [Qemu-devel] [PATCH 1/4] tcg: Add muluh and mulsh opcodes
Date: Wed, 28 Aug 2013 22:59:31 +0200
User-agent: Mutt/1.5.21 (2010-09-15)

On Sat, Aug 17, 2013 at 04:26:43PM -0700, Richard Henderson wrote:
> Use them in places where mulu2 and muls2 are used.
> Optimize mulx2 with dead low part to mulxh.
> 
> Signed-off-by: Richard Henderson <address@hidden>
> ---
>  tcg/aarch64/tcg-target.h |  4 ++++
>  tcg/arm/tcg-target.h     |  2 ++
>  tcg/hppa/tcg-target.h    |  2 ++
>  tcg/i386/tcg-target.h    |  4 ++++
>  tcg/ia64/tcg-target.h    |  4 ++++
>  tcg/mips/tcg-target.h    |  2 ++
>  tcg/optimize.c           | 20 ++++++++++++++++++++
>  tcg/ppc/tcg-target.h     |  2 ++
>  tcg/ppc64/tcg-target.h   |  4 ++++
>  tcg/s390/tcg-target.h    |  4 ++++
>  tcg/sparc/tcg-target.h   |  4 ++++
>  tcg/tcg-op.h             | 40 ++++++++++++++++++++++++++++++++++++----
>  tcg/tcg-opc.h            |  4 ++++
>  tcg/tcg.c                | 36 ++++++++++++++++++++++++++++++------
>  tcg/tcg.h                |  2 ++
>  tcg/tci/tcg-target.h     |  5 ++++-
>  16 files changed, 128 insertions(+), 11 deletions(-)
> 
> diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
> index 51e5092..26ee28b 100644
> --- a/tcg/aarch64/tcg-target.h
> +++ b/tcg/aarch64/tcg-target.h
> @@ -61,6 +61,8 @@ typedef enum {
>  #define TCG_TARGET_HAS_sub2_i32         0
>  #define TCG_TARGET_HAS_mulu2_i32        0
>  #define TCG_TARGET_HAS_muls2_i32        0
> +#define TCG_TARGET_HAS_muluh_i32        0
> +#define TCG_TARGET_HAS_mulsh_i32        0
>  
>  #define TCG_TARGET_HAS_div_i64          0
>  #define TCG_TARGET_HAS_rem_i64          0
> @@ -87,6 +89,8 @@ typedef enum {
>  #define TCG_TARGET_HAS_sub2_i64         0
>  #define TCG_TARGET_HAS_mulu2_i64        0
>  #define TCG_TARGET_HAS_muls2_i64        0
> +#define TCG_TARGET_HAS_muluh_i64        0
> +#define TCG_TARGET_HAS_mulsh_i64        0
>  
>  enum {
>      TCG_AREG0 = TCG_REG_X19,
> diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
> index 5cd9d6a..ed48092 100644
> --- a/tcg/arm/tcg-target.h
> +++ b/tcg/arm/tcg-target.h
> @@ -80,6 +80,8 @@ extern bool use_idiv_instructions;
>  #define TCG_TARGET_HAS_deposit_i32      1
>  #define TCG_TARGET_HAS_movcond_i32      1
>  #define TCG_TARGET_HAS_muls2_i32        1
> +#define TCG_TARGET_HAS_muluh_i32        0
> +#define TCG_TARGET_HAS_mulsh_i32        0
>  #define TCG_TARGET_HAS_div_i32          use_idiv_instructions
>  #define TCG_TARGET_HAS_rem_i32          0
>  
> diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h
> index 25467bd..0f6f2ff 100644
> --- a/tcg/hppa/tcg-target.h
> +++ b/tcg/hppa/tcg-target.h
> @@ -100,6 +100,8 @@ typedef enum {
>  #define TCG_TARGET_HAS_deposit_i32      1
>  #define TCG_TARGET_HAS_movcond_i32      1
>  #define TCG_TARGET_HAS_muls2_i32        0
> +#define TCG_TARGET_HAS_muluh_i32        0
> +#define TCG_TARGET_HAS_mulsh_i32        0
>  
>  /* optional instructions automatically implemented */
>  #define TCG_TARGET_HAS_neg_i32          0 /* sub rd, 0, rs */
> diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
> index e3f6bb9..b7d1a55 100644
> --- a/tcg/i386/tcg-target.h
> +++ b/tcg/i386/tcg-target.h
> @@ -96,6 +96,8 @@ typedef enum {
>  #define TCG_TARGET_HAS_sub2_i32         1
>  #define TCG_TARGET_HAS_mulu2_i32        1
>  #define TCG_TARGET_HAS_muls2_i32        1
> +#define TCG_TARGET_HAS_muluh_i32        0
> +#define TCG_TARGET_HAS_mulsh_i32        0
>  
>  #if TCG_TARGET_REG_BITS == 64
>  #define TCG_TARGET_HAS_div2_i64         1
> @@ -122,6 +124,8 @@ typedef enum {
>  #define TCG_TARGET_HAS_sub2_i64         1
>  #define TCG_TARGET_HAS_mulu2_i64        1
>  #define TCG_TARGET_HAS_muls2_i64        1
> +#define TCG_TARGET_HAS_muluh_i64        0
> +#define TCG_TARGET_HAS_mulsh_i64        0
>  #endif
>  
>  #define TCG_TARGET_deposit_i32_valid(ofs, len) \
> diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
> index f32d519..ee6b2c8 100644
> --- a/tcg/ia64/tcg-target.h
> +++ b/tcg/ia64/tcg-target.h
> @@ -146,6 +146,10 @@ typedef enum {
>  #define TCG_TARGET_HAS_mulu2_i64        0
>  #define TCG_TARGET_HAS_muls2_i32        0
>  #define TCG_TARGET_HAS_muls2_i64        0
> +#define TCG_TARGET_HAS_muluh_i32        0
> +#define TCG_TARGET_HAS_muluh_i64        0
> +#define TCG_TARGET_HAS_mulsh_i32        0
> +#define TCG_TARGET_HAS_mulsh_i64        0
>  
>  #define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16)
>  #define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16)
> diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
> index a438950..6cb7c2f 100644
> --- a/tcg/mips/tcg-target.h
> +++ b/tcg/mips/tcg-target.h
> @@ -89,6 +89,8 @@ typedef enum {
>  #define TCG_TARGET_HAS_eqv_i32          0
>  #define TCG_TARGET_HAS_nand_i32         0
>  #define TCG_TARGET_HAS_muls2_i32        1
> +#define TCG_TARGET_HAS_muluh_i32        0
> +#define TCG_TARGET_HAS_mulsh_i32        0
>  
>  /* optional instructions only implemented on MIPS4, MIPS32 and Loongson 2 */
>  #if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \
> diff --git a/tcg/optimize.c b/tcg/optimize.c
> index b35868a..e8dedf3 100644
> --- a/tcg/optimize.c
> +++ b/tcg/optimize.c
> @@ -198,6 +198,8 @@ static TCGOpcode op_to_mov(TCGOpcode op)
>  
>  static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
>  {
> +    uint64_t l64, h64;
> +
>      switch (op) {
>      CASE_OP_32_64(add):
>          return x + y;
> @@ -290,6 +292,18 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
>      case INDEX_op_ext32u_i64:
>          return (uint32_t)x;
>  
> +    case INDEX_op_muluh_i32:
> +        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
> +    case INDEX_op_mulsh_i32:
> +        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
> +
> +    case INDEX_op_muluh_i64:
> +        mulu64(&l64, &h64, x, y);
> +        return h64;
> +    case INDEX_op_mulsh_i64:
> +        muls64(&l64, &h64, x, y);
> +        return h64;
> +
>      default:
>          fprintf(stderr,
>                  "Unrecognized operation %d in do_constant_folding.\n", op);
> @@ -531,6 +545,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
>          CASE_OP_32_64(eqv):
>          CASE_OP_32_64(nand):
>          CASE_OP_32_64(nor):
> +        CASE_OP_32_64(muluh):
> +        CASE_OP_32_64(mulsh):
>              swap_commutative(args[0], &args[1], &args[2]);
>              break;
>          CASE_OP_32_64(brcond):
> @@ -771,6 +787,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
>          switch (op) {
>          CASE_OP_32_64(and):
>          CASE_OP_32_64(mul):
> +        CASE_OP_32_64(muluh):
> +        CASE_OP_32_64(mulsh):
>              if ((temps[args[2]].state == TCG_TEMP_CONST
>                  && temps[args[2]].val == 0)) {
>                  s->gen_opc_buf[op_index] = op_to_movi(op);
> @@ -882,6 +900,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
>          CASE_OP_32_64(eqv):
>          CASE_OP_32_64(nand):
>          CASE_OP_32_64(nor):
> +        CASE_OP_32_64(muluh):
> +        CASE_OP_32_64(mulsh):
>              if (temps[args[1]].state == TCG_TEMP_CONST
>                  && temps[args[2]].state == TCG_TEMP_CONST) {
>                  s->gen_opc_buf[op_index] = op_to_movi(op);
> diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
> index b42d97c..613c5ff 100644
> --- a/tcg/ppc/tcg-target.h
> +++ b/tcg/ppc/tcg-target.h
> @@ -96,6 +96,8 @@ typedef enum {
>  #define TCG_TARGET_HAS_deposit_i32      1
>  #define TCG_TARGET_HAS_movcond_i32      1
>  #define TCG_TARGET_HAS_muls2_i32        0
> +#define TCG_TARGET_HAS_muluh_i32        0
> +#define TCG_TARGET_HAS_mulsh_i32        0
>  
>  #define TCG_AREG0 TCG_REG_R27
>  
> diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
> index 48fc6e2..0789daf 100644
> --- a/tcg/ppc64/tcg-target.h
> +++ b/tcg/ppc64/tcg-target.h
> @@ -95,6 +95,8 @@ typedef enum {
>  #define TCG_TARGET_HAS_sub2_i32         0
>  #define TCG_TARGET_HAS_mulu2_i32        0
>  #define TCG_TARGET_HAS_muls2_i32        0
> +#define TCG_TARGET_HAS_muluh_i32        0
> +#define TCG_TARGET_HAS_mulsh_i32        0
>  
>  #define TCG_TARGET_HAS_div_i64          1
>  #define TCG_TARGET_HAS_rem_i64          0
> @@ -118,6 +120,8 @@ typedef enum {
>  #define TCG_TARGET_HAS_sub2_i64         1
>  #define TCG_TARGET_HAS_mulu2_i64        1
>  #define TCG_TARGET_HAS_muls2_i64        1
> +#define TCG_TARGET_HAS_muluh_i64        0
> +#define TCG_TARGET_HAS_mulsh_i64        0
>  
>  #define TCG_AREG0 TCG_REG_R27
>  
> diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
> index 42ca36c..b02f170 100644
> --- a/tcg/s390/tcg-target.h
> +++ b/tcg/s390/tcg-target.h
> @@ -69,6 +69,8 @@ typedef enum TCGReg {
>  #define TCG_TARGET_HAS_sub2_i32         1
>  #define TCG_TARGET_HAS_mulu2_i32        0
>  #define TCG_TARGET_HAS_muls2_i32        0
> +#define TCG_TARGET_HAS_muluh_i32        0
> +#define TCG_TARGET_HAS_mulsh_i32        0
>  
>  #define TCG_TARGET_HAS_div2_i64         1
>  #define TCG_TARGET_HAS_rot_i64          1
> @@ -94,6 +96,8 @@ typedef enum TCGReg {
>  #define TCG_TARGET_HAS_sub2_i64         1
>  #define TCG_TARGET_HAS_mulu2_i64        1
>  #define TCG_TARGET_HAS_muls2_i64        0
> +#define TCG_TARGET_HAS_muluh_i64        0
> +#define TCG_TARGET_HAS_mulsh_i64        0
>  
>  extern bool tcg_target_deposit_valid(int ofs, int len);
>  #define TCG_TARGET_deposit_i32_valid  tcg_target_deposit_valid
> diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
> index dab52d7..1a696bc 100644
> --- a/tcg/sparc/tcg-target.h
> +++ b/tcg/sparc/tcg-target.h
> @@ -107,6 +107,8 @@ typedef enum {
>  #define TCG_TARGET_HAS_sub2_i32         1
>  #define TCG_TARGET_HAS_mulu2_i32        1
>  #define TCG_TARGET_HAS_muls2_i32        0
> +#define TCG_TARGET_HAS_muluh_i32        0
> +#define TCG_TARGET_HAS_mulsh_i32        0
>  
>  #if TCG_TARGET_REG_BITS == 64
>  #define TCG_TARGET_HAS_div_i64          1
> @@ -134,6 +136,8 @@ typedef enum {
>  #define TCG_TARGET_HAS_sub2_i64         0
>  #define TCG_TARGET_HAS_mulu2_i64        0
>  #define TCG_TARGET_HAS_muls2_i64        0
> +#define TCG_TARGET_HAS_muluh_i64        0
> +#define TCG_TARGET_HAS_mulsh_i64        0
>  #endif
>  
>  #define TCG_AREG0 TCG_REG_I0
> diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
> index 364964d..3de7545 100644
> --- a/tcg/tcg-op.h
> +++ b/tcg/tcg-op.h
> @@ -1039,10 +1039,18 @@ static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
>      t0 = tcg_temp_new_i64();
>      t1 = tcg_temp_new_i32();
>  
> -    tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0),
> -                    TCGV_LOW(arg1), TCGV_LOW(arg2));
> -    /* Allow the optimizer room to replace mulu2 with two moves.  */
> -    tcg_gen_op0(INDEX_op_nop);
> +    if (TCG_TARGET_HAS_mulu2_i32) {
> +        tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0),
> +                        TCGV_LOW(arg1), TCGV_LOW(arg2));
> +        /* Allow the optimizer room to replace mulu2 with two moves.  */
> +        tcg_gen_op0(INDEX_op_nop);
> +    } else {
> +        tcg_debug_assert(TCG_TARGET_HAS_muluh_i32);
> +        tcg_gen_op3_i32(INDEX_op_mul_i32, TCGV_LOW(t0),
> +                        TCGV_LOW(arg1), TCGV_LOW(arg2));
> +        tcg_gen_op3_i32(INDEX_op_muluh_i32, TCGV_HIGH(t0),
> +                        TCGV_LOW(arg1), TCGV_LOW(arg2));
> +    }
>  
>      tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2));
>      tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
> @@ -2401,6 +2409,12 @@ static inline void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh,
>          tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2);
>          /* Allow the optimizer room to replace mulu2 with two moves.  */
>          tcg_gen_op0(INDEX_op_nop);
> +    } else if (TCG_TARGET_HAS_muluh_i32) {
> +        TCGv_i32 t = tcg_temp_new_i32();
> +        tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
> +        tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2);
> +        tcg_gen_mov_i32(rl, t);
> +        tcg_temp_free_i32(t);
>      } else {
>          TCGv_i64 t0 = tcg_temp_new_i64();
>          TCGv_i64 t1 = tcg_temp_new_i64();
> @@ -2420,6 +2434,12 @@ static inline void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh,
>          tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2);
>          /* Allow the optimizer room to replace muls2 with two moves.  */
>          tcg_gen_op0(INDEX_op_nop);
> +    } else if (TCG_TARGET_HAS_mulsh_i32) {
> +        TCGv_i32 t = tcg_temp_new_i32();
> +        tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
> +        tcg_gen_op3_i32(INDEX_op_mulsh_i32, rh, arg1, arg2);
> +        tcg_gen_mov_i32(rl, t);
> +        tcg_temp_free_i32(t);
>      } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_mulu2_i32) {
>          TCGv_i32 t0 = tcg_temp_new_i32();
>          TCGv_i32 t1 = tcg_temp_new_i32();
> @@ -2499,6 +2519,12 @@ static inline void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh,
>          tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2);
>          /* Allow the optimizer room to replace mulu2 with two moves.  */
>          tcg_gen_op0(INDEX_op_nop);
> +    } else if (TCG_TARGET_HAS_muluh_i64) {
> +        TCGv_i64 t = tcg_temp_new_i64();
> +        tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
> +        tcg_gen_op3_i64(INDEX_op_muluh_i64, rh, arg1, arg2);
> +        tcg_gen_mov_i64(rl, t);
> +        tcg_temp_free_i64(t);
>      } else if (TCG_TARGET_HAS_mulu2_i64) {
>          TCGv_i64 t0 = tcg_temp_new_i64();
>          TCGv_i64 t1 = tcg_temp_new_i64();
> @@ -2540,6 +2566,12 @@ static inline void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh,
>          tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2);
>          /* Allow the optimizer room to replace muls2 with two moves.  */
>          tcg_gen_op0(INDEX_op_nop);
> +    } else if (TCG_TARGET_HAS_mulsh_i64) {
> +        TCGv_i64 t = tcg_temp_new_i64();
> +        tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
> +        tcg_gen_op3_i64(INDEX_op_mulsh_i64, rh, arg1, arg2);
> +        tcg_gen_mov_i64(rl, t);
> +        tcg_temp_free_i64(t);
>      } else {
>          TCGv_i64 t0 = tcg_temp_new_i64();
>          int sizemask = 0;
> diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
> index a8af5b9..a75c29d 100644
> --- a/tcg/tcg-opc.h
> +++ b/tcg/tcg-opc.h
> @@ -91,6 +91,8 @@ DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_add2_i32))
>  DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_sub2_i32))
>  DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_mulu2_i32))
>  DEF(muls2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_muls2_i32))
> +DEF(muluh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i32))
> +DEF(mulsh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i32))
>  DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | IMPL(TCG_TARGET_REG_BITS == 32))
>  DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32))
>  
> @@ -167,6 +169,8 @@ DEF(add2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_add2_i64))
>  DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64))
>  DEF(mulu2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulu2_i64))
>  DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64))
> +DEF(muluh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i64))
> +DEF(mulsh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i64))
>  
>  /* QEMU specific */
>  #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index dac8224..75034ca 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -1243,12 +1243,13 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
>  static void tcg_liveness_analysis(TCGContext *s)
>  {
>      int i, op_index, nb_args, nb_iargs, nb_oargs, arg, nb_ops;
> -    TCGOpcode op, op_new;
> +    TCGOpcode op, op_new, op_new2;
>      TCGArg *args;
>      const TCGOpDef *def;
>      uint8_t *dead_temps, *mem_temps;
>      uint16_t dead_args;
>      uint8_t sync_args;
> +    bool have_op_new2;
>      
>      s->gen_opc_ptr++; /* skip end */
>  
> @@ -1385,29 +1386,52 @@ static void tcg_liveness_analysis(TCGContext *s)
>              goto do_not_remove;
>  
>          case INDEX_op_mulu2_i32:
> +            op_new = INDEX_op_mul_i32;
> +            op_new2 = INDEX_op_muluh_i32;
> +            have_op_new2 = TCG_TARGET_HAS_muluh_i32;
> +            goto do_mul2;
>          case INDEX_op_muls2_i32:
>              op_new = INDEX_op_mul_i32;
> +            op_new2 = INDEX_op_mulsh_i32;
> +            have_op_new2 = TCG_TARGET_HAS_mulsh_i32;
>              goto do_mul2;
>          case INDEX_op_mulu2_i64:
> +            op_new = INDEX_op_mul_i64;
> +            op_new2 = INDEX_op_muluh_i64;
> +            have_op_new2 = TCG_TARGET_HAS_muluh_i64;
> +            goto do_mul2;
>          case INDEX_op_muls2_i64:
>              op_new = INDEX_op_mul_i64;
> +            op_new2 = INDEX_op_mulsh_i64;
> +            have_op_new2 = TCG_TARGET_HAS_mulsh_i64;
> +            goto do_mul2;
>          do_mul2:
>              args -= 4;
>              nb_iargs = 2;
>              nb_oargs = 2;
> -            /* Likewise, test for the high part of the operation dead.  */
>              if (dead_temps[args[1]] && !mem_temps[args[1]]) {
>                  if (dead_temps[args[0]] && !mem_temps[args[0]]) {
> +                    /* Both parts of the operation are dead.  */
>                      goto do_remove;
>                  }
> +                /* The high part of the operation is dead; generate the low.  */
>                  s->gen_opc_buf[op_index] = op = op_new;
>                  args[1] = args[2];
>                  args[2] = args[3];
> -                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
> -                tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1);
> -                /* Fall through and mark the single-word operation live.  */
> -                nb_oargs = 1;
> +            } else if (have_op_new2 && dead_temps[args[0]]
> +                       && !mem_temps[args[0]]) {
> +                /* The low part of the operation is dead; generate the high.  */
> +                s->gen_opc_buf[op_index] = op = op_new2;
> +                args[0] = args[1];
> +                args[1] = args[2];
> +                args[2] = args[3];
> +            } else {
> +                goto do_not_remove;
>              }
> +            assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
> +            tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1);
> +            /* Mark the single-word operation live.  */
> +            nb_oargs = 1;
>              goto do_not_remove;
>  
>          default:
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index f3f9889..3f869dd 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -85,6 +85,8 @@ typedef uint64_t TCGRegSet;
>  #define TCG_TARGET_HAS_sub2_i64         0
>  #define TCG_TARGET_HAS_mulu2_i64        0
>  #define TCG_TARGET_HAS_muls2_i64        0
> +#define TCG_TARGET_HAS_muluh_i64        0
> +#define TCG_TARGET_HAS_mulsh_i64        0
>  /* Turn some undef macros into true macros.  */
>  #define TCG_TARGET_HAS_add2_i32         1
>  #define TCG_TARGET_HAS_sub2_i32         1
> diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
> index d7fc14e..ff12b4b 100644
> --- a/tcg/tci/tcg-target.h
> +++ b/tcg/tci/tcg-target.h
> @@ -76,6 +76,8 @@
>  #define TCG_TARGET_HAS_rot_i32          1
>  #define TCG_TARGET_HAS_movcond_i32      0
>  #define TCG_TARGET_HAS_muls2_i32        0
> +#define TCG_TARGET_HAS_muluh_i32        0
> +#define TCG_TARGET_HAS_mulsh_i32        0
>  
>  #if TCG_TARGET_REG_BITS == 64
>  #define TCG_TARGET_HAS_bswap16_i64      1
> @@ -100,13 +102,14 @@
>  #define TCG_TARGET_HAS_rot_i64          1
>  #define TCG_TARGET_HAS_movcond_i64      0
>  #define TCG_TARGET_HAS_muls2_i64        0
> -
>  #define TCG_TARGET_HAS_add2_i32         0
>  #define TCG_TARGET_HAS_sub2_i32         0
>  #define TCG_TARGET_HAS_mulu2_i32        0
>  #define TCG_TARGET_HAS_add2_i64         0
>  #define TCG_TARGET_HAS_sub2_i64         0
>  #define TCG_TARGET_HAS_mulu2_i64        0
> +#define TCG_TARGET_HAS_muluh_i64        0
> +#define TCG_TARGET_HAS_mulsh_i64        0
>  #endif /* TCG_TARGET_REG_BITS == 64 */
>  
>  /* Number of registers available.

Reviewed-by: Aurelien Jarno <address@hidden>
-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
address@hidden                 http://www.aurel32.net



reply via email to

[Prev in Thread] Current Thread [Next in Thread]