qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH 15/17] ppc: store CR registers in 32 1-bit registers


From: Paolo Bonzini
Subject: [Qemu-devel] [PATCH 15/17] ppc: store CR registers in 32 1-bit registers
Date: Thu, 28 Aug 2014 19:15:11 +0200

This makes comparisons much smaller and faster.  The speedup is
approximately 10% for user-mode emulation on an x86 host, and 3-4% on a
PPC host.

Note that CRF_* constants are flipped to match PowerPC's big
bit-endianness.  Previously, the CR register was effectively stored
in mixed endianness, so now there is less indirection going on.

Signed-off-by: Paolo Bonzini <address@hidden>
---
 linux-user/main.c       |   4 +-
 target-ppc/cpu.h        |  33 ++++--
 target-ppc/fpu_helper.c |  39 ++----
 target-ppc/helper.h     |   6 -
 target-ppc/int_helper.c |   2 +-
 target-ppc/machine.c    |   9 ++
 target-ppc/translate.c  | 307 +++++++++++++++++++++++++-----------------------
 7 files changed, 204 insertions(+), 196 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index 152c031..b403f24 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -1929,7 +1929,7 @@ void cpu_loop(CPUPPCState *env)
              * PPC ABI uses overflow flag in cr0 to signal an error
              * in syscalls.
              */
-            env->crf[0] &= ~0x1;
+            env->cr[CRF_SO] = 0;
             ret = do_syscall(env, env->gpr[0], env->gpr[3], env->gpr[4],
                              env->gpr[5], env->gpr[6], env->gpr[7],
                              env->gpr[8], 0, 0);
@@ -1939,7 +1939,7 @@ void cpu_loop(CPUPPCState *env)
                 break;
             }
             if (ret > (target_ulong)(-515)) {
-                env->crf[0] |= 0x1;
+                env->cr[CRF_SO] = 1;
                 ret = -ret;
             }
             env->gpr[3] = ret;
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 05c29b2..67510e8 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -939,7 +939,7 @@ struct CPUPPCState {
     /* CTR */
     target_ulong ctr;
     /* condition register */
-    uint32_t crf[8];
+    uint32_t cr[32];
 #if defined(TARGET_PPC64)
     /* CFAR */
     target_ulong cfar;
@@ -1058,6 +1058,9 @@ struct CPUPPCState {
     uint64_t dtl_addr, dtl_size;
 #endif /* TARGET_PPC64 */
 
+    /* condition register, for migration compatibility */
+    uint32_t crf[8];
+
     int error_code;
     uint32_t pending_interrupts;
 #if !defined(CONFIG_USER_ONLY)
@@ -1200,12 +1203,20 @@ void store_fpscr(CPUPPCState *env, uint64_t arg, 
uint32_t mask);
 
 static inline uint32_t ppc_get_crf(const CPUPPCState *env, int i)
 {
-    return env->crf[i];
+    uint32_t r;
+    r = env->cr[i * 4];
+    r = (r << 1) | (env->cr[i * 4 + 1]);
+    r = (r << 1) | (env->cr[i * 4 + 2]);
+    r = (r << 1) | (env->cr[i * 4 + 3]);
+    return r;
 }
 
 static inline void ppc_set_crf(CPUPPCState *env, int i, uint32_t val)
 {
-    env->crf[i] = val;
+    env->cr[i * 4 + 0] = (val & 0x08) != 0;
+    env->cr[i * 4 + 1] = (val & 0x04) != 0;
+    env->cr[i * 4 + 2] = (val & 0x02) != 0;
+    env->cr[i * 4 + 3] = (val & 0x01) != 0;
 }
 
 static inline uint64_t ppc_dump_gpr(CPUPPCState *env, int gprn)
@@ -1256,14 +1267,14 @@ static inline int cpu_mmu_index (CPUPPCState *env)
 
 /*****************************************************************************/
 /* CRF definitions */
-#define CRF_LT        3
-#define CRF_GT        2
-#define CRF_EQ        1
-#define CRF_SO        0
-#define CRF_CH        (1 << CRF_LT)
-#define CRF_CL        (1 << CRF_GT)
-#define CRF_CH_OR_CL  (1 << CRF_EQ)
-#define CRF_CH_AND_CL (1 << CRF_SO)
+#define CRF_LT        0
+#define CRF_GT        1
+#define CRF_EQ        2
+#define CRF_SO        3
+#define CRF_CH        CRF_LT
+#define CRF_CL        CRF_GT
+#define CRF_CH_OR_CL  CRF_EQ
+#define CRF_CH_AND_CL CRF_SO
 
 /* XER definitions */
 #define XER_SO  31
diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index 1ccbcf3..9574ebe 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -1098,8 +1098,8 @@ void helper_fcmpu(CPUPPCState *env, uint64_t arg1, 
uint64_t arg2,
     }
 
     env->fpscr &= ~(0x0F << FPSCR_FPRF);
-    env->fpscr |= (0x01 << FPSCR_FPRF) << ret;
-    ppc_set_crf(env, crfD, 1 << ret);
+    env->fpscr |= (0x08 << FPSCR_FPRF) >> ret;
+    ppc_set_crf(env, crfD, 0x08 >> ret);
 
     if (unlikely(ret == CRF_SO
                  && (float64_is_signaling_nan(farg1.d) ||
@@ -1130,8 +1130,8 @@ void helper_fcmpo(CPUPPCState *env, uint64_t arg1, 
uint64_t arg2,
     }
 
     env->fpscr &= ~(0x0F << FPSCR_FPRF);
-    env->fpscr |= (0x01 << FPSCR_FPRF) << ret;
-    ppc_set_crf(env, crfD, 1 << ret);
+    env->fpscr |= (0x08 << FPSCR_FPRF) >> ret;
+    ppc_set_crf(env, crfD, 0x08 >> ret);
 
     if (unlikely(ret == CRF_SO)) {
         if (float64_is_signaling_nan(farg1.d) ||
@@ -1403,7 +1403,7 @@ static inline uint32_t efscmplt(CPUPPCState *env, 
uint32_t op1, uint32_t op2)
 
     u1.l = op1;
     u2.l = op2;
-    return float32_lt(u1.f, u2.f, &env->vec_status) ? 4 : 0;
+    return float32_lt(u1.f, u2.f, &env->vec_status);
 }
 
 static inline uint32_t efscmpgt(CPUPPCState *env, uint32_t op1, uint32_t op2)
@@ -1412,7 +1412,7 @@ static inline uint32_t efscmpgt(CPUPPCState *env, 
uint32_t op1, uint32_t op2)
 
     u1.l = op1;
     u2.l = op2;
-    return float32_le(u1.f, u2.f, &env->vec_status) ? 0 : 4;
+    return !float32_le(u1.f, u2.f, &env->vec_status);
 }
 
 static inline uint32_t efscmpeq(CPUPPCState *env, uint32_t op1, uint32_t op2)
@@ -1421,7 +1421,7 @@ static inline uint32_t efscmpeq(CPUPPCState *env, 
uint32_t op1, uint32_t op2)
 
     u1.l = op1;
     u2.l = op2;
-    return float32_eq(u1.f, u2.f, &env->vec_status) ? 4 : 0;
+    return float32_eq(u1.f, u2.f, &env->vec_status);
 }
 
 static inline uint32_t efststlt(CPUPPCState *env, uint32_t op1, uint32_t op2)
@@ -1465,25 +1465,6 @@ static inline uint32_t evcmp_merge(int t0, int t1)
     return (t0 << 3) | (t1 << 2) | ((t0 | t1) << 1) | (t0 & t1);
 }
 
-#define HELPER_VECTOR_SPE_CMP(name)                                     \
-    uint32_t helper_ev##name(CPUPPCState *env, uint64_t op1, uint64_t op2) \
-    {                                                                   \
-        return evcmp_merge(e##name(env, op1 >> 32, op2 >> 32),          \
-                           e##name(env, op1, op2));                     \
-    }
-/* evfststlt */
-HELPER_VECTOR_SPE_CMP(fststlt);
-/* evfststgt */
-HELPER_VECTOR_SPE_CMP(fststgt);
-/* evfststeq */
-HELPER_VECTOR_SPE_CMP(fststeq);
-/* evfscmplt */
-HELPER_VECTOR_SPE_CMP(fscmplt);
-/* evfscmpgt */
-HELPER_VECTOR_SPE_CMP(fscmpgt);
-/* evfscmpeq */
-HELPER_VECTOR_SPE_CMP(fscmpeq);
-
 /* Double-precision floating-point conversion */
 uint64_t helper_efdcfsi(CPUPPCState *env, uint32_t val)
 {
@@ -1725,7 +1706,7 @@ uint32_t helper_efdtstlt(CPUPPCState *env, uint64_t op1, 
uint64_t op2)
 
     u1.ll = op1;
     u2.ll = op2;
-    return float64_lt(u1.d, u2.d, &env->vec_status) ? 4 : 0;
+    return float64_lt(u1.d, u2.d, &env->vec_status);
 }
 
 uint32_t helper_efdtstgt(CPUPPCState *env, uint64_t op1, uint64_t op2)
@@ -1734,7 +1715,7 @@ uint32_t helper_efdtstgt(CPUPPCState *env, uint64_t op1, 
uint64_t op2)
 
     u1.ll = op1;
     u2.ll = op2;
-    return float64_le(u1.d, u2.d, &env->vec_status) ? 0 : 4;
+    return !float64_le(u1.d, u2.d, &env->vec_status);
 }
 
 uint32_t helper_efdtsteq(CPUPPCState *env, uint64_t op1, uint64_t op2)
@@ -1743,7 +1724,7 @@ uint32_t helper_efdtsteq(CPUPPCState *env, uint64_t op1, 
uint64_t op2)
 
     u1.ll = op1;
     u2.ll = op2;
-    return float64_eq_quiet(u1.d, u2.d, &env->vec_status) ? 4 : 0;
+    return float64_eq_quiet(u1.d, u2.d, &env->vec_status);
 }
 
 uint32_t helper_efdcmplt(CPUPPCState *env, uint64_t op1, uint64_t op2)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 5342f13..8d6a92b 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -493,12 +493,6 @@ DEF_HELPER_3(efststeq, i32, env, i32, i32)
 DEF_HELPER_3(efscmplt, i32, env, i32, i32)
 DEF_HELPER_3(efscmpgt, i32, env, i32, i32)
 DEF_HELPER_3(efscmpeq, i32, env, i32, i32)
-DEF_HELPER_3(evfststlt, i32, env, i64, i64)
-DEF_HELPER_3(evfststgt, i32, env, i64, i64)
-DEF_HELPER_3(evfststeq, i32, env, i64, i64)
-DEF_HELPER_3(evfscmplt, i32, env, i64, i64)
-DEF_HELPER_3(evfscmpgt, i32, env, i64, i64)
-DEF_HELPER_3(evfscmpeq, i32, env, i64, i64)
 DEF_HELPER_2(efdcfsi, i64, env, i32)
 DEF_HELPER_2(efdcfsid, i64, env, i64)
 DEF_HELPER_2(efdcfui, i64, env, i32)
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 2287064..d3ace6a 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -2602,7 +2602,7 @@ target_ulong helper_dlmzb(CPUPPCState *env, target_ulong 
high,
  done:
     env->xer = (env->xer & ~0x7F) | i;
     if (update_Rc) {
-        env->crf[0] |= xer_so;
+        env->cr[CRF_SO] = xer_so;
     }
     return i;
 }
diff --git a/target-ppc/machine.c b/target-ppc/machine.c
index c801b82..9fa309a 100644
--- a/target-ppc/machine.c
+++ b/target-ppc/machine.c
@@ -132,6 +132,10 @@ static void cpu_pre_save(void *opaque)
     CPUPPCState *env = &cpu->env;
     int i;
 
+    for (i = 0; i < 8; i++) {
+        env->crf[i] = ppc_get_crf(env, i);
+    }
+
     env->spr[SPR_LR] = env->lr;
     env->spr[SPR_CTR] = env->ctr;
     env->spr[SPR_XER] = env->xer;
@@ -165,6 +169,11 @@ static int cpu_post_load(void *opaque, int version_id)
      * software has to take care of running QEMU in a compatible mode.
      */
     env->spr[SPR_PVR] = env->spr_cb[SPR_PVR].default_value;
+
+    for (i = 0; i < 8; i++) {
+        ppc_set_crf(env, i, env->crf[i]);
+    }
+
     env->lr = env->spr[SPR_LR];
     env->ctr = env->spr[SPR_CTR];
     env->xer = env->spr[SPR_XER];
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 1ed6a8f..dd19b39 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -53,13 +53,13 @@ static char cpu_reg_names[10*3 + 22*4 /* GPR */
     + 10*4 + 22*5 /* FPR */
     + 2*(10*6 + 22*7) /* AVRh, AVRl */
     + 10*5 + 22*6 /* VSR */
-    + 8*5 /* CRF */];
+    + 32*8 /* CR */];
 static TCGv cpu_gpr[32];
 static TCGv cpu_gprh[32];
 static TCGv_i64 cpu_fpr[32];
 static TCGv_i64 cpu_avrh[32], cpu_avrl[32];
 static TCGv_i64 cpu_vsr[32];
-static TCGv_i32 cpu_crf[8];
+static TCGv_i32 cpu_cr[32];
 static TCGv cpu_nip;
 static TCGv cpu_msr;
 static TCGv cpu_ctr;
@@ -89,12 +89,13 @@ void ppc_translate_init(void)
     p = cpu_reg_names;
     cpu_reg_names_size = sizeof(cpu_reg_names);
 
-    for (i = 0; i < 8; i++) {
-        snprintf(p, cpu_reg_names_size, "crf%d", i);
-        cpu_crf[i] = tcg_global_mem_new_i32(TCG_AREG0,
-                                            offsetof(CPUPPCState, crf[i]), p);
-        p += 5;
-        cpu_reg_names_size -= 5;
+    for (i = 0; i < 32; i++) {
+        static const char names[] = "lt\0gt\0eq\0so";
+        snprintf(p, cpu_reg_names_size, "cr%d[%s]", i >> 2, names + (i & 3) * 
3);
+        cpu_cr[i] = tcg_global_mem_new_i32(TCG_AREG0,
+                                           offsetof(CPUPPCState, cr[i]), p);
+        p += 8;
+        cpu_reg_names_size -= 8;
     }
 
     for (i = 0; i < 32; i++) {
@@ -251,17 +252,38 @@ static inline void gen_reset_fpstatus(void)
 
 static inline void gen_op_mfcr(TCGv dest, int first_cr, int shift)
 {
-    tcg_gen_shli_i32(dest, cpu_crf[first_cr >> 2], shift);
+    TCGv_i32 t0 = tcg_temp_new_i32();
+
+    /*
+     * Pack the four 1-bit registers cpu_cr[first_cr..first_cr + 3] into
+     * dest: cpu_cr[first_cr + 3] lands at bit 'shift' and cpu_cr[first_cr]
+     * at bit 'shift + 3'.
+     */
+    tcg_gen_shli_i32(dest, cpu_cr[first_cr + 3], shift);
+    tcg_gen_shli_i32(t0, cpu_cr[first_cr + 2], shift + 1);
+    tcg_gen_or_i32(dest, dest, t0);
+    tcg_gen_shli_i32(t0, cpu_cr[first_cr + 1], shift + 2);
+    tcg_gen_or_i32(dest, dest, t0);
+    tcg_gen_shli_i32(t0, cpu_cr[first_cr], shift + 3);
+    tcg_gen_or_i32(dest, dest, t0);
+
+    tcg_temp_free_i32(t0);
 }
 
 static inline void gen_op_mtcr(int first_cr, TCGv src, int shift)
 {
     if (shift) {
-        tcg_gen_shri_i32(cpu_crf[first_cr >> 2], src, shift);
-        tcg_gen_andi_i32(cpu_crf[first_cr >> 2], cpu_crf[first_cr >> 2], 0x0F);
+        tcg_gen_shri_i32(cpu_cr[first_cr + 3], src, shift);
+        tcg_gen_andi_i32(cpu_cr[first_cr + 3], cpu_cr[first_cr + 3], 1);
     } else {
-        tcg_gen_andi_i32(cpu_crf[first_cr >> 2], src, 0x0F);
+        tcg_gen_andi_i32(cpu_cr[first_cr + 3], src, 1);
     }
+    tcg_gen_shri_i32(cpu_cr[first_cr + 2], src, shift + 1);
+    tcg_gen_andi_i32(cpu_cr[first_cr + 2], cpu_cr[first_cr + 2], 1);
+    tcg_gen_shri_i32(cpu_cr[first_cr + 1], src, shift + 2);
+    tcg_gen_andi_i32(cpu_cr[first_cr + 1], cpu_cr[first_cr + 1], 1);
+    tcg_gen_shri_i32(cpu_cr[first_cr], src, shift + 3);
+    tcg_gen_andi_i32(cpu_cr[first_cr], cpu_cr[first_cr], 1);
 }
 
 static inline void gen_compute_fprf(TCGv_i64 arg, int set_fprf, int set_rc)
@@ -675,27 +689,19 @@ static bool is_user_mode(DisasContext *ctx)
 static inline void gen_op_cmp(TCGv arg0, TCGv arg1, int s, int crf)
 {
     TCGv t0 = tcg_temp_new();
-    TCGv_i32 t1 = tcg_temp_new_i32();
 
-    tcg_gen_trunc_tl_i32(cpu_crf[crf], cpu_so);
+    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_SO], cpu_so);
 
     tcg_gen_setcond_tl((s ? TCG_COND_LT: TCG_COND_LTU), t0, arg0, arg1);
-    tcg_gen_trunc_tl_i32(t1, t0);
-    tcg_gen_shli_i32(t1, t1, CRF_LT);
-    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
+    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_LT], t0);
 
     tcg_gen_setcond_tl((s ? TCG_COND_GT: TCG_COND_GTU), t0, arg0, arg1);
-    tcg_gen_trunc_tl_i32(t1, t0);
-    tcg_gen_shli_i32(t1, t1, CRF_GT);
-    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
+    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_GT], t0);
 
     tcg_gen_setcond_tl(TCG_COND_EQ, t0, arg0, arg1);
-    tcg_gen_trunc_tl_i32(t1, t0);
-    tcg_gen_shli_i32(t1, t1, CRF_EQ);
-    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
+    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_EQ], t0);
 
     tcg_temp_free(t0);
-    tcg_temp_free_i32(t1);
 }
 
 static inline void gen_op_cmpi(TCGv arg0, target_ulong arg1, int s, int crf)
@@ -707,17 +713,27 @@ static inline void gen_op_cmpi(TCGv arg0, target_ulong arg1, int s, int crf)
 
 static inline void gen_op_cmp32(TCGv arg0, TCGv arg1, int s, int crf)
 {
-    TCGv t0, t1;
-    t0 = tcg_temp_new();
-    t1 = tcg_temp_new();
-    if (s) {
-        tcg_gen_ext32s_tl(t0, arg0);
-        tcg_gen_ext32s_tl(t1, arg1);
-    } else {
-        tcg_gen_ext32u_tl(t0, arg0);
-        tcg_gen_ext32u_tl(t1, arg1);
-    }
-    gen_op_cmp(t0, t1, s, crf);
-    tcg_temp_free(t1);
-    tcg_temp_free(t0);
+    TCGv_i32 t0, t1;
+
+    /* Compare only the low 32 bits of each operand. */
+    t0 = tcg_temp_new_i32();
+    t1 = tcg_temp_new_i32();
+    tcg_gen_trunc_tl_i32(t0, arg0);
+    tcg_gen_trunc_tl_i32(t1, arg1);
+
+    /* As in gen_op_cmp(), the SO bit of the field is a copy of cpu_so. */
+    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_SO], cpu_so);
+
+    /* s selects signed (LT/GT) or unsigned (LTU/GTU) conditions. */
+    tcg_gen_setcond_i32((s ? TCG_COND_LT: TCG_COND_LTU),
+                        cpu_cr[crf * 4 + CRF_LT], t0, t1);
+
+    tcg_gen_setcond_i32((s ? TCG_COND_GT: TCG_COND_GTU),
+                        cpu_cr[crf * 4 + CRF_GT], t0, t1);
+
+    tcg_gen_setcond_i32(TCG_COND_EQ,
+                        cpu_cr[crf * 4 + CRF_EQ], t0, t1);
+
+    tcg_temp_free_i32(t1);
+    tcg_temp_free_i32(t0);
 }
@@ -790,15 +801,10 @@ static void gen_cmpli(DisasContext *ctx)
 static void gen_isel(DisasContext *ctx)
 {
     uint32_t bi = rC(ctx->opcode);
-    uint32_t mask;
-    TCGv_i32 t0;
     TCGv t1, true_op, zero;
 
-    mask = 0x08 >> (bi & 0x03);
-    t0 = tcg_temp_new_i32();
-    tcg_gen_andi_i32(t0, cpu_crf[bi >> 2], mask);
     t1 = tcg_temp_new();
-    tcg_gen_extu_i32_tl(t1, t0);
+    tcg_gen_extu_i32_tl(t1, cpu_cr[bi]);
     zero = tcg_const_tl(0);
     if (rA(ctx->opcode) == 0)
         true_op = zero;
@@ -2288,21 +2294,29 @@ GEN_FLOAT_B(rim, 0x08, 0x0F, 1, PPC_FLOAT_EXT);
 
 static void gen_ftdiv(DisasContext *ctx)
 {
+    TCGv_i32 crf;
     if (unlikely(!ctx->fpu_enabled)) {
         gen_exception(ctx, POWERPC_EXCP_FPU);
         return;
     }
-    gen_helper_ftdiv(cpu_crf[crfD(ctx->opcode)], cpu_fpr[rA(ctx->opcode)],
+    crf = tcg_temp_new_i32();
+    gen_helper_ftdiv(crf, cpu_fpr[rA(ctx->opcode)],
                      cpu_fpr[rB(ctx->opcode)]);
+    gen_op_mtcr(crfD(ctx->opcode) << 2, crf, 0);
+    tcg_temp_free_i32(crf);
 }
 
 static void gen_ftsqrt(DisasContext *ctx)
 {
+    TCGv_i32 crf;
     if (unlikely(!ctx->fpu_enabled)) {
         gen_exception(ctx, POWERPC_EXCP_FPU);
         return;
     }
-    gen_helper_ftsqrt(cpu_crf[crfD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)]);
+    crf = tcg_temp_new_i32();
+    gen_helper_ftsqrt(crf, cpu_fpr[rB(ctx->opcode)]);
+    gen_op_mtcr(crfD(ctx->opcode) << 2, crf, 0);
+    tcg_temp_free_i32(crf);
 }
 
 
@@ -3300,10 +3314,13 @@ static void gen_conditional_store(DisasContext *ctx, 
TCGv EA,
 {
     int l1;
 
-    tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
+    tcg_gen_trunc_tl_i32(cpu_cr[CRF_SO], cpu_so);
+    tcg_gen_movi_i32(cpu_cr[CRF_LT], 0);
+    tcg_gen_movi_i32(cpu_cr[CRF_EQ], 0);
+    tcg_gen_movi_i32(cpu_cr[CRF_GT], 0);
     l1 = gen_new_label();
     tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, l1);
-    tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 1 << CRF_EQ);
+    tcg_gen_movi_i32(cpu_cr[CRF_EQ], 1);
 #if defined(TARGET_PPC64)
     if (size == 8) {
         gen_qemu_st64(ctx, cpu_gpr[reg], EA);
@@ -3870,17 +3887,11 @@ static inline void gen_bcond(DisasContext *ctx, int 
type)
     if ((bo & 0x10) == 0) {
         /* Test CR */
         uint32_t bi = BI(ctx->opcode);
-        uint32_t mask = 0x08 >> (bi & 0x03);
-        TCGv_i32 temp = tcg_temp_new_i32();
-
         if (bo & 0x8) {
-            tcg_gen_andi_i32(temp, cpu_crf[bi >> 2], mask);
-            tcg_gen_brcondi_i32(TCG_COND_EQ, temp, 0, l1);
+            tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[bi], 0, l1);
         } else {
-            tcg_gen_andi_i32(temp, cpu_crf[bi >> 2], mask);
-            tcg_gen_brcondi_i32(TCG_COND_NE, temp, 0, l1);
+            tcg_gen_brcondi_i32(TCG_COND_NE, cpu_cr[bi], 0, l1);
         }
-        tcg_temp_free_i32(temp);
     }
     gen_update_cfar(ctx, ctx->nip);
     if (type == BCOND_IM) {
@@ -3929,35 +3940,11 @@ static void gen_bctar(DisasContext *ctx)
 }
 
 /***                      Condition register logical                       ***/
-#define GEN_CRLOGIC(name, tcg_op, opc)                                        \
-static void glue(gen_, name)(DisasContext *ctx)                                
       \
-{                                                                             \
-    uint8_t bitmask;                                                          \
-    int sh;                                                                   \
-    TCGv_i32 t0, t1;                                                          \
-    sh = (crbD(ctx->opcode) & 0x03) - (crbA(ctx->opcode) & 0x03);             \
-    t0 = tcg_temp_new_i32();                                                  \
-    if (sh > 0)                                                               \
-        tcg_gen_shri_i32(t0, cpu_crf[crbA(ctx->opcode) >> 2], sh);            \
-    else if (sh < 0)                                                          \
-        tcg_gen_shli_i32(t0, cpu_crf[crbA(ctx->opcode) >> 2], -sh);           \
-    else                                                                      \
-        tcg_gen_mov_i32(t0, cpu_crf[crbA(ctx->opcode) >> 2]);                 \
-    t1 = tcg_temp_new_i32();                                                  \
-    sh = (crbD(ctx->opcode) & 0x03) - (crbB(ctx->opcode) & 0x03);             \
-    if (sh > 0)                                                               \
-        tcg_gen_shri_i32(t1, cpu_crf[crbB(ctx->opcode) >> 2], sh);            \
-    else if (sh < 0)                                                          \
-        tcg_gen_shli_i32(t1, cpu_crf[crbB(ctx->opcode) >> 2], -sh);           \
-    else                                                                      \
-        tcg_gen_mov_i32(t1, cpu_crf[crbB(ctx->opcode) >> 2]);                 \
-    tcg_op(t0, t0, t1);                                                       \
-    bitmask = 0x08 >> (crbD(ctx->opcode) & 0x03);                             \
-    tcg_gen_andi_i32(t0, t0, bitmask);                                        \
-    tcg_gen_andi_i32(t1, cpu_crf[crbD(ctx->opcode) >> 2], ~bitmask);          \
-    tcg_gen_or_i32(cpu_crf[crbD(ctx->opcode) >> 2], t0, t1);                  \
-    tcg_temp_free_i32(t0);                                                    \
-    tcg_temp_free_i32(t1);                                                    \
+#define GEN_CRLOGIC(name, tcg_op, opc)                                         
\
+static void glue(gen_, name)(DisasContext *ctx)                                
\
+{                                                                              
\
+    tcg_op(cpu_cr[crbD(ctx->opcode)], cpu_cr[crbA(ctx->opcode)],               
\
+           cpu_cr[crbB(ctx->opcode)]);                                         
\
 }
 
 /* crand */
@@ -3980,7 +3967,11 @@ GEN_CRLOGIC(crxor, tcg_gen_xor_i32, 0x06);
 /* mcrf */
 static void gen_mcrf(DisasContext *ctx)
 {
-    tcg_gen_mov_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfS(ctx->opcode)]);
+    int i;
+    for (i = 0; i < 4; i++) {
+        tcg_gen_mov_i32(cpu_cr[crfD(ctx->opcode) * 4 + i],
+                        cpu_cr[crfS(ctx->opcode) * 4 + i]);
+    }
 }
 
 /***                           System linkage                              ***/
@@ -4133,20 +4124,12 @@ static void gen_write_xer(TCGv src)
 /* mcrxr */
 static void gen_mcrxr(DisasContext *ctx)
 {
-    TCGv_i32 t0 = tcg_temp_new_i32();
-    TCGv_i32 t1 = tcg_temp_new_i32();
-    TCGv_i32 dst = cpu_crf[crfD(ctx->opcode)];
-
-    tcg_gen_trunc_tl_i32(t0, cpu_so);
-    tcg_gen_trunc_tl_i32(t1, cpu_ov);
-    tcg_gen_trunc_tl_i32(dst, cpu_ca);
-    tcg_gen_shli_i32(t0, t0, 3);
-    tcg_gen_shli_i32(t1, t1, 2);
-    tcg_gen_shli_i32(dst, dst, 1);
-    tcg_gen_or_i32(dst, dst, t0);
-    tcg_gen_or_i32(dst, dst, t1);
-    tcg_temp_free_i32(t0);
-    tcg_temp_free_i32(t1);
+    int crf = crfD(ctx->opcode);
+
+    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_LT], cpu_so);
+    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_GT], cpu_ov);
+    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_EQ], cpu_ca);
+    tcg_gen_movi_i32(cpu_cr[crf * 4 + CRF_SO], 0);
 
     tcg_gen_movi_tl(cpu_so, 0);
     tcg_gen_movi_tl(cpu_ov, 0);
@@ -6320,11 +6303,13 @@ static void gen_tlbsx_40x(DisasContext *ctx)
     gen_helper_4xx_tlbsx(cpu_gpr[rD(ctx->opcode)], cpu_env, t0);
     tcg_temp_free(t0);
     if (Rc(ctx->opcode)) {
-        int l1 = gen_new_label();
-        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[rD(ctx->opcode)], -1, l1);
-        tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 0x02);
-        gen_set_label(l1);
+        t0 = tcg_temp_new();
+        tcg_gen_trunc_tl_i32(cpu_cr[CRF_SO], cpu_so);
+        tcg_gen_movi_i32(cpu_cr[CRF_LT], 0);
+        tcg_gen_movi_i32(cpu_cr[CRF_GT], 0);
+        tcg_gen_setcondi_tl(TCG_COND_EQ, t0, cpu_gpr[rD(ctx->opcode)], -1);
+        tcg_gen_trunc_tl_i32(cpu_cr[CRF_EQ], t0);
+        tcg_temp_free(t0);
     }
 #endif
 }
@@ -6401,11 +6386,13 @@ static void gen_tlbsx_440(DisasContext *ctx)
     gen_helper_440_tlbsx(cpu_gpr[rD(ctx->opcode)], cpu_env, t0);
     tcg_temp_free(t0);
     if (Rc(ctx->opcode)) {
-        int l1 = gen_new_label();
-        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[rD(ctx->opcode)], -1, l1);
-        tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 0x02);
-        gen_set_label(l1);
+        t0 = tcg_temp_new();
+        tcg_gen_trunc_tl_i32(cpu_cr[CRF_SO], cpu_so);
+        tcg_gen_movi_i32(cpu_cr[CRF_LT], 0);
+        tcg_gen_movi_i32(cpu_cr[CRF_GT], 0);
+        tcg_gen_setcondi_tl(TCG_COND_EQ, t0, cpu_gpr[rD(ctx->opcode)], -1);
+        tcg_gen_trunc_tl_i32(cpu_cr[CRF_EQ], t0);
+        tcg_temp_free(t0);
     }
 #endif
 }
@@ -7371,7 +7358,7 @@ GEN_VXFORM(vpmsumd, 4, 19)
 static void gen_##op(DisasContext *ctx)             \
 {                                                   \
     TCGv_ptr ra, rb, rd;                            \
-    TCGv_i32 ps;                                    \
+    TCGv_i32 ps, crf;                               \
                                                     \
     if (unlikely(!ctx->altivec_enabled)) {          \
         gen_exception(ctx, POWERPC_EXCP_VPU);       \
@@ -7383,13 +7370,17 @@ static void gen_##op(DisasContext *ctx)             \
     rd = gen_avr_ptr(rD(ctx->opcode));              \
                                                     \
     ps = tcg_const_i32((ctx->opcode & 0x200) != 0); \
+    crf = tcg_temp_new_i32();                       \
                                                     \
-    gen_helper_##op(cpu_crf[6], rd, ra, rb, ps);    \
+    gen_helper_##op(crf, rd, ra, rb, ps);           \
+    /* Unpack helper result into cpu_cr[24..27] */  \
+    gen_op_mtcr(6 << 2, crf, 0);                    \
                                                     \
     tcg_temp_free_ptr(ra);                          \
     tcg_temp_free_ptr(rb);                          \
     tcg_temp_free_ptr(rd);                          \
     tcg_temp_free_i32(ps);                          \
+    tcg_temp_free_i32(crf);                         \
 }
 
 GEN_BCD(bcdadd)
@@ -8217,6 +8207,7 @@ static void gen_##name(DisasContext *ctx)        \
 static void gen_##name(DisasContext *ctx)         \
 {                                                 \
     TCGv_ptr ra, rb;                              \
+    TCGv_i32 tmp;                                 \
     if (unlikely(!ctx->fpu_enabled)) {            \
         gen_exception(ctx, POWERPC_EXCP_FPU);     \
         return;                                   \
@@ -8224,8 +8215,10 @@ static void gen_##name(DisasContext *ctx)         \
     gen_update_nip(ctx, ctx->nip - 4);            \
     ra = gen_fprp_ptr(rA(ctx->opcode));           \
     rb = gen_fprp_ptr(rB(ctx->opcode));           \
-    gen_helper_##name(cpu_crf[crfD(ctx->opcode)], \
-                      cpu_env, ra, rb);           \
+    tmp = tcg_temp_new_i32();                     \
+    gen_helper_##name(tmp, cpu_env, ra, rb);      \
+    gen_op_mtcr(crfD(ctx->opcode) << 2, tmp, 0);  \
+    tcg_temp_free_i32(tmp);                       \
     tcg_temp_free_ptr(ra);                        \
     tcg_temp_free_ptr(rb);                        \
 }
@@ -8234,7 +8227,7 @@ static void gen_##name(DisasContext *ctx)         \
 static void gen_##name(DisasContext *ctx)         \
 {                                                 \
     TCGv_ptr ra;                                  \
-    TCGv_i32 dcm;                                 \
+    TCGv_i32 dcm, tmp;                            \
     if (unlikely(!ctx->fpu_enabled)) {            \
         gen_exception(ctx, POWERPC_EXCP_FPU);     \
         return;                                   \
@@ -8242,8 +8235,10 @@ static void gen_##name(DisasContext *ctx)         \
     gen_update_nip(ctx, ctx->nip - 4);            \
     ra = gen_fprp_ptr(rA(ctx->opcode));           \
     dcm = tcg_const_i32(DCM(ctx->opcode));        \
-    gen_helper_##name(cpu_crf[crfD(ctx->opcode)], \
-                      cpu_env, ra, dcm);          \
+    tmp = tcg_temp_new_i32();                     \
+    gen_helper_##name(tmp, cpu_env, ra, dcm);     \
+    gen_op_mtcr(crfD(ctx->opcode) << 2, tmp, 0);  \
+    tcg_temp_free_i32(tmp);                       \
     tcg_temp_free_ptr(ra);                        \
     tcg_temp_free_i32(dcm);                       \
 }
@@ -8668,37 +8663,37 @@ GEN_SPEOP_ARITH_IMM2(evsubifw, tcg_gen_subi_i32);
 #define GEN_SPEOP_COMP(name, tcg_cond)                                        \
 static inline void gen_##name(DisasContext *ctx)                              \
 {                                                                             \
+    TCGv tmp;                                                                 \
+                                                                              \
     if (unlikely(!ctx->spe_enabled)) {                                        \
         gen_exception(ctx, POWERPC_EXCP_SPEU);                                \
         return;                                                               \
     }                                                                         \
-    int l1 = gen_new_label();                                                 \
-    int l2 = gen_new_label();                                                 \
-    int l3 = gen_new_label();                                                 \
-    int l4 = gen_new_label();                                                 \
+    /* Allocate after the SPE check so the early return cannot leak it. */    \
+    tmp = tcg_temp_new();                                                     \
                                                                               \
     tcg_gen_ext32s_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);    \
     tcg_gen_ext32s_tl(cpu_gpr[rB(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);    \
     tcg_gen_ext32s_tl(cpu_gprh[rA(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]);  \
     tcg_gen_ext32s_tl(cpu_gprh[rB(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]);  \
                                                                               \
-    tcg_gen_brcond_tl(tcg_cond, cpu_gpr[rA(ctx->opcode)],                     \
-                       cpu_gpr[rB(ctx->opcode)], l1);                         \
-    tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 0);                          \
-    tcg_gen_br(l2);                                                           \
-    gen_set_label(l1);                                                        \
-    tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)],                              \
-                     CRF_CL | CRF_CH_OR_CL | CRF_CH_AND_CL);                  \
-    gen_set_label(l2);                                                        \
-    tcg_gen_brcond_tl(tcg_cond, cpu_gprh[rA(ctx->opcode)],                    \
-                       cpu_gprh[rB(ctx->opcode)], l3);                        \
-    tcg_gen_andi_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)],  \
-                     ~(CRF_CH | CRF_CH_AND_CL));                              \
-    tcg_gen_br(l4);                                                           \
-    gen_set_label(l3);                                                        \
-    tcg_gen_ori_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)],   \
-                    CRF_CH | CRF_CH_OR_CL);                                   \
-    gen_set_label(l4);                                                        \
+    /* CL = cond(gpr[rA], gpr[rB]); CH = cond(gprh[rA], gprh[rB]). */         \
+    tcg_gen_setcond_tl(tcg_cond, tmp,                                         \
+                       cpu_gpr[rA(ctx->opcode)],                              \
+                       cpu_gpr[rB(ctx->opcode)]);                             \
+    tcg_gen_trunc_tl_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL], tmp);        \
+    tcg_gen_setcond_tl(tcg_cond, tmp,                                         \
+                       cpu_gprh[rA(ctx->opcode)],                             \
+                       cpu_gprh[rB(ctx->opcode)]);                            \
+    tcg_gen_trunc_tl_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], tmp);        \
+    /* CH_OR_CL and CH_AND_CL are derived from the two bits above. */         \
+    tcg_gen_or_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_OR_CL],              \
+                   cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH],                    \
+                   cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]);                   \
+    tcg_gen_and_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_AND_CL],            \
+                    cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH],                   \
+                    cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]);                  \
+    tcg_temp_free(tmp);                                                       \
 }
 GEN_SPEOP_COMP(evcmpgtu, TCG_COND_GTU);
 GEN_SPEOP_COMP(evcmpgts, TCG_COND_GT);
@@ -8769,22 +8759,20 @@ static inline void gen_evsel(DisasContext *ctx)
     int l2 = gen_new_label();
     int l3 = gen_new_label();
     int l4 = gen_new_label();
-    TCGv_i32 t0 = tcg_temp_local_new_i32();
-    tcg_gen_andi_i32(t0, cpu_crf[ctx->opcode & 0x07], 1 << 3);
-    tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
+
+    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[(ctx->opcode & 0x07) * 4], 0, l1);
     tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]);
     tcg_gen_br(l2);
     gen_set_label(l1);
     tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]);
     gen_set_label(l2);
-    tcg_gen_andi_i32(t0, cpu_crf[ctx->opcode & 0x07], 1 << 2);
-    tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l3);
+
+    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[(ctx->opcode & 0x07) * 4 + 1], 0, l3);
     tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
     tcg_gen_br(l4);
     gen_set_label(l3);
     tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);
     gen_set_label(l4);
-    tcg_temp_free_i32(t0);
 }
 
 static void gen_evsel0(DisasContext *ctx)
@@ -9366,9 +9354,12 @@ static inline void gen_##name(DisasContext *ctx)                              \
     t0 = tcg_temp_new_i32();                                                  \
     t1 = tcg_temp_new_i32();                                                  \
                                                                               \
+    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_LT], 0);              \
+    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_GT], 0);              \
+    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_SO], 0);              \
     tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);                       \
     tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);                       \
-    gen_helper_##name(cpu_crf[crfD(ctx->opcode)], cpu_env, t0, t1);           \
+    gen_helper_##name(cpu_cr[crfD(ctx->opcode) * 4 + CRF_EQ], cpu_env, t0, t1); \
                                                                               \
     tcg_temp_free_i32(t0);                                                    \
     tcg_temp_free_i32(t1);                                                    \
@@ -9385,10 +9376,32 @@ static inline void gen_##name(DisasContext *ctx)                              \
     t1 = tcg_temp_new_i64();                                                  \
     gen_load_gpr64(t0, rA(ctx->opcode));                                      \
     gen_load_gpr64(t1, rB(ctx->opcode));                                      \
-    gen_helper_##name(cpu_crf[crfD(ctx->opcode)], cpu_env, t0, t1);           \
+    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_LT], 0);              \
+    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_GT], 0);              \
+    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_SO], 0);              \
+    gen_helper_##name(cpu_cr[crfD(ctx->opcode) * 4 + CRF_EQ], cpu_env,        \
+                      t0, t1);                                                \
     tcg_temp_free_i64(t0);                                                    \
     tcg_temp_free_i64(t1);                                                    \
 }
+#define GEN_SPEFPUOP_COMP_V64(name, helper)                                   \
+static inline void gen_##name(DisasContext *ctx)                              \
+{                                                                             \
+    if (unlikely(!ctx->spe_enabled)) {                                        \
+        gen_exception(ctx, POWERPC_EXCP_SPEU);                                \
+        return;                                                               \
+    }                                                                         \
+    gen_helper_##helper(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL], cpu_env,      \
+                        cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);  \
+    gen_helper_##helper(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], cpu_env,      \
+                        cpu_gprh[rA(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]);\
+    tcg_gen_or_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_OR_CL],              \
+                   cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH],                    \
+                   cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]);                   \
+    tcg_gen_and_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_AND_CL],            \
+                    cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH],                   \
+                    cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]);                  \
+}
 
 /* Single precision floating-point vectors operations */
 /* Arithmetic */
@@ -9443,12 +9456,12 @@ GEN_SPEFPUOP_CONV_64_64(evfsctuiz);
 GEN_SPEFPUOP_CONV_64_64(evfsctsiz);
 
 /* Comparison */
-GEN_SPEFPUOP_COMP_64(evfscmpgt);
-GEN_SPEFPUOP_COMP_64(evfscmplt);
-GEN_SPEFPUOP_COMP_64(evfscmpeq);
-GEN_SPEFPUOP_COMP_64(evfststgt);
-GEN_SPEFPUOP_COMP_64(evfststlt);
-GEN_SPEFPUOP_COMP_64(evfststeq);
+GEN_SPEFPUOP_COMP_V64(evfscmpgt, efscmpgt);
+GEN_SPEFPUOP_COMP_V64(evfscmplt, efscmplt);
+GEN_SPEFPUOP_COMP_V64(evfscmpeq, efscmpeq);
+GEN_SPEFPUOP_COMP_V64(evfststgt, efststgt);
+GEN_SPEFPUOP_COMP_V64(evfststlt, efststlt);
+GEN_SPEFPUOP_COMP_V64(evfststeq, efststeq);
 
 /* Opcodes definitions */
 GEN_SPE(evfsadd,   evfssub,   0x00, 0x0A, 0x00000000, 0x00000000, PPC_SPE_SINGLE); //
-- 
1.8.3.1





reply via email to

[Prev in Thread] Current Thread [Next in Thread]