[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH 13/13] alpha: Implement fp branch/cmov inline.
From: Richard Henderson
Subject: [Qemu-devel] [PATCH 13/13] alpha: Implement fp branch/cmov inline.
Date: Fri, 11 Dec 2009 15:07:43 -0800
The old fcmov implementation had a typo:
- tcg_gen_mov_i64(cpu_fir[rc], cpu_fir[ra]);
which moved the condition (ra), not the second source (rb), to the destination.
Rather than only fixing the typo, implement the simplified fp comparison
against zero inline, which also makes the cmpf* helpers unnecessary.
Signed-off-by: Richard Henderson <address@hidden>
---
target-alpha/helper.h | 7 --
target-alpha/op_helper.c | 31 -------
target-alpha/translate.c | 197 ++++++++++++++++++++++++++--------------------
3 files changed, 110 insertions(+), 125 deletions(-)
diff --git a/target-alpha/helper.h b/target-alpha/helper.h
index 4eb3b6f..bedd3c0 100644
--- a/target-alpha/helper.h
+++ b/target-alpha/helper.h
@@ -77,13 +77,6 @@ DEF_HELPER_2(cmpgeq, i64, i64, i64)
DEF_HELPER_2(cmpgle, i64, i64, i64)
DEF_HELPER_2(cmpglt, i64, i64, i64)
-DEF_HELPER_1(cmpfeq, i64, i64)
-DEF_HELPER_1(cmpfne, i64, i64)
-DEF_HELPER_1(cmpflt, i64, i64)
-DEF_HELPER_1(cmpfle, i64, i64)
-DEF_HELPER_1(cmpfgt, i64, i64)
-DEF_HELPER_1(cmpfge, i64, i64)
-
DEF_HELPER_2(cpys, i64, i64, i64)
DEF_HELPER_2(cpysn, i64, i64, i64)
DEF_HELPER_2(cpyse, i64, i64, i64)
diff --git a/target-alpha/op_helper.c b/target-alpha/op_helper.c
index d7f4fb2..8eba5ec 100644
--- a/target-alpha/op_helper.c
+++ b/target-alpha/op_helper.c
@@ -884,37 +884,6 @@ uint64_t helper_cmpglt(uint64_t a, uint64_t b)
return 0;
}
-uint64_t helper_cmpfeq (uint64_t a)
-{
- return !(a & 0x7FFFFFFFFFFFFFFFULL);
-}
-
-uint64_t helper_cmpfne (uint64_t a)
-{
- return (a & 0x7FFFFFFFFFFFFFFFULL);
-}
-
-uint64_t helper_cmpflt (uint64_t a)
-{
- return (a & 0x8000000000000000ULL) && (a & 0x7FFFFFFFFFFFFFFFULL);
-}
-
-uint64_t helper_cmpfle (uint64_t a)
-{
- return (a & 0x8000000000000000ULL) || !(a & 0x7FFFFFFFFFFFFFFFULL);
-}
-
-uint64_t helper_cmpfgt (uint64_t a)
-{
- return !(a & 0x8000000000000000ULL) && (a & 0x7FFFFFFFFFFFFFFFULL);
-}
-
-uint64_t helper_cmpfge (uint64_t a)
-{
- return !(a & 0x8000000000000000ULL) || !(a & 0x7FFFFFFFFFFFFFFFULL);
-}
-
-
/* Floating point format conversion */
uint64_t helper_cvtts (uint64_t a)
{
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index e426677..5b34fc6 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -294,77 +294,98 @@ static inline void gen_store_mem(DisasContext *ctx,
tcg_temp_free(addr);
}
-static inline void gen_bcond(DisasContext *ctx, TCGCond cond, int ra,
- int32_t disp, int mask)
+static void gen_bcond_pcload(DisasContext *ctx, int32_t disp, int lab_true)
{
- int l1, l2;
+ int lab_over = gen_new_label();
+
+ tcg_gen_movi_i64(cpu_pc, ctx->pc);
+ tcg_gen_br(lab_over);
+ gen_set_label(lab_true);
+ tcg_gen_movi_i64(cpu_pc, ctx->pc + (int64_t)(disp << 2));
+ gen_set_label(lab_over);
+}
+
+static void gen_bcond(DisasContext *ctx, TCGCond cond, int ra,
+ int32_t disp, int mask)
+{
+ int lab_true = gen_new_label();
- l1 = gen_new_label();
- l2 = gen_new_label();
if (likely(ra != 31)) {
if (mask) {
TCGv tmp = tcg_temp_new();
tcg_gen_andi_i64(tmp, cpu_ir[ra], 1);
- tcg_gen_brcondi_i64(cond, tmp, 0, l1);
+ tcg_gen_brcondi_i64(cond, tmp, 0, lab_true);
tcg_temp_free(tmp);
- } else
- tcg_gen_brcondi_i64(cond, cpu_ir[ra], 0, l1);
+ } else {
+ tcg_gen_brcondi_i64(cond, cpu_ir[ra], 0, lab_true);
+ }
} else {
/* Very uncommon case - Do not bother to optimize. */
TCGv tmp = tcg_const_i64(0);
- tcg_gen_brcondi_i64(cond, tmp, 0, l1);
+ tcg_gen_brcondi_i64(cond, tmp, 0, lab_true);
tcg_temp_free(tmp);
}
- tcg_gen_movi_i64(cpu_pc, ctx->pc);
- tcg_gen_br(l2);
- gen_set_label(l1);
- tcg_gen_movi_i64(cpu_pc, ctx->pc + (int64_t)(disp << 2));
- gen_set_label(l2);
+ gen_bcond_pcload(ctx, disp, lab_true);
}
-static inline void gen_fbcond(DisasContext *ctx, int opc, int ra, int32_t disp)
+/* Generate a forward TCG branch to LAB_TRUE if RA cmp 0.0.
+ This is complicated by the fact that -0.0 compares the same as +0.0. */
+
+static void gen_fbcond_internal(TCGCond cond, TCGv src, int lab_true)
{
- int l1, l2;
+ int lab_false = -1;
+ uint64_t mzero = 1ull << 63;
TCGv tmp;
- TCGv src;
-
- l1 = gen_new_label();
- l2 = gen_new_label();
- if (ra != 31) {
+
+ switch (cond) {
+ case TCG_COND_LE:
+ case TCG_COND_GT:
+ /* For <= or >, the -0.0 value directly compares the way we want. */
+ tcg_gen_brcondi_i64(cond, src, 0, lab_true);
+ break;
+
+ case TCG_COND_EQ:
+ case TCG_COND_NE:
+ /* For == or !=, we can simply mask off the sign bit and compare. */
+ /* ??? Assume that the temporary is reclaimed at the branch. */
tmp = tcg_temp_new();
- src = cpu_fir[ra];
- } else {
- tmp = tcg_const_i64(0);
- src = tmp;
- }
- switch (opc) {
- case 0x31: /* FBEQ */
- gen_helper_cmpfeq(tmp, src);
- break;
- case 0x32: /* FBLT */
- gen_helper_cmpflt(tmp, src);
- break;
- case 0x33: /* FBLE */
- gen_helper_cmpfle(tmp, src);
- break;
- case 0x35: /* FBNE */
- gen_helper_cmpfne(tmp, src);
+ tcg_gen_andi_i64(tmp, src, mzero - 1);
+ tcg_gen_brcondi_i64(cond, tmp, 0, lab_true);
break;
- case 0x36: /* FBGE */
- gen_helper_cmpfge(tmp, src);
+
+ case TCG_COND_GE:
+ /* For >=, emit two branches to the destination. */
+ tcg_gen_brcondi_i64(cond, src, 0, lab_true);
+ tcg_gen_brcondi_i64(TCG_COND_EQ, src, mzero, lab_true);
break;
- case 0x37: /* FBGT */
- gen_helper_cmpfgt(tmp, src);
+
+ case TCG_COND_LT:
+ /* For <, first filter out -0.0 to what will be the fallthru. */
+ lab_false = gen_new_label();
+ tcg_gen_brcondi_i64(TCG_COND_EQ, src, mzero, lab_false);
+ tcg_gen_brcondi_i64(cond, src, 0, lab_true);
+ gen_set_label(lab_false);
break;
+
default:
abort();
}
- tcg_gen_brcondi_i64(TCG_COND_NE, tmp, 0, l1);
- tcg_gen_movi_i64(cpu_pc, ctx->pc);
- tcg_gen_br(l2);
- gen_set_label(l1);
- tcg_gen_movi_i64(cpu_pc, ctx->pc + (int64_t)(disp << 2));
- gen_set_label(l2);
+}
+
+static void gen_fbcond(DisasContext *ctx, TCGCond cond, int ra, int32_t disp)
+{
+ int lab_true;
+
+ if (unlikely(ra == 31)) {
+ /* Very uncommon case, but easier to optimize it to an integer
+ comparison than continuing with the floating point comparison. */
+ gen_bcond(ctx, cond, ra, disp, 0);
+ return;
+ }
+
+ lab_true = gen_new_label();
+ gen_fbcond_internal(cond, cpu_fir[ra], lab_true);
+ gen_bcond_pcload(ctx, disp, lab_true);
}
static inline void gen_cmov(TCGCond inv_cond, int ra, int rb, int rc,
@@ -399,6 +420,28 @@ static inline void gen_cmov(TCGCond inv_cond, int ra, int rb, int rc,
gen_set_label(l1);
}
+static void gen_fcmov(TCGCond inv_cond, int ra, int rb, int rc)
+{
+ TCGv va = cpu_fir[ra];
+ int l1;
+
+ if (unlikely(rc == 31))
+ return;
+ if (unlikely(ra == 31)) {
+ /* ??? Assume that the temporary is reclaimed at the branch. */
+ va = tcg_const_i64(0);
+ }
+
+ l1 = gen_new_label();
+ gen_fbcond_internal(inv_cond, va, l1);
+
+ if (rb != 31)
+ tcg_gen_mov_i64(cpu_fir[rc], cpu_fir[rb]);
+ else
+ tcg_gen_movi_i64(cpu_fir[rc], 0);
+ gen_set_label(l1);
+}
+
#define FARITH2(name) \
static inline void glue(gen_f, name)(int rb, int rc) \
{ \
@@ -482,38 +525,6 @@ FARITH3(cpys)
FARITH3(cpysn)
FARITH3(cpyse)
-#define FCMOV(name) \
-static inline void glue(gen_f, name)(int ra, int rb, int rc) \
-{ \
- int l1; \
- TCGv tmp; \
- \
- if (unlikely(rc == 31)) \
- return; \
- \
- l1 = gen_new_label(); \
- tmp = tcg_temp_new(); \
- if (ra != 31) { \
- tmp = tcg_temp_new(); \
- gen_helper_ ## name (tmp, cpu_fir[ra]); \
- } else { \
- tmp = tcg_const_i64(0); \
- gen_helper_ ## name (tmp, tmp); \
- } \
- tcg_gen_brcondi_i64(TCG_COND_EQ, tmp, 0, l1); \
- if (rb != 31) \
- tcg_gen_mov_i64(cpu_fir[rc], cpu_fir[ra]); \
- else \
- tcg_gen_movi_i64(cpu_fir[rc], 0); \
- gen_set_label(l1); \
-}
-FCMOV(cmpfeq)
-FCMOV(cmpfne)
-FCMOV(cmpflt)
-FCMOV(cmpfge)
-FCMOV(cmpfle)
-FCMOV(cmpfgt)
-
static inline uint64_t zapnot_mask(uint8_t lit)
{
uint64_t mask = 0;
@@ -1871,27 +1882,27 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn)
break;
case 0x02A:
/* FCMOVEQ */
- gen_fcmpfeq(ra, rb, rc);
+ gen_fcmov(TCG_COND_NE, ra, rb, rc);
break;
case 0x02B:
/* FCMOVNE */
- gen_fcmpfne(ra, rb, rc);
+ gen_fcmov(TCG_COND_EQ, ra, rb, rc);
break;
case 0x02C:
/* FCMOVLT */
- gen_fcmpflt(ra, rb, rc);
+ gen_fcmov(TCG_COND_GE, ra, rb, rc);
break;
case 0x02D:
/* FCMOVGE */
- gen_fcmpfge(ra, rb, rc);
+ gen_fcmov(TCG_COND_LT, ra, rb, rc);
break;
case 0x02E:
/* FCMOVLE */
- gen_fcmpfle(ra, rb, rc);
+ gen_fcmov(TCG_COND_GT, ra, rb, rc);
break;
case 0x02F:
/* FCMOVGT */
- gen_fcmpfgt(ra, rb, rc);
+ gen_fcmov(TCG_COND_LE, ra, rb, rc);
break;
case 0x030:
/* CVTQL */
@@ -2482,9 +2493,15 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn)
ret = 1;
break;
case 0x31: /* FBEQ */
+ gen_fbcond(ctx, TCG_COND_EQ, ra, disp21);
+ ret = 1;
+ break;
case 0x32: /* FBLT */
+ gen_fbcond(ctx, TCG_COND_LT, ra, disp21);
+ ret = 1;
+ break;
case 0x33: /* FBLE */
- gen_fbcond(ctx, opc, ra, disp21);
+ gen_fbcond(ctx, TCG_COND_LE, ra, disp21);
ret = 1;
break;
case 0x34:
@@ -2495,9 +2512,15 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn)
ret = 1;
break;
case 0x35: /* FBNE */
+ gen_fbcond(ctx, TCG_COND_NE, ra, disp21);
+ ret = 1;
+ break;
case 0x36: /* FBGE */
+ gen_fbcond(ctx, TCG_COND_GE, ra, disp21);
+ ret = 1;
+ break;
case 0x37: /* FBGT */
- gen_fbcond(ctx, opc, ra, disp21);
+ gen_fbcond(ctx, TCG_COND_GT, ra, disp21);
ret = 1;
break;
case 0x38:
- [Qemu-devel] [PATCH 00/13] Alpha emulation improvements, round two, Richard Henderson, 2009/12/11
- [Qemu-devel] [PATCH 02/13] alpha: Fix -d in_asm, Richard Henderson, 2009/12/11
- [Qemu-devel] [PATCH 01/13] alpha: Implement missing MVI instructions., Richard Henderson, 2009/12/11
- [Qemu-devel] [PATCH 03/13] alpha: Expand zap/zapnot with immediate inline., Richard Henderson, 2009/12/11
- [Qemu-devel] [PATCH 09/13] alpha: Expand msk*h inline., Richard Henderson, 2009/12/11
- [Qemu-devel] [PATCH 11/13] alpha: Fix FMOV., Richard Henderson, 2009/12/11
- [Qemu-devel] [PATCH 06/13] alpha: Implement RD/WRUNIQUE in the translator, Richard Henderson, 2009/12/11
- [Qemu-devel] [PATCH 10/13] alpha: Expand ins*h inline., Richard Henderson, 2009/12/11
- [Qemu-devel] [PATCH 05/13] alpha: Fix fbcond branch offset., Richard Henderson, 2009/12/11
- [Qemu-devel] [PATCH 13/13] alpha: Implement fp branch/cmov inline., Richard Henderson <=
- [Qemu-devel] [PATCH 07/13] alpha: Expand ins*l inline., Richard Henderson, 2009/12/11
- [Qemu-devel] [PATCH 04/13] alpha: Rewrite gen_ext_[hl] in terms of zapnot., Richard Henderson, 2009/12/11
- [Qemu-devel] [PATCH 08/13] alpha: Expand msk*l inline., Richard Henderson, 2009/12/11
- [Qemu-devel] [PATCH 12/13] alpha: Fix double log_cpu_state., Richard Henderson, 2009/12/11