From: Richard Henderson
Subject: [Qemu-devel] [PATCH 05/14] tcg-sparc: Use 64-bit registers with sparcv8plus
Date: Mon, 17 Mar 2014 11:37:47 -0700

Quite a lot of effort was spent composing and decomposing 64-bit
quantities in pairs of 32-bit registers, when we should just build
them once and keep them in a single 64-bit register.
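
For illustration (not part of the patch), this is the reassembly pattern
the old code emitted before each 64-bit store, taken from the removed
portion of tcg_out_qemu_st below; datalo/datahi name the two 32-bit halves:

    /* Reconstruct the full 64-bit value from a lo/hi register pair.  */
    tcg_out_arithi(s, TCG_REG_T1, datalo, 0, SHIFT_SRL);   /* zero-extend low half */
    tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX); /* shift high half up */
    tcg_out_arith(s, TCG_REG_O2, TCG_REG_T1, TCG_REG_O2, ARITH_OR);

With the value held in one %g or %o register, whose upper 32 bits the
v8plus kernel does preserve, all three instructions disappear.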

Signed-off-by: Richard Henderson <address@hidden>
---
 tcg/sparc/tcg-target.c | 525 +++++++++++++++++--------------------------------
 tcg/sparc/tcg-target.h |  14 +-
 tcg/tcg.c              |  45 ++++-
 3 files changed, 228 insertions(+), 356 deletions(-)

diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index 38e8577..fb5ba27 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -67,6 +67,18 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
 # define SPARC64 0
 #endif
 
+/* Note that sparcv8plus can only hold 64 bit quantities in %g and %o
+   registers.  These are saved manually by the kernel in full 64-bit
+   slots.  The %i and %l registers are saved by the register window
+   mechanism, which only allocates space for 32 bits.  Given that this
+   window spill/fill can happen on any signal, we must consider the
+   high bits of the %i and %l registers garbage at all times.  */
+#if SPARC64
+# define ALL_64  0xffffffffu
+#else
+# define ALL_64  0xffffu
+#endif
+
 /* Define some temporary registers.  T2 is used for constant generation.  */
 #define TCG_REG_T1  TCG_REG_G1
 #define TCG_REG_T2  TCG_REG_O7
@@ -307,14 +319,27 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
         ct->ct |= TCG_CT_REG;
         tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
         break;
-    case 'L': /* qemu_ld/st constraint */
+    case 'R':
         ct->ct |= TCG_CT_REG;
-        tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
-        // Helper args
+        tcg_regset_set32(ct->u.regs, 0, ALL_64);
+        break;
+    case 'A': /* qemu_ld/st address constraint */
+        ct->ct |= TCG_CT_REG;
+        tcg_regset_set32(ct->u.regs, 0,
+                         TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff);
+    reserve_helpers:
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
         break;
+    case 's': /* qemu_st data 32-bit constraint */
+        ct->ct |= TCG_CT_REG;
+        tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
+        goto reserve_helpers;
+    case 'S': /* qemu_st data 64-bit constraint */
+        ct->ct |= TCG_CT_REG;
+        tcg_regset_set32(ct->u.regs, 0, ALL_64);
+        goto reserve_helpers;
     case 'I':
         ct->ct |= TCG_CT_CONST_S11;
         break;
@@ -402,7 +427,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type,
     }
 
     /* A 32-bit constant, or 32-bit zero-extended to 64-bits.  */
-    if (type == TCG_TYPE_I32 || (arg & ~0xffffffffu) == 0) {
+    if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) {
         tcg_out_sethi(s, ret, arg);
         if (arg & 0x3ff) {
             tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR);
@@ -420,12 +445,12 @@ static void tcg_out_movi(TCGContext *s, TCGType type,
     /* A 64-bit constant decomposed into 2 32-bit pieces.  */
     lo = (int32_t)arg;
     if (check_fit_tl(lo, 13)) {
-        hi = (arg - lo) >> 31 >> 1;
+        hi = (arg - lo) >> 32;
         tcg_out_movi(s, TCG_TYPE_I32, ret, hi);
         tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
         tcg_out_arithi(s, ret, ret, lo, ARITH_ADD);
     } else {
-        hi = arg >> 31 >> 1;
+        hi = arg >> 32;
         tcg_out_movi(s, TCG_TYPE_I32, ret, hi);
         tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T2, lo);
         tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
@@ -586,7 +611,6 @@ static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGArg ret,
     tcg_out_movcc(s, cond, MOVCC_ICC, ret, v1, v1const);
 }
 
-#if SPARC64
 static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGArg arg1,
                                TCGArg arg2, int const_arg2, int label)
 {
@@ -634,45 +658,6 @@ static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGArg ret,
         tcg_out_movcc(s, cond, MOVCC_XCC, ret, v1, v1const);
     }
 }
-#else
-static void tcg_out_brcond2_i32(TCGContext *s, TCGCond cond,
-                                TCGArg al, TCGArg ah,
-                                TCGArg bl, int blconst,
-                                TCGArg bh, int bhconst, int label_dest)
-{
-    int scond, label_next = gen_new_label();
-
-    tcg_out_cmp(s, ah, bh, bhconst);
-
-    /* Note that we fill one of the delay slots with the second compare.  */
-    switch (cond) {
-    case TCG_COND_EQ:
-        tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_next);
-        tcg_out_cmp(s, al, bl, blconst);
-        tcg_out_bpcc(s, COND_E, BPCC_ICC | BPCC_PT, label_dest);
-        break;
-
-    case TCG_COND_NE:
-        tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_dest);
-        tcg_out_cmp(s, al, bl, blconst);
-        tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_dest);
-        break;
-
-    default:
-        scond = tcg_cond_to_bcond[tcg_high_cond(cond)];
-        tcg_out_bpcc(s, scond, BPCC_ICC | BPCC_PT, label_dest);
-        tcg_out_nop(s);
-        tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_next);
-        tcg_out_cmp(s, al, bl, blconst);
-        scond = tcg_cond_to_bcond[tcg_unsigned_cond(cond)];
-        tcg_out_bpcc(s, scond, BPCC_ICC | BPCC_PT, label_dest);
-        break;
-    }
-    tcg_out_nop(s);
-
-    tcg_out_label(s, label_next, s->code_ptr);
-}
-#endif
 
 static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGArg ret,
                                 TCGArg c1, TCGArg c2, int c2const)
@@ -724,7 +709,6 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGArg ret,
     }
 }
 
-#if SPARC64
 static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGArg ret,
                                 TCGArg c1, TCGArg c2, int c2const)
 {
@@ -739,48 +723,6 @@ static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGArg ret,
         tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1);
     }
 }
-#else
-static void tcg_out_setcond2_i32(TCGContext *s, TCGCond cond, TCGArg ret,
-                                 TCGArg al, TCGArg ah,
-                                 TCGArg bl, int blconst,
-                                 TCGArg bh, int bhconst)
-{
-    int tmp = TCG_REG_T1;
-
-    /* Note that the low parts are fully consumed before tmp is set.  */
-    if (ret != ah && (bhconst || ret != bh)) {
-        tmp = ret;
-    }
-
-    switch (cond) {
-    case TCG_COND_EQ:
-    case TCG_COND_NE:
-        if (bl == 0 && bh == 0) {
-            if (cond == TCG_COND_EQ) {
-                tcg_out_arith(s, TCG_REG_G0, al, ah, ARITH_ORCC);
-                tcg_out_movi(s, TCG_TYPE_I32, ret, 1);
-            } else {
-                tcg_out_arith(s, ret, al, ah, ARITH_ORCC);
-            }
-        } else {
-            tcg_out_setcond_i32(s, cond, tmp, al, bl, blconst);
-            tcg_out_cmp(s, ah, bh, bhconst);
-            tcg_out_mov(s, TCG_TYPE_I32, ret, tmp);
-        }
-        tcg_out_movcc(s, TCG_COND_NE, MOVCC_ICC, ret, cond == TCG_COND_NE, 1);
-        break;
-
-    default:
-        /* <= : ah < bh | (ah == bh && al <= bl) */
-        tcg_out_setcond_i32(s, tcg_unsigned_cond(cond), tmp, al, bl, blconst);
-        tcg_out_cmp(s, ah, bh, bhconst);
-        tcg_out_mov(s, TCG_TYPE_I32, ret, tmp);
-        tcg_out_movcc(s, TCG_COND_NE, MOVCC_ICC, ret, 0, 1);
-        tcg_out_movcc(s, tcg_high_cond(cond), MOVCC_ICC, ret, 1, 1);
-        break;
-    }
-}
-#endif
 
 static void tcg_out_addsub2(TCGContext *s, TCGArg rl, TCGArg rh,
                             TCGArg al, TCGArg ah, TCGArg bl, int blconst,
@@ -855,8 +797,13 @@ static void build_trampolines(TCGContext *s)
         }
         qemu_ld_trampoline[i] = tramp;
 
-        /* Find the retaddr argument register.  */
-        ra = TCG_REG_O3 + (!SPARC64 && TARGET_LONG_BITS == 64);
+        if (SPARC64 || TARGET_LONG_BITS == 32) {
+            ra = TCG_REG_O3;
+        } else {
+            /* Install the high part of the address.  */
+            tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX);
+            ra = TCG_REG_O4;
+        }
 
         /* Set the retaddr operand.  */
         tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7);
@@ -880,12 +827,28 @@ static void build_trampolines(TCGContext *s)
         }
         qemu_st_trampoline[i] = tramp;
 
-        /* Find the retaddr argument.  For 32-bit, this may be past the
-           last argument register, and need passing on the stack.  */
-        ra = (TCG_REG_O4
-              + (!SPARC64 && TARGET_LONG_BITS == 64)
-              + (!SPARC64 && (i & MO_SIZE) == MO_64));
-
+        if (SPARC64) {
+            ra = TCG_REG_O4;
+        } else {
+            ra = TCG_REG_O1;
+            if (TARGET_LONG_BITS == 64) {
+                /* Install the high part of the address.  */
+                tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX);
+                ra += 2;
+            } else {
+                ra += 1;
+            }
+            if ((i & MO_SIZE) == MO_64) {
+                /* Install the high part of the data.  */
+                tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX);
+                ra += 2;
+            } else {
+                ra += 1;
+            }
+            /* Skip the mem_index argument.  */
+            ra += 1;
+        }
+
         /* Set the retaddr operand.  */
         if (ra >= TCG_REG_O6) {
             tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_O7, TCG_REG_CALL_STACK,
@@ -954,25 +917,16 @@ static void tcg_target_qemu_prologue(TCGContext *s)
    The result of the TLB comparison is in %[ix]cc.  The sanitized address
    is in the returned register, maybe %o0.  The TLB addend is in %o1.  */
 
-static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
-                               int mem_index, TCGMemOp s_bits, int which)
+static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index,
+                               TCGMemOp s_bits, int which)
 {
     const TCGReg r0 = TCG_REG_O0;
     const TCGReg r1 = TCG_REG_O1;
     const TCGReg r2 = TCG_REG_O2;
-    TCGReg addr = addrlo;
     int tlb_ofs;
 
-    if (!SPARC64 && TARGET_LONG_BITS == 64) {
-        /* Assemble the 64-bit address in R0.  */
-        tcg_out_arithi(s, r0, addrlo, 0, SHIFT_SRL);
-        tcg_out_arithi(s, r1, addrhi, 32, SHIFT_SLLX);
-        tcg_out_arith(s, r0, r0, r1, ARITH_OR);
-        addr = r0;
-    }
-
     /* Shift the page number down.  */
-    tcg_out_arithi(s, r1, addrlo, TARGET_PAGE_BITS, SHIFT_SRL);
+    tcg_out_arithi(s, r1, addr, TARGET_PAGE_BITS, SHIFT_SRL);
 
     /* Mask out the page offset, except for the required alignment.  */
     tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_T1,
@@ -1006,10 +960,10 @@ static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
 
     /* If the guest address must be zero-extended, do so now.  */
     if (SPARC64 && TARGET_LONG_BITS == 32) {
-        tcg_out_arithi(s, r0, addrlo, 0, SHIFT_SRL);
+        tcg_out_arithi(s, r0, addr, 0, SHIFT_SRL);
         return r0;
     }
-    return addrlo;
+    return addr;
 }
 #endif /* CONFIG_SOFTMMU */
 
@@ -1042,78 +996,37 @@ static const int qemu_st_opc[16] = {
     [MO_LEQ]  = STX_LE,
 };
 
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
+static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
+                            TCGMemOp memop, int memi, bool is_64)
 {
-    TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
-    TCGMemOp memop, s_bits;
-#if defined(CONFIG_SOFTMMU)
+#ifdef CONFIG_SOFTMMU
+    TCGMemOp s_bits = memop & MO_SIZE;
     TCGReg addrz, param;
     uintptr_t func;
-    int memi;
-    uint32_t *label_ptr[2];
-#endif
-
-    datalo = *args++;
-    datahi = (!SPARC64 && is64 ? *args++ : 0);
-    addrlo = *args++;
-    addrhi = (!SPARC64 && TARGET_LONG_BITS == 64 ? *args++ : 0);
-    memop = *args++;
-    s_bits = memop & MO_SIZE;
+    uint32_t *label_ptr;
 
-#if defined(CONFIG_SOFTMMU)
-    memi = *args++;
-    addrz = tcg_out_tlb_load(s, addrlo, addrhi, memi, s_bits,
+    addrz = tcg_out_tlb_load(s, addr, memi, s_bits,
                              offsetof(CPUTLBEntry, addr_read));
 
-    if (!SPARC64 && s_bits == MO_64) {
-        int reg64;
-
-        /* bne,pn %[xi]cc, label0 */
-        label_ptr[0] = (uint32_t *)s->code_ptr;
-        tcg_out_bpcc0(s, COND_NE, BPCC_PN
-                      | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
-        tcg_out_nop(s);
-
-        /* TLB Hit.  */
-        /* Load all 64-bits into an O/G register.  */
-        reg64 = (datalo < 16 ? datalo : TCG_REG_O0);
-        tcg_out_ldst_rr(s, reg64, addrz, TCG_REG_O1, qemu_ld_opc[memop]);
+    /* The fast path is exactly one insn.  Thus we can perform the
+       entire TLB Hit in the (annulled) delay slot of the branch
+       over the TLB Miss case.  */
 
-        /* Move the two 32-bit pieces into the destination registers.  */
-        tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX);
-        if (reg64 != datalo) {
-            tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64);
-        }
-
-        /* b,a,pt label1 */
-        label_ptr[1] = (uint32_t *)s->code_ptr;
-        tcg_out_bpcc0(s, COND_A, BPCC_A | BPCC_PT, 0);
-    } else {
-        /* The fast path is exactly one insn.  Thus we can perform the
-           entire TLB Hit in the (annulled) delay slot of the branch
-           over the TLB Miss case.  */
-
-        /* beq,a,pt %[xi]cc, label0 */
-        label_ptr[0] = NULL;
-        label_ptr[1] = (uint32_t *)s->code_ptr;
-        tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
-                      | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
-        /* delay slot */
-        tcg_out_ldst_rr(s, datalo, addrz, TCG_REG_O1, qemu_ld_opc[memop]);
-    }
+    /* beq,a,pt %[xi]cc, label0 */
+    label_ptr = (uint32_t *)s->code_ptr;
+    tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
+                  | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
+    /* delay slot */
+    tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, qemu_ld_opc[memop]);
 
     /* TLB Miss.  */
 
-    if (label_ptr[0]) {
-        *label_ptr[0] |= INSN_OFF19((unsigned long)s->code_ptr -
-                                    (unsigned long)label_ptr[0]);
-    }
-
     param = TCG_REG_O1;
-    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
-        tcg_out_mov(s, TCG_TYPE_REG, param++, addrhi);
+    if (!SPARC64 && TARGET_LONG_BITS == 64) {
+        /* Skip the high-part; we'll perform the extract in the trampoline.  */
+        param++;
     }
-    tcg_out_mov(s, TCG_TYPE_REG, param++, addrlo);
+    tcg_out_mov(s, TCG_TYPE_REG, param++, addr);
 
     /* We use the helpers to extend SB and SW data, leaving the case
        of SL needing explicit extending below.  */
@@ -1127,81 +1040,54 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
     /* delay slot */
     tcg_out_movi(s, TCG_TYPE_I32, param, memi);
 
-    switch (memop & ~MO_BSWAP) {
-    case MO_SL:
-        tcg_out_arithi(s, datalo, TCG_REG_O0, 0, SHIFT_SRA);
-        break;
-    case MO_Q:
-        if (TCG_TARGET_REG_BITS == 32) {
-            tcg_out_mov(s, TCG_TYPE_REG, datahi, TCG_REG_O0);
-            tcg_out_mov(s, TCG_TYPE_REG, datalo, TCG_REG_O1);
-            break;
+    /* Recall that all of the helpers return 64-bit results.
+       Which complicates things for sparcv8plus.  */
+    if (SPARC64) {
+        /* We let the helper sign-extend SB and SW, but leave SL for here.  */
+        if (is_64 && (memop & ~MO_BSWAP) == MO_SL) {
+            tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA);
+        } else {
+            tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0);
+        }
+    } else {
+        if (s_bits == MO_64) {
+            tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, 32, SHIFT_SLLX);
+            tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O1, 0, SHIFT_SRL);
+            tcg_out_arith(s, data, TCG_REG_O0, TCG_REG_O1, ARITH_OR);
+        } else if (is_64) {
+            /* Re-extend from 32-bit rather than reassembling when we
+               know the high register must be an extension.  */
+            tcg_out_arithi(s, data, TCG_REG_O1, 0,
+                           memop & MO_SIGN ? SHIFT_SRA : SHIFT_SRL);
+        } else {
+            tcg_out_mov(s, TCG_TYPE_I32, data, TCG_REG_O1);
         }
-        /* FALLTHRU */
-    default:
-        /* mov */
-        tcg_out_mov(s, TCG_TYPE_REG, datalo, TCG_REG_O0);
-        break;
     }
 
-    *label_ptr[1] |= INSN_OFF19((unsigned long)s->code_ptr -
-                                (unsigned long)label_ptr[1]);
+    *label_ptr |= INSN_OFF19((uintptr_t)s->code_ptr - (uintptr_t)label_ptr);
 #else
     if (SPARC64 && TARGET_LONG_BITS == 32) {
-        tcg_out_arithi(s, TCG_REG_T1, addrlo, 0, SHIFT_SRL);
-        addrlo = TCG_REG_T1;
-    }
-    if (!SPARC64 && s_bits == MO_64) {
-        int reg64 = (datalo < 16 ? datalo : TCG_REG_O0);
-
-        tcg_out_ldst_rr(s, reg64, addrlo,
-                        (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
-                        qemu_ld_opc[memop]);
-
-        tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX);
-        if (reg64 != datalo) {
-            tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64);
-        }
-    } else {
-        tcg_out_ldst_rr(s, datalo, addrlo,
-                        (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
-                        qemu_ld_opc[memop]);
+        tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL);
+        addr = TCG_REG_T1;
     }
+    tcg_out_ldst_rr(s, data, addr,
+                    (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
+                    qemu_ld_opc[memop]);
 #endif /* CONFIG_SOFTMMU */
 }
 
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
+static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
+                            TCGMemOp memop, int memi)
 {
-    TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
-    TCGMemOp memop, s_bits;
-#if defined(CONFIG_SOFTMMU)
-    TCGReg addrz, datafull, param;
+#ifdef CONFIG_SOFTMMU
+    TCGMemOp s_bits = memop & MO_SIZE;
+    TCGReg addrz, param;
     uintptr_t func;
-    int memi;
     uint32_t *label_ptr;
-#endif
-
-    datalo = *args++;
-    datahi = (!SPARC64 && is64 ? *args++ : 0);
-    addrlo = *args++;
-    addrhi = (!SPARC64 && TARGET_LONG_BITS == 64 ? *args++ : 0);
-    memop = *args++;
-    s_bits = memop & MO_SIZE;
 
-#if defined(CONFIG_SOFTMMU)
-    memi = *args++;
-    addrz = tcg_out_tlb_load(s, addrlo, addrhi, memi, s_bits,
+    addrz = tcg_out_tlb_load(s, addr, memi, s_bits,
                              offsetof(CPUTLBEntry, addr_write));
 
-    datafull = datalo;
-    if (!SPARC64 && s_bits == MO_64) {
-        /* Reconstruct the full 64-bit value.  */
-        tcg_out_arithi(s, TCG_REG_T1, datalo, 0, SHIFT_SRL);
-        tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX);
-        tcg_out_arith(s, TCG_REG_O2, TCG_REG_T1, TCG_REG_O2, ARITH_OR);
-        datafull = TCG_REG_O2;
-    }
-
     /* The fast path is exactly one insn.  Thus we can perform the entire
        TLB Hit in the (annulled) delay slot of the branch over TLB Miss.  */
     /* beq,a,pt %[xi]cc, label0 */
@@ -1209,19 +1095,21 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
     tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
                   | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
     /* delay slot */
-    tcg_out_ldst_rr(s, datafull, addrz, TCG_REG_O1, qemu_st_opc[memop]);
+    tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, qemu_st_opc[memop]);
 
     /* TLB Miss.  */
 
     param = TCG_REG_O1;
-    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
-        tcg_out_mov(s, TCG_TYPE_REG, param++, addrhi);
+    if (!SPARC64 && TARGET_LONG_BITS == 64) {
+        /* Skip the high-part; we'll perform the extract in the trampoline.  */
+        param++;
     }
-    tcg_out_mov(s, TCG_TYPE_REG, param++, addrlo);
-    if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
-        tcg_out_mov(s, TCG_TYPE_REG, param++, datahi);
+    tcg_out_mov(s, TCG_TYPE_REG, param++, addr);
+    if (!SPARC64 && s_bits == MO_64) {
+        /* Skip the high-part; we'll perform the extract in the trampoline.  */
+        param++;
     }
-    tcg_out_mov(s, TCG_TYPE_REG, param++, datalo);
+    tcg_out_mov(s, TCG_TYPE_REG, param++, data);
 
     func = qemu_st_trampoline[memop];
     assert(func != 0);
@@ -1229,20 +1117,13 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
     /* delay slot */
     tcg_out_movi(s, TCG_TYPE_REG, param, memi);
 
-    *label_ptr |= INSN_OFF19((unsigned long)s->code_ptr -
-                             (unsigned long)label_ptr);
+    *label_ptr |= INSN_OFF19((uintptr_t)s->code_ptr - (uintptr_t)label_ptr);
 #else
     if (SPARC64 && TARGET_LONG_BITS == 32) {
-        tcg_out_arithi(s, TCG_REG_T1, addrlo, 0, SHIFT_SRL);
-        addrlo = TCG_REG_T1;
-    }
-    if (!SPARC64 && s_bits == MO_64) {
-        tcg_out_arithi(s, TCG_REG_T1, datalo, 0, SHIFT_SRL);
-        tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX);
-        tcg_out_arith(s, TCG_REG_O2, TCG_REG_T1, TCG_REG_O2, ARITH_OR);
-        datalo = TCG_REG_O2;
+        tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL);
+        addr = TCG_REG_T1;
     }
-    tcg_out_ldst_rr(s, datalo, addrlo,
+    tcg_out_ldst_rr(s, data, addr,
                     (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
                     qemu_st_opc[memop]);
 #endif /* CONFIG_SOFTMMU */
@@ -1292,14 +1173,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         tcg_out_movi(s, TCG_TYPE_I32, args[0], (uint32_t)args[1]);
         break;
 
-#if SPARC64
 #define OP_32_64(x)                             \
         glue(glue(case INDEX_op_, x), _i32):    \
         glue(glue(case INDEX_op_, x), _i64)
-#else
-#define OP_32_64(x)                             \
-        glue(glue(case INDEX_op_, x), _i32)
-#endif
+
     OP_32_64(ld8u):
         tcg_out_ldst(s, args[0], args[1], args[2], LDUB);
         break;
@@ -1313,9 +1190,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         tcg_out_ldst(s, args[0], args[1], args[2], LDSH);
         break;
     case INDEX_op_ld_i32:
-#if SPARC64
     case INDEX_op_ld32u_i64:
-#endif
         tcg_out_ldst(s, args[0], args[1], args[2], LDUW);
         break;
     OP_32_64(st8):
@@ -1325,9 +1200,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         tcg_out_ldst(s, args[0], args[1], args[2], STH);
         break;
     case INDEX_op_st_i32:
-#if SPARC64
     case INDEX_op_st32_i64:
-#endif
         tcg_out_ldst(s, args[0], args[1], args[2], STW);
         break;
     OP_32_64(add):
@@ -1394,19 +1267,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                             args[2], const_args[2], args[3], const_args[3]);
         break;
 
-#if !SPARC64
-    case INDEX_op_brcond2_i32:
-        tcg_out_brcond2_i32(s, args[4], args[0], args[1],
-                            args[2], const_args[2],
-                            args[3], const_args[3], args[5]);
-        break;
-    case INDEX_op_setcond2_i32:
-        tcg_out_setcond2_i32(s, args[5], args[0], args[1], args[2],
-                             args[3], const_args[3],
-                             args[4], const_args[4]);
-        break;
-#endif
-
     case INDEX_op_add2_i32:
         tcg_out_addsub2(s, args[0], args[1], args[2], args[3],
                         args[4], const_args[4], args[5], const_args[5],
@@ -1424,19 +1284,18 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         break;
 
     case INDEX_op_qemu_ld_i32:
-        tcg_out_qemu_ld(s, args, 0);
+        tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3], false);
         break;
     case INDEX_op_qemu_ld_i64:
-        tcg_out_qemu_ld(s, args, 1);
+        tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3], true);
         break;
     case INDEX_op_qemu_st_i32:
-        tcg_out_qemu_st(s, args, 0);
+        tcg_out_qemu_st(s, args[0], args[1], args[2], args[3]);
         break;
     case INDEX_op_qemu_st_i64:
-        tcg_out_qemu_st(s, args, 1);
+        tcg_out_qemu_st(s, args[0], args[1], args[2], args[3]);
         break;
 
-#if SPARC64
     case INDEX_op_movi_i64:
         tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
         break;
@@ -1496,7 +1355,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         tcg_out_movcond_i64(s, args[5], args[0], args[1],
                             args[2], const_args[2], args[3], const_args[3]);
         break;
-#endif
+
     gen_arith:
         tcg_out_arithc(s, args[0], args[1], args[2], const_args[2], c);
         break;
@@ -1550,73 +1409,54 @@ static const TCGTargetOpDef sparc_op_defs[] = {
     { INDEX_op_setcond_i32, { "r", "rZ", "rJ" } },
     { INDEX_op_movcond_i32, { "r", "rZ", "rJ", "rI", "0" } },
 
-#if !SPARC64
-    { INDEX_op_brcond2_i32, { "rZ", "rZ", "rJ", "rJ" } },
-    { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rJ", "rJ" } },
-#endif
-
     { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
     { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
     { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rJ" } },
 
-#if SPARC64
-    { INDEX_op_mov_i64, { "r", "r" } },
-    { INDEX_op_movi_i64, { "r" } },
-    { INDEX_op_ld8u_i64, { "r", "r" } },
-    { INDEX_op_ld8s_i64, { "r", "r" } },
-    { INDEX_op_ld16u_i64, { "r", "r" } },
-    { INDEX_op_ld16s_i64, { "r", "r" } },
-    { INDEX_op_ld32u_i64, { "r", "r" } },
-    { INDEX_op_ld32s_i64, { "r", "r" } },
-    { INDEX_op_ld_i64, { "r", "r" } },
-    { INDEX_op_st8_i64, { "rZ", "r" } },
-    { INDEX_op_st16_i64, { "rZ", "r" } },
-    { INDEX_op_st32_i64, { "rZ", "r" } },
-    { INDEX_op_st_i64, { "rZ", "r" } },
-
-    { INDEX_op_add_i64, { "r", "rZ", "rJ" } },
-    { INDEX_op_mul_i64, { "r", "rZ", "rJ" } },
-    { INDEX_op_div_i64, { "r", "rZ", "rJ" } },
-    { INDEX_op_divu_i64, { "r", "rZ", "rJ" } },
-    { INDEX_op_sub_i64, { "r", "rZ", "rJ" } },
-    { INDEX_op_and_i64, { "r", "rZ", "rJ" } },
-    { INDEX_op_andc_i64, { "r", "rZ", "rJ" } },
-    { INDEX_op_or_i64, { "r", "rZ", "rJ" } },
-    { INDEX_op_orc_i64, { "r", "rZ", "rJ" } },
-    { INDEX_op_xor_i64, { "r", "rZ", "rJ" } },
-
-    { INDEX_op_shl_i64, { "r", "rZ", "rJ" } },
-    { INDEX_op_shr_i64, { "r", "rZ", "rJ" } },
-    { INDEX_op_sar_i64, { "r", "rZ", "rJ" } },
-
-    { INDEX_op_neg_i64, { "r", "rJ" } },
-    { INDEX_op_not_i64, { "r", "rJ" } },
-
-    { INDEX_op_ext32s_i64, { "r", "r" } },
-    { INDEX_op_ext32u_i64, { "r", "r" } },
-    { INDEX_op_trunc_i32,  { "r", "r" } },
-
-    { INDEX_op_brcond_i64, { "rZ", "rJ" } },
-    { INDEX_op_setcond_i64, { "r", "rZ", "rJ" } },
-    { INDEX_op_movcond_i64, { "r", "rZ", "rJ", "rI", "0" } },
-#endif
-
-#if SPARC64
-    { INDEX_op_qemu_ld_i32, { "r", "L" } },
-    { INDEX_op_qemu_ld_i64, { "r", "L" } },
-    { INDEX_op_qemu_st_i32, { "L", "L" } },
-    { INDEX_op_qemu_st_i64, { "L", "L" } },
-#elif TARGET_LONG_BITS == 32
-    { INDEX_op_qemu_ld_i32, { "r", "L" } },
-    { INDEX_op_qemu_ld_i64, { "r", "r", "L" } },
-    { INDEX_op_qemu_st_i32, { "L", "L" } },
-    { INDEX_op_qemu_st_i64, { "L", "L", "L" } },
-#else
-    { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
-    { INDEX_op_qemu_ld_i64, { "L", "L", "L", "L" } },
-    { INDEX_op_qemu_st_i32, { "L", "L", "L" } },
-    { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } },
-#endif
+    { INDEX_op_mov_i64, { "R", "R" } },
+    { INDEX_op_movi_i64, { "R" } },
+    { INDEX_op_ld8u_i64, { "R", "r" } },
+    { INDEX_op_ld8s_i64, { "R", "r" } },
+    { INDEX_op_ld16u_i64, { "R", "r" } },
+    { INDEX_op_ld16s_i64, { "R", "r" } },
+    { INDEX_op_ld32u_i64, { "R", "r" } },
+    { INDEX_op_ld32s_i64, { "R", "r" } },
+    { INDEX_op_ld_i64, { "R", "r" } },
+    { INDEX_op_st8_i64, { "RZ", "r" } },
+    { INDEX_op_st16_i64, { "RZ", "r" } },
+    { INDEX_op_st32_i64, { "RZ", "r" } },
+    { INDEX_op_st_i64, { "RZ", "r" } },
+
+    { INDEX_op_add_i64, { "R", "RZ", "RJ" } },
+    { INDEX_op_mul_i64, { "R", "RZ", "RJ" } },
+    { INDEX_op_div_i64, { "R", "RZ", "RJ" } },
+    { INDEX_op_divu_i64, { "R", "RZ", "RJ" } },
+    { INDEX_op_sub_i64, { "R", "RZ", "RJ" } },
+    { INDEX_op_and_i64, { "R", "RZ", "RJ" } },
+    { INDEX_op_andc_i64, { "R", "RZ", "RJ" } },
+    { INDEX_op_or_i64, { "R", "RZ", "RJ" } },
+    { INDEX_op_orc_i64, { "R", "RZ", "RJ" } },
+    { INDEX_op_xor_i64, { "R", "RZ", "RJ" } },
+
+    { INDEX_op_shl_i64, { "R", "RZ", "RJ" } },
+    { INDEX_op_shr_i64, { "R", "RZ", "RJ" } },
+    { INDEX_op_sar_i64, { "R", "RZ", "RJ" } },
+
+    { INDEX_op_neg_i64, { "R", "RJ" } },
+    { INDEX_op_not_i64, { "R", "RJ" } },
+
+    { INDEX_op_ext32s_i64, { "R", "r" } },
+    { INDEX_op_ext32u_i64, { "R", "r" } },
+    { INDEX_op_trunc_i32,  { "r", "R" } },
+
+    { INDEX_op_brcond_i64, { "RZ", "RJ" } },
+    { INDEX_op_setcond_i64, { "R", "RZ", "RJ" } },
+    { INDEX_op_movcond_i64, { "R", "RZ", "RJ", "RI", "0" } },
+
+    { INDEX_op_qemu_ld_i32, { "r", "A" } },
+    { INDEX_op_qemu_ld_i64, { "R", "A" } },
+    { INDEX_op_qemu_st_i32, { "s", "A" } },
+    { INDEX_op_qemu_st_i64, { "S", "A" } },
 
     { -1 },
 };
@@ -1624,9 +1464,8 @@ static const TCGTargetOpDef sparc_op_defs[] = {
 static void tcg_target_init(TCGContext *s)
 {
     tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
-#if SPARC64
-    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
-#endif
+    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, ALL_64);
+
     tcg_regset_set32(tcg_target_call_clobber_regs, 0,
                      (1 << TCG_REG_G1) |
                      (1 << TCG_REG_G2) |
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index 7b8b33e..5442d45 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -24,13 +24,7 @@
 #ifndef TCG_TARGET_SPARC 
 #define TCG_TARGET_SPARC 1
 
-#if UINTPTR_MAX == UINT32_MAX
-# define TCG_TARGET_REG_BITS 32
-#elif UINTPTR_MAX == UINT64_MAX
-# define TCG_TARGET_REG_BITS 64
-#else
-# error Unknown pointer size for tcg target
-#endif
+#define TCG_TARGET_REG_BITS 64
 
 #define TCG_TARGET_WORDS_BIGENDIAN
 
@@ -78,7 +72,7 @@ typedef enum {
 /* used for function call generation */
 #define TCG_REG_CALL_STACK TCG_REG_O6
 
-#if TCG_TARGET_REG_BITS == 64
+#ifdef __arch64__
 #define TCG_TARGET_STACK_BIAS           2047
 #define TCG_TARGET_STACK_ALIGN          16
 #define TCG_TARGET_CALL_STACK_OFFSET    (128 + 6*8 + TCG_TARGET_STACK_BIAS)
@@ -88,7 +82,7 @@ typedef enum {
 #define TCG_TARGET_CALL_STACK_OFFSET    (64 + 4 + 6*4)
 #endif
 
-#if TCG_TARGET_REG_BITS == 64
+#ifdef __arch64__
 #define TCG_TARGET_EXTEND_ARGS 1
 #endif
 
@@ -118,7 +112,6 @@ typedef enum {
 #define TCG_TARGET_HAS_muluh_i32        0
 #define TCG_TARGET_HAS_mulsh_i32        0
 
-#if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_trunc_i32        1
 #define TCG_TARGET_HAS_div_i64          1
 #define TCG_TARGET_HAS_rem_i64          0
@@ -147,7 +140,6 @@ typedef enum {
 #define TCG_TARGET_HAS_muls2_i64        0
 #define TCG_TARGET_HAS_muluh_i64        0
 #define TCG_TARGET_HAS_mulsh_i64        0
-#endif
 
 #define TCG_TARGET_HAS_new_ldst         1
 
diff --git a/tcg/tcg.c b/tcg/tcg.c
index f1e0763..eb4afe4 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -664,7 +664,28 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags,
     int nb_rets;
     TCGArg *nparam;
 
-#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
+#if defined(__sparc__) && !defined(__arch64__)
+    /* We have 64-bit values in one register, but need to pass as two
+       separate parameters.  Split them.  */
+    int orig_sizemask = sizemask;
+    int orig_nargs = nargs;
+    if (sizemask != 0) {
+        TCGArg *split_args = __builtin_alloca(sizeof(TCGArg) * nargs * 2);
+        for (i = real_args = 0; i < nargs; ++i) {
+            int is_64bit = sizemask & (1 << (i+1)*2);
+            if (is_64bit) {
+                TCGv_i32 h = tcg_temp_new_i32();
+                TCGv_i64 orig = MAKE_TCGV_I64(args[i]);
+                tcg_gen_trunc_rshift_i64_i32(h, orig, 32);
+                split_args[real_args++] = GET_TCGV_I32(h);
+            }
+            split_args[real_args++] = args[i];
+        }
+        nargs = real_args;
+        args = split_args;
+        sizemask = 0;
+    }
+#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
     for (i = 0; i < nargs; ++i) {
         int is_64bit = sizemask & (1 << (i+1)*2);
         int is_signed = sizemask & (2 << (i+1)*2);
@@ -749,7 +770,16 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags,
     /* total parameters, needed to go backward in the instruction stream */
     *s->gen_opparam_ptr++ = 1 + nb_rets + real_args + 3;
 
-#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
+#if defined(__sparc__) && !defined(__arch64__)
+    for (i = real_args = 0; i < orig_nargs; ++i) {
+        int is_64bit = orig_sizemask & (1 << (i+1)*2);
+        if (is_64bit) {
+            TCGv_i32 h = MAKE_TCGV_I32(args[real_args++]);
+            tcg_temp_free_i32(h);
+        }
+        real_args++;
+    }
+#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
     for (i = 0; i < nargs; ++i) {
         int is_64bit = sizemask & (1 << (i+1)*2);
         if (!is_64bit) {
@@ -2411,6 +2441,17 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
         ts = &s->temps[arg];
         reg = tcg_target_call_oarg_regs[i];
         assert(s->reg_to_temp[reg] == -1);
+
+#if defined(__sparc__) && !defined(__arch64__)
+        /* Reconstruct %o0,%o1 32-bit pair into a single 64-bit value. */
+        if (ts->type == TCG_TYPE_I64) {
+            assert(i == 0);
+            tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, 32, SHIFT_SLLX);
+            tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O1, 0, SHIFT_SRL);
+            tcg_out_arith(s, TCG_REG_O0, TCG_REG_O0, TCG_REG_O1, ARITH_OR);
+        }
+#endif
+
         if (ts->fixed_reg) {
             if (ts->reg != reg) {
                 tcg_out_mov(s, ts->type, ts->reg, reg);
-- 
1.8.5.3



