From: Richard Henderson
Subject: [Qemu-devel] [PATCH 3/3] tcg-alpha: New TCG target.
Date: Sat, 29 Sep 2012 11:10:39 -0700

This began with a patch from Dong Weiyu <address@hidden>,
and was modified to fix problems and to adapt to changes in TCG.

Signed-off-by: Richard Henderson <address@hidden>
---
 configure              |   17 +-
 exec-all.h             |    7 +-
 qemu-common.h          |    4 +-
 tcg/alpha/tcg-target.c | 1860 ++++++++++++++++++++++++++++++++++++++++++++++++
 tcg/alpha/tcg-target.h |  142 ++++
 5 files changed, 2018 insertions(+), 12 deletions(-)
 create mode 100644 tcg/alpha/tcg-target.c
 create mode 100644 tcg/alpha/tcg-target.h

diff --git a/configure b/configure
index 8f99b7b..85e5efa 100755
--- a/configure
+++ b/configure
@@ -352,6 +352,8 @@ elif check_define __arm__ ; then
   cpu="arm"
 elif check_define __hppa__ ; then
   cpu="hppa"
+elif check_define __alpha__ ; then
+  cpu="alpha"
 else
   cpu=`uname -m`
 fi
@@ -381,6 +383,9 @@ case "$cpu" in
   sparc|sun4[cdmuv])
     cpu="sparc"
   ;;
+  alpha*)
+    cpu="alpha"
+  ;;
   *)
     # This will result in either an error or falling back to TCI later
     ARCH=unknown
@@ -895,6 +900,11 @@ case "$cpu" in
            cc_i386='$(CC) -m32'
            host_guest_base="yes"
            ;;
+    alpha)
+           QEMU_CFLAGS="-msmall-data $QEMU_CFLAGS"
+           LDFLAGS="-Wl,--warn-multiple-gp $LDFLAGS"
+           host_guest_base="yes"
+           ;;
     arm*)
            host_guest_base="yes"
            ;;
@@ -4048,13 +4058,6 @@ if test "$tcg_interpreter" = "yes" ; then
   echo "CONFIG_TCI_DIS=y"  >> $libdis_config_mak
 fi
 
-case "$ARCH" in
-alpha)
-  # Ensure there's only a single GP
-  cflags="-msmall-data $cflags"
-;;
-esac
-
 if test "$target_softmmu" = "yes" ; then
   case "$TARGET_BASE_ARCH" in
   arm)
diff --git a/exec-all.h b/exec-all.h
index 6516da0..4e2f2e8 100644
--- a/exec-all.h
+++ b/exec-all.h
@@ -132,9 +132,8 @@ static inline void tlb_flush(CPUArchState *env, int flush_global)
 #define CODE_GEN_AVG_BLOCK_SIZE 64
 #endif
 
-#if defined(__arm__) || defined(_ARCH_PPC) \
-    || defined(__x86_64__) || defined(__i386__) \
-    || defined(__sparc__) \
+#if defined(__alpha__) || defined(__arm__) || defined(_ARCH_PPC) \
+    || defined(__x86_64__) || defined(__i386__) || defined(__sparc__) \
     || defined(CONFIG_TCG_INTERPRETER)
 #define USE_DIRECT_JUMP
 #endif
@@ -245,7 +244,7 @@ static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
     __asm __volatile__ ("swi 0x9f0002" : : "r" (_beg), "r" (_end), "r" (_flg));
 #endif
 }
-#elif defined(__sparc__)
+#elif defined(__alpha__) || defined(__sparc__)
 void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr);
 #else
 #error tb_set_jmp_target1 is missing
diff --git a/qemu-common.h b/qemu-common.h
index 15d9e4e..b46a9b0 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -6,7 +6,9 @@
 #include "compiler.h"
 #include "config-host.h"
 
-#if defined(__arm__) || defined(__sparc__) || defined(__mips__) || defined(__hppa__) || defined(__ia64__)
+#if defined(__alpha__) || defined(__arm__) \
+    || defined(__sparc__) || defined(__mips__) \
+    || defined(__hppa__) || defined(__ia64__)
 #define WORDS_ALIGNED
 #endif
 
diff --git a/tcg/alpha/tcg-target.c b/tcg/alpha/tcg-target.c
new file mode 100644
index 0000000..3a9a354
--- /dev/null
+++ b/tcg/alpha/tcg-target.c
@@ -0,0 +1,1860 @@
+/*
+ * Tiny Code Generator for QEMU on ALPHA platform.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef NDEBUG
+static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+    [TCG_REG_V0] = "v0",
+    [TCG_REG_T0] = "t0",
+    [TCG_REG_T1] = "t1",
+    [TCG_REG_T2] = "t2",
+    [TCG_REG_T3] = "t3",
+    [TCG_REG_T4] = "t4",
+    [TCG_REG_T5] = "t5",
+    [TCG_REG_T6] = "t6",
+    [TCG_REG_T7] = "t7",
+    [TCG_REG_T8] = "t8",
+    [TCG_REG_T9] = "t9",
+    [TCG_REG_T10] = "t10",
+    [TCG_REG_T11] = "t11",
+    [TCG_REG_S0] = "s0",
+    [TCG_REG_S1] = "s1",
+    [TCG_REG_S2] = "s2",
+    [TCG_REG_S3] = "s3",
+    [TCG_REG_S4] = "s4",
+    [TCG_REG_S5] = "s5",
+    [TCG_REG_S6] = "s6",
+    [TCG_REG_A0] = "a0",
+    [TCG_REG_A1] = "a1",
+    [TCG_REG_A2] = "a2",
+    [TCG_REG_A3] = "a3",
+    [TCG_REG_A4] = "a4",
+    [TCG_REG_A5] = "a5",
+    [TCG_REG_RA] = "ra",
+    [TCG_REG_PV] = "pv",
+    [TCG_REG_AT] = "at",
+    [TCG_REG_GP] = "gp",
+    [TCG_REG_SP] = "sp",
+    [TCG_REG_ZERO] = "zero",
+};
+#endif
+
+/*
+ * $29 is the global pointer, $30 is the stack pointer, and $31 is the
+ * zero register; they are therefore omitted from the allocation order
+ * below.
+ */
+static const int tcg_target_reg_alloc_order[] = {
+    /* Call-saved registers.  */
+    TCG_REG_S0,
+    TCG_REG_S1,
+    TCG_REG_S2,
+    TCG_REG_S3,
+    TCG_REG_S4,
+    TCG_REG_S5,
+    TCG_REG_S6,
+    /* Call-clobbered temporaries.  */
+    TCG_REG_T0,
+    TCG_REG_T1,
+    TCG_REG_T2,
+    TCG_REG_T3,
+    TCG_REG_T4,
+    TCG_REG_T5,
+    TCG_REG_T6,
+    TCG_REG_T7,
+    TCG_REG_T8,
+    TCG_REG_T9,
+    TCG_REG_T10,
+    TCG_REG_T11,
+    TCG_REG_RA,
+    TCG_REG_PV,
+    TCG_REG_AT,
+    /* Call-clobbered argument and return registers.  */
+    TCG_REG_V0,
+    TCG_REG_A0,
+    TCG_REG_A1,
+    TCG_REG_A2,
+    TCG_REG_A3,
+    TCG_REG_A4,
+    TCG_REG_A5,
+};
+
+/*
+ * According to the Alpha calling convention, these six registers are
+ * used to pass function parameters.  If a function has more than six
+ * parameters, the remaining arguments are passed on the stack.
+ */
+static const int tcg_target_call_iarg_regs[6] = {
+    TCG_REG_A0,
+    TCG_REG_A1,
+    TCG_REG_A2,
+    TCG_REG_A3,
+    TCG_REG_A4,
+    TCG_REG_A5,
+};
+
+/*
+ * According to the Alpha calling convention, $0 is used to return the
+ * function result.
+ */
+static const int tcg_target_call_oarg_regs[1] = {
+    TCG_REG_V0
+};
+
+/*
+ * Temporary registers used within this translator.  Note that T9 is
+ * selected because it is the division return address register.
+ */
+#define TMP_REG1 TCG_REG_AT
+#define TMP_REG2 TCG_REG_T9
+
+/*
+ * Stack offset (relative to $sp) at which the address of the TB
+ * epilogue is saved.
+ */
+#define TB_RET_OFS \
+    (TCG_STATIC_CALL_ARGS_SIZE + CPU_TEMP_BUF_NLONGS * sizeof(long))
+
+/*
+ * If the guest base gets placed in high memory, it's more efficient
+ * to use a register to hold the address.
+ */
+#ifndef CONFIG_USE_GUEST_BASE
+#define GUEST_BASE 0
+#endif
+#define USE_GUEST_BASE_REG (GUEST_BASE > 0x7fff0000)
+#define TCG_GUEST_BASE_REG TCG_REG_S5
+
+/*
+ * Constant constraint mask values.
+ */
+#define TCG_CT_CONST_U8     0x100
+#define TCG_CT_CONST_ZERO   0x200
+#define TCG_CT_CONST_ANDI   0x400
+#define TCG_CT_CONST_PN255  0x800
+
+#define INSN_OP(x)     (((x) & 0x3f) << 26)
+#define INSN_FUNC1(x)  (((x) & 0x3) << 14)
+#define INSN_FUNC2(x)  (((x) & 0x7f) << 5)
+#define INSN_RA(x)     (TCG_TO_HW_REGNO(x) << 21)
+#define INSN_RB(x)     (TCG_TO_HW_REGNO(x) << 16)
+#define INSN_RC(x)     (TCG_TO_HW_REGNO(x))
+#define INSN_LIT(x)    (((x) & 0xff) << 13)
+#define INSN_DISP16(x) ((x) & 0xffff)
+#define INSN_DISP21(x) ((x) & 0x1fffff)
+#define INSN_RSVED(x)  ((x) & 0x3fff)
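+
+/*
+ * All Alpha instructions are 32 bits wide: the opcode lives in bits
+ * 31:26 and the register fields Ra/Rb/Rc in bits 25:21, 20:16 and 4:0.
+ * Operate-format instructions carry a function code in bits 11:5 and,
+ * when bit 12 is set, an 8-bit literal in bits 20:13 instead of Rb.
+ * Memory-format instructions use a 16-bit displacement, branches a
+ * 21-bit longword displacement, and jumps a 2-bit subtype in bits
+ * 15:14 plus a 14-bit hint.
+ */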
+
+typedef enum AlphaOpcode {
+    INSN_ADDL       = INSN_OP(0x10) | INSN_FUNC2(0x00),
+    INSN_ADDQ       = INSN_OP(0x10) | INSN_FUNC2(0x20),
+    INSN_AND        = INSN_OP(0x11) | INSN_FUNC2(0x00),
+    INSN_BEQ        = INSN_OP(0x39),
+    INSN_BGE        = INSN_OP(0x3e),
+    INSN_BGT        = INSN_OP(0x3f),
+    INSN_BIC        = INSN_OP(0x11) | INSN_FUNC2(0x08),
+    INSN_BIS        = INSN_OP(0x11) | INSN_FUNC2(0x20),
+    INSN_BLE        = INSN_OP(0x3b),
+    INSN_BLT        = INSN_OP(0x3a),
+    INSN_BNE        = INSN_OP(0x3d),
+    INSN_BR         = INSN_OP(0x30),
+    INSN_BSR        = INSN_OP(0x34),
+    INSN_CMOVEQ     = INSN_OP(0x11) | INSN_FUNC2(0x24),
+    INSN_CMOVGE     = INSN_OP(0x11) | INSN_FUNC2(0x46),
+    INSN_CMOVGT     = INSN_OP(0x11) | INSN_FUNC2(0x66),
+    INSN_CMOVLE     = INSN_OP(0x11) | INSN_FUNC2(0x64),
+    INSN_CMOVLT     = INSN_OP(0x11) | INSN_FUNC2(0x44),
+    INSN_CMOVNE     = INSN_OP(0x11) | INSN_FUNC2(0x26),
+    INSN_CMPEQ      = INSN_OP(0x10) | INSN_FUNC2(0x2d),
+    INSN_CMPLE      = INSN_OP(0x10) | INSN_FUNC2(0x6d),
+    INSN_CMPLT      = INSN_OP(0x10) | INSN_FUNC2(0x4d),
+    INSN_CMPULE     = INSN_OP(0x10) | INSN_FUNC2(0x3d),
+    INSN_CMPULT     = INSN_OP(0x10) | INSN_FUNC2(0x1d),
+    INSN_EQV        = INSN_OP(0x11) | INSN_FUNC2(0x48),
+    INSN_EXTBL      = INSN_OP(0x12) | INSN_FUNC2(0x06),
+    INSN_EXTWH      = INSN_OP(0x12) | INSN_FUNC2(0x5a),
+    INSN_EXTWL      = INSN_OP(0x12) | INSN_FUNC2(0x16),
+    INSN_INSBL      = INSN_OP(0x12) | INSN_FUNC2(0x0b),
+    INSN_INSLH      = INSN_OP(0x12) | INSN_FUNC2(0x67),
+    INSN_INSLL      = INSN_OP(0x12) | INSN_FUNC2(0x2b),
+    INSN_INSWL      = INSN_OP(0x12) | INSN_FUNC2(0x1b),
+    INSN_JMP        = INSN_OP(0x1a) | INSN_FUNC1(0),
+    INSN_JSR        = INSN_OP(0x1a) | INSN_FUNC1(1),
+    INSN_LDA        = INSN_OP(0x08),
+    INSN_LDAH       = INSN_OP(0x09),
+    INSN_LDBU       = INSN_OP(0x0a),
+    INSN_LDL        = INSN_OP(0x28),
+    INSN_LDQ        = INSN_OP(0x29),
+    INSN_LDWU       = INSN_OP(0x0c),
+    INSN_MSKBL      = INSN_OP(0x12) | INSN_FUNC2(0x02),
+    INSN_MSKLL      = INSN_OP(0x12) | INSN_FUNC2(0x22),
+    INSN_MSKWL      = INSN_OP(0x12) | INSN_FUNC2(0x12),
+    INSN_MULL       = INSN_OP(0x13) | INSN_FUNC2(0x00),
+    INSN_MULQ       = INSN_OP(0x13) | INSN_FUNC2(0x20),
+    INSN_ORNOT      = INSN_OP(0x11) | INSN_FUNC2(0x28),
+    INSN_RET        = INSN_OP(0x1a) | INSN_FUNC1(2),
+    INSN_S4ADDL     = INSN_OP(0x10) | INSN_FUNC2(0x02),
+    INSN_S8ADDL     = INSN_OP(0x10) | INSN_FUNC2(0x12),
+    INSN_SEXTB      = INSN_OP(0x1c) | INSN_FUNC2(0x00),
+    INSN_SEXTW      = INSN_OP(0x1c) | INSN_FUNC2(0x01),
+    INSN_SLL        = INSN_OP(0x12) | INSN_FUNC2(0x39),
+    INSN_SRA        = INSN_OP(0x12) | INSN_FUNC2(0x3c),
+    INSN_SRL        = INSN_OP(0x12) | INSN_FUNC2(0x34),
+    INSN_STB        = INSN_OP(0x0e),
+    INSN_STL        = INSN_OP(0x2c),
+    INSN_STQ        = INSN_OP(0x2d),
+    INSN_STW        = INSN_OP(0x0d),
+    INSN_SUBL       = INSN_OP(0x10) | INSN_FUNC2(0x09),
+    INSN_SUBQ       = INSN_OP(0x10) | INSN_FUNC2(0x29),
+    INSN_XOR        = INSN_OP(0x11) | INSN_FUNC2(0x40),
+    INSN_ZAPNOT     = INSN_OP(0x12) | INSN_FUNC2(0x31),
+
+    INSN_BUGCHK     = INSN_OP(0x00) | INSN_DISP16(0x81),
+
+    INSN_NOP        = INSN_BIS
+                      | INSN_RA(TCG_REG_ZERO)
+                      | INSN_RB(TCG_REG_ZERO)
+                      | INSN_RC(TCG_REG_ZERO),
+} AlphaOpcode;
+
+/*
+ * Given a constraint, fill in the available register set or constant range.
+ */
+static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
+{
+    const char *ct_str = *pct_str;
+
+    switch (ct_str[0]) {
+    case 'r':
+        /* Constraint 'r' means any register is okay.  */
+        ct->ct |= TCG_CT_REG;
+        tcg_regset_set32(ct->u.regs, 0, 0xffffffffu);
+        break;
+
+    case 'a':
+        /* Constraint 'a' means $24, one of the division inputs.  */
+        ct->ct |= TCG_CT_REG;
+        tcg_regset_clear(ct->u.regs);
+        tcg_regset_set_reg(ct->u.regs, TCG_REG_T10);
+        break;
+
+    case 'b':
+        /* Constraint 'b' means $25, one of the division inputs.  */
+        ct->ct |= TCG_CT_REG;
+        tcg_regset_clear(ct->u.regs);
+        tcg_regset_set_reg(ct->u.regs, TCG_REG_T11);
+        break;
+
+    case 'c':
+        /* Constraint 'c' means $27, the call procedure vector,
+           as well as the division output.  */
+        ct->ct |= TCG_CT_REG;
+        tcg_regset_clear(ct->u.regs);
+        tcg_regset_set_reg(ct->u.regs, TCG_REG_PV);
+        break;
+
+    case 'L':
+        /* Constraint for qemu_ld/st.  The extra reserved registers are
+           used for passing the parameters to the helper function.  */
+        ct->ct |= TCG_CT_REG;
+        tcg_regset_set32(ct->u.regs, 0, 0xffffffffu);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1);
+        break;
+
+    case 'I':
+        /* Constraint 'I' means an immediate 0 ... 255.  */
+        ct->ct |= TCG_CT_CONST_U8;
+        break;
+
+    case 'J':
+        /* Constraint 'J' means the immediate 0.  */
+        ct->ct |= TCG_CT_CONST_ZERO;
+        break;
+
+    case 'K':
+        /* Constraint 'K' means an immediate -255..255.  */
+        ct->ct |= TCG_CT_CONST_PN255;
+        break;
+
+    case 'M':
+        /* Constraint 'M' means constants used with AND/BIC/ZAPNOT.  */
+        ct->ct |= TCG_CT_CONST_ANDI;
+        break;
+
+    default:
+        return -1;
+    }
+
+    ct_str++;
+    *pct_str = ct_str;
+    return 0;
+}
+
+static int tcg_match_zapnot(tcg_target_long val)
+{
+    tcg_target_long mask0, maskff;
+
+    /* Since we know this is an alpha host, speed the check by using
+       cmpbge to compare 8 bytes at once, and incidentally also
+       produce the zapnot mask.  */
+    /* ??? This builtin was implemented sometime in 2002,
+       perhaps in the GCC 3.1 timeframe.  */
+    mask0 = __builtin_alpha_cmpbge(0, val);
+    maskff = __builtin_alpha_cmpbge(val, -1);
+
+    /* Here, mask0 contains the bytes that are 0, maskff contains
+       the bytes that are 0xff; that should cover the entire word.  */
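+    /* For example, val == 0x00000000ffffffff gives mask0 == 0xf0 and
+       maskff == 0x0f; together they cover all 8 bytes, so we return
+       0x0f, which as a ZAPNOT mask keeps exactly bytes 0-3.  */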
+    if ((mask0 | maskff) == 0xff) {
+        return maskff;
+    }
+    return 0;
+}
+
+static int tcg_match_andi(tcg_target_long val)
+{
+    if (val == (val & 0xff)) {
+        return 1;  /* and */
+    } else if (~val == (~val & 0xff)) {
+        return 1;  /* bic */
+    } else {
+        return tcg_match_zapnot(val) != 0;
+    }
+}
+
+static inline int tcg_target_const_match(tcg_target_long val,
+                                         const TCGArgConstraint *arg_ct)
+{
+    int ct = arg_ct->ct;
+    if (ct & TCG_CT_CONST) {
+        return 1;
+    } else if (ct & TCG_CT_CONST_U8) {
+        return val == (uint8_t)val;
+    } else if (ct & TCG_CT_CONST_ZERO) {
+        return val == 0;
+    } else if (ct & TCG_CT_CONST_ANDI) {
+        return tcg_match_andi(val);
+    } else if (ct & TCG_CT_CONST_PN255) {
+        return val >= -255 && val <= 255;
+    } else {
+        return 0;
+    }
+}
+
+static inline void tcg_out_fmt_br(TCGContext *s, AlphaOpcode opc,
+                                  TCGReg ra, int disp)
+{
+    tcg_out32(s, opc | INSN_RA(ra) | INSN_DISP21(disp));
+}
+
+static inline void tcg_out_fmt_mem(TCGContext *s, AlphaOpcode opc,
+                                   TCGReg ra, TCGReg rb, int disp)
+{
+    assert(disp == (int16_t)disp);
+    tcg_out32(s, opc | INSN_RA(ra) | INSN_RB(rb) | INSN_DISP16(disp));
+}
+
+static inline void tcg_out_fmt_jmp(TCGContext *s, AlphaOpcode opc,
+                                   TCGReg ra, TCGReg rb, int rsved)
+{
+    tcg_out32(s, opc | INSN_RA(ra) | INSN_RB(rb) | INSN_RSVED(rsved));
+}
+
+static inline void tcg_out_fmt_opr(TCGContext *s, AlphaOpcode opc,
+                                   TCGReg ra, TCGReg rb, TCGReg rc)
+{
+    tcg_out32(s, opc | INSN_RA(ra) | INSN_RB(rb) | INSN_RC(rc));
+}
+
+static inline void tcg_out_fmt_opi(TCGContext *s, AlphaOpcode opc,
+                                   TCGReg ra, tcg_target_ulong lit, TCGReg rc)
+{
+    assert(lit <= 0xff);
+    tcg_out32(s, opc | INSN_RA(ra) | INSN_LIT(lit) | INSN_RC(rc) | (1<<12));
+}
+
+/*
+ * Move from one reg to another.  This is called from tcg.c.
+ */
+static inline void tcg_out_mov(TCGContext *s, TCGType type,
+                               TCGReg rc, TCGReg rb)
+{
+    if (type == TCG_TYPE_I32) {
+        /* Also used for 64->32 bit truncation, so don't elide copies.  */
+        tcg_out_fmt_opr(s, INSN_ADDL, TCG_REG_ZERO, rb, rc);
+    } else if (rb != rc) {
+        tcg_out_fmt_opr(s, INSN_BIS, TCG_REG_ZERO, rb, rc);
+    }
+}
+
+/*
+ * Helper function to emit a memory format operation with a displacement
+ * that may be larger than the 16 bits accepted by the real instruction.
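+ * For example, a 32-bit displacement such as 0x12345678 is emitted as
+ *     ldah rs, 0x1234(rb)
+ *     <op> ra, 0x5678(rs)
+ * where rs is a scratch register chosen below; some 32-bit values need
+ * one extra ldah, and full 64-bit values an additional lda/ldah pair
+ * followed by a shift by 32.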
+ */
+static void tcg_out_mem_long(TCGContext *s, AlphaOpcode opc, TCGReg ra,
+                             TCGReg rb, tcg_target_long orig)
+{
+    tcg_target_long l0, l1, extra = 0, val = orig;
+    TCGReg rs;
+
+    /* Pick a scratch register.  Use the output register, if possible.  */
+    switch (opc) {
+    default:
+        if (ra != rb) {
+            rs = ra;
+            break;
+        }
+        /* FALLTHRU */
+
+    case INSN_STB:
+    case INSN_STW:
+    case INSN_STL:
+    case INSN_STQ:
+        assert(ra != TMP_REG1);
+        rs = TMP_REG1;
+        break;
+    }
+
+    /* See if we can turn a large absolute address into an offset from $gp.
+       Note that we ensure via -msmall-data and --warn-multiple-gp that
+       the $gp value is constant everywhere, which means that the translated
+       code shares the same $gp value that we have loaded right now.  */
+    if (rb == TCG_REG_ZERO && orig != (int32_t)orig) {
+        register tcg_target_long gp __asm__("$29");
+        tcg_target_long gprel = orig - gp;
+
+        if (gprel == (int32_t)gprel) {
+            orig = val = gprel;
+            rb = TCG_REG_GP;
+        }
+    }
+
+    l0 = (int16_t)val;
+    val = (val - l0) >> 16;
+    l1 = (int16_t)val;
+
+    if (orig == (int32_t)orig) {
+        if (l1 < 0 && orig >= 0) {
+            extra = 0x4000;
+            l1 = (int16_t)(val - 0x4000);
+        }
+    } else {
+        tcg_target_long l2, l3;
+        int rh = TCG_REG_ZERO;
+
+        val = (val - l1) >> 16;
+        l2 = (int16_t)val;
+        val = (val - l2) >> 16;
+        l3 = (int16_t)val;
+
+        if (l3) {
+            tcg_out_fmt_mem(s, INSN_LDAH, rs, rh, l3);
+            rh = rs;
+        }
+        if (l2) {
+            tcg_out_fmt_mem(s, INSN_LDA, rs, rh, l2);
+            rh = rs;
+        }
+        tcg_out_fmt_opi(s, INSN_SLL, rh, 32, rs);
+
+        if (rb != TCG_REG_ZERO) {
+            tcg_out_fmt_opr(s, INSN_ADDQ, rs, rb, rs);
+        }
+        rb = rs;
+    }
+
+    if (l1) {
+        tcg_out_fmt_mem(s, INSN_LDAH, rs, rb, l1);
+        rb = rs;
+    }
+    if (extra) {
+        tcg_out_fmt_mem(s, INSN_LDAH, rs, rb, extra);
+        rb = rs;
+    }
+
+    if (opc != INSN_LDA || rb != ra || l0 != 0) {
+        tcg_out_fmt_mem(s, opc, ra, rb, l0);
+    }
+}
+
+static inline void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ra,
+                                tcg_target_long val)
+{
+    if (type == TCG_TYPE_I32) {
+        val = (int32_t)val;
+    }
+    tcg_out_mem_long(s, INSN_LDA, ra, TCG_REG_ZERO, val);
+}
+
+static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ra,
+                              TCGReg rb, tcg_target_long disp)
+{
+    tcg_out_mem_long(s, type == TCG_TYPE_I32 ? INSN_LDL : INSN_LDQ,
+                     ra, rb, disp);
+}
+
+static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg ra,
+                              TCGReg rb, tcg_target_long disp)
+{
+    tcg_out_mem_long(s, type == TCG_TYPE_I32 ? INSN_STL : INSN_STQ,
+                     ra, rb, disp);
+}
+
+static void tgen_andi(TCGContext *s, TCGReg ra, tcg_target_long val, TCGReg rc)
+{
+    if (val == (val & 0xff)) {
+        tcg_out_fmt_opi(s, INSN_AND, ra, val, rc);
+    } else if (~val == (~val & 0xff)) {
+        tcg_out_fmt_opi(s, INSN_BIC, ra, ~val, rc);
+    } else {
+        int mask = tcg_match_zapnot(val);
+        assert(mask != 0);
+        tcg_out_fmt_opi(s, INSN_ZAPNOT, ra, mask, rc);
+    }
+}
+
+static inline void tgen_ext8u(TCGContext *s, TCGReg ra, TCGReg rc)
+{
+    tcg_out_fmt_opi(s, INSN_AND, ra, 0xff, rc);
+}
+
+static inline void tgen_ext8s(TCGContext *s, TCGReg ra, TCGReg rc)
+{
+    tcg_out_fmt_opr(s, INSN_SEXTB, TCG_REG_ZERO, ra, rc);
+}
+
+static inline void tgen_ext16u(TCGContext *s, TCGReg ra, TCGReg rc)
+{
+    tcg_out_fmt_opi(s, INSN_ZAPNOT, ra, 0x03, rc);
+}
+
+static inline void tgen_ext16s(TCGContext *s, TCGReg ra, TCGReg rc)
+{
+    tcg_out_fmt_opr(s, INSN_SEXTW, TCG_REG_ZERO, ra, rc);
+}
+
+static inline void tgen_ext32u(TCGContext *s, TCGReg ra, TCGReg rc)
+{
+    tcg_out_fmt_opi(s, INSN_ZAPNOT, ra, 0x0f, rc);
+}
+
+static inline void tgen_ext32s(TCGContext *s, TCGReg ra, TCGReg rc)
+{
+    tcg_out_fmt_opr(s, INSN_ADDL, TCG_REG_ZERO, ra, rc);
+}
+
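+/*
+ * Several helpers below describe an operand with a small "sizeop"
+ * bitmask: bits 0-1 hold log2 of the access size in bytes, bit 2
+ * (value 4) selects a sign-extended result, and bit 3 (value 8)
+ * selects a byte-swapped load (or, in tcg_out_div, the remainder
+ * rather than the quotient).
+ */
+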
+static void tgen_extend(TCGContext *s, int sizeop, TCGReg ra, TCGReg rc)
+{
+    switch (sizeop) {
+    case 0:
+        tgen_ext8u(s, ra, rc);
+        break;
+    case 0 | 4:
+        tgen_ext8s(s, ra, rc);
+        break;
+    case 1:
+        tgen_ext16u(s, ra, rc);
+        break;
+    case 1 | 4:
+        tgen_ext16s(s, ra, rc);
+        break;
+    case 2:
+        tgen_ext32u(s, ra, rc);
+        break;
+    case 2 | 4:
+        tgen_ext32s(s, ra, rc);
+        break;
+    case 3:
+        tcg_out_mov(s, TCG_TYPE_I64, rc, ra);
+        break;
+    default:
+        tcg_abort();
+    }
+}
+
+static void tgen_bswap(TCGContext *s, int sizeop, TCGReg ra, TCGReg rc)
+{
+    const TCGReg t0 = TMP_REG1, t1 = TMP_REG2;
+
+    switch (sizeop) {
+    case 1:     /* 16-bit swap, unsigned result */
+    case 1 | 4: /* 16-bit swap, signed result */
+        /* input value =                                   xxxx xxAB */
+        tcg_out_fmt_opi(s, INSN_EXTWH, ra, 7, t0);      /* .... ..B. */
+        tcg_out_fmt_opi(s, INSN_EXTBL, ra, 1, rc);      /* .... ...A */
+        tcg_out_fmt_opr(s, INSN_BIS, rc, t0, rc);       /* .... ..BA */
+        if (sizeop & 4) {
+            tcg_out_fmt_opr(s, INSN_SEXTW, TCG_REG_ZERO, rc, rc);
+        }
+        break;
+
+    case 2:     /* 32-bit swap, unsigned result */
+    case 2 | 4: /* 32-bit swap, signed result */
+        /* input value =                                   xxxx ABCD */
+        tcg_out_fmt_opi(s, INSN_INSLH, ra, 7, t0);      /* .... .ABC */
+        tcg_out_fmt_opi(s, INSN_INSWL, ra, 3, rc);      /* ...C D... */
+        tcg_out_fmt_opr(s, INSN_BIS, t0, rc, rc);       /* ...C DABC */
+        tcg_out_fmt_opi(s, INSN_SRL, rc, 16, t0);       /* .... .CDA */
+        tcg_out_fmt_opi(s, INSN_ZAPNOT, rc, 0x0A, rc);  /* .... D.B. */
+        tcg_out_fmt_opi(s, INSN_ZAPNOT, t0, 0x05, t0);  /* .... .C.A */
+        tcg_out_fmt_opr(s, (sizeop & 4 ? INSN_ADDL : INSN_BIS), t0, rc, rc);
+        break;
+
+    case 3:     /* 64-bit swap */
+        /* input value =                                   ABCD EFGH */
+        tcg_out_fmt_opi(s, INSN_SRL, ra, 24, t0);       /* ...A BCDE */
+        tcg_out_fmt_opi(s, INSN_SLL, ra, 24, t1);       /* DEFG H... */
+        tcg_out_fmt_opi(s, INSN_ZAPNOT, t0, 0x11, t0);  /* ...A ...E */
+        tcg_out_fmt_opi(s, INSN_ZAPNOT, t1, 0x88, t1);  /* D... H... */
+        tcg_out_fmt_opr(s, INSN_BIS, t0, t1, t1);       /* D..A H..E */
+        tcg_out_fmt_opi(s, INSN_SRL, ra, 8, t0);        /* .ABC DEFG */
+        tcg_out_fmt_opi(s, INSN_ZAPNOT, t0, 0x22, t0);  /* ..B. ..F. */
+        tcg_out_fmt_opr(s, INSN_BIS, t0, t1, t1);       /* D.BA H.FE */
+        tcg_out_fmt_opi(s, INSN_SLL, ra, 8, t0);        /* BCDE FGH. */
+        tcg_out_fmt_opi(s, INSN_ZAPNOT, t0, 0x44, t0);  /* .C.. .G.. */
+        tcg_out_fmt_opr(s, INSN_BIS, t0, t1, t1);       /* DCBA HGFE */
+        tcg_out_fmt_opi(s, INSN_SRL, t1, 32, t0);       /* .... DCBA */
+        tcg_out_fmt_opi(s, INSN_SLL, t1, 32, t1);       /* HGFE .... */
+        tcg_out_fmt_opr(s, INSN_BIS, t0, t1, rc);       /* HGFE DCBA */
+        break;
+
+    default:
+        tcg_abort();
+    }
+}
+
+static void tcg_out_ld_sz(TCGContext *s, int sizeop, TCGReg ra, TCGReg rb,
+                          tcg_target_long disp)
+{
+    static const AlphaOpcode ld_opc[4] = {
+        INSN_LDBU, INSN_LDWU, INSN_LDL, INSN_LDQ
+    };
+
+    tcg_out_mem_long(s, ld_opc[sizeop & 3], ra, rb, disp);
+
+    switch (sizeop) {
+    case 0 | 4 | 8:
+    case 0 | 4:
+    case 1 | 4:
+    case 2:
+        tgen_extend(s, sizeop & 7, ra, ra);
+        break;
+
+    case 0:
+    case 0 | 8:
+    case 1:
+    case 2 | 4:
+    case 3:
+        break;
+
+    case 1 | 8:
+    case 1 | 4 | 8:
+    case 2 | 8:
+    case 2 | 4 | 8:
+    case 3 | 8:
+        tgen_bswap(s, sizeop & 7, ra, ra);
+        break;
+
+    default:
+        tcg_abort();
+    }
+}
+
+static void tcg_out_st_sz(TCGContext *s, int sizeop, TCGReg ra, TCGReg rb,
+                          tcg_target_long disp)
+{
+    static const AlphaOpcode st_opc[4] = {
+        INSN_STB, INSN_STW, INSN_STL, INSN_STQ
+    };
+
+    tcg_out_mem_long(s, st_opc[sizeop & 3], ra, rb, disp);
+}
+
+static void patch_reloc(uint8_t *x_ptr, int type,
+                        tcg_target_long value, tcg_target_long addend)
+{
+    uint32_t *code_ptr = (uint32_t *)x_ptr;
+    uint32_t insn = *code_ptr;
+
+    value += addend;
+    switch (type) {
+    case R_ALPHA_BRADDR:
+        value -= (tcg_target_long)x_ptr + 4;
+        if ((value & 3) || value < -0x400000 || value >= 0x400000) {
+            tcg_abort();
+        }
+        *code_ptr = (insn & ~0x1fffff) | INSN_DISP21(value >> 2);
+        break;
+
+    default:
+        tcg_abort();
+    }
+}
+
+static void tcg_out_br(TCGContext *s, int opc, TCGReg ra, int label_index)
+{
+    TCGLabel *l = &s->labels[label_index];
+    tcg_target_long value;
+
+    if (l->has_value) {
+        value = l->u.value;
+        value -= (tcg_target_long)s->code_ptr + 4;
+        if ((value & 3) || value < -0x400000 || value >= 0x400000) {
+            tcg_abort();
+        }
+        value >>= 2;
+    } else {
+        tcg_out_reloc(s, s->code_ptr, R_ALPHA_BRADDR, label_index, 0);
+        /* We need to keep the offset unchanged for retranslation.
+           The field loaded here will be masked in tcg_out_fmt_br.  */
+        value = *(uint32_t *) s->code_ptr;
+    }
+    tcg_out_fmt_br(s, opc, ra, value);
+}
+
+static void tcg_out_const_call(TCGContext *s, tcg_target_long dest)
+{
+    const uint16_t *check = (const uint16_t *) dest;
+    tcg_target_long disp;
+
+    /* ??? Ideally we'd have access to Elf64_Sym.st_other, which
+       would tell us definitively whether the target function uses
+       the incoming PV value.  Make a simplifying assumption here
+       that all of the compiler-generated code that we're calling
+       either computes the GP from the PV in the first two insns
+       or it doesn't use the PV at all.  This assumption holds in
+       general for just about anything except some hand-written
+       assembly, which we're not calling into.  */
+
+    /* Note we access the insn stream as 16-bit units to avoid having
+       to mask out the offsets of the ldah and lda insns.  */
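+    /* With $gp as Ra, the high halfword of "ldah $gp,hi($27)" is
+       0x27bb and that of "lda $gp,lo($gp)" is 0x23bd, which are the
+       values checked for here.  */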
+    if (check[1] == 0x27bb && check[3] == 0x23bd) {
+        /* Skip the GP computation.  We can do this even if the
+           direct branch is out of range.  */
+        dest += 8;
+    }
+
+    disp = dest - ((tcg_target_long)s->code_ptr + 4);
+    if (disp >= -0x400000 && disp < 0x400000) {
+        tcg_out_fmt_br(s, INSN_BSR, TCG_REG_RA, disp >> 2);
+    } else {
+        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_PV, dest);
+        tcg_out_fmt_jmp(s, INSN_JSR, TCG_REG_RA, TCG_REG_PV, dest);
+    }
+}
+
+static void tcg_out_deposit(TCGContext *s, TCGReg dest, TCGReg arg1,
+                            TCGReg arg2, int ofs, int len, int is_64)
+{
+    AlphaOpcode ins_opc, msk_opc;
+
+    switch (len) {
+    case 8:
+        ins_opc = INSN_INSBL;
+        msk_opc = INSN_MSKBL;
+        break;
+    case 16:
+        ins_opc = INSN_INSWL;
+        msk_opc = INSN_MSKWL;
+        break;
+    case 32:
+        ins_opc = INSN_INSLL;
+        msk_opc = INSN_MSKLL;
+        break;
+    default:
+        tcg_abort();
+    }
+
+    /* Convert the bit offset to a byte offset.  */
+    ofs >>= 3;
+
+    if (arg1 == TCG_REG_ZERO) {
+        tcg_out_fmt_opi(s, ins_opc, arg2, ofs, dest);
+        if (!is_64 && len + ofs * 8 == 32) {
+            tgen_ext32s(s, dest, dest);
+        }
+    } else if (arg2 == TCG_REG_ZERO) {
+        tcg_out_fmt_opi(s, msk_opc, arg1, ofs, dest);
+    } else {
+        tcg_out_fmt_opi(s, ins_opc, arg2, ofs, TMP_REG1);
+        tcg_out_fmt_opi(s, msk_opc, arg1, ofs, dest);
+        tcg_out_fmt_opr(s, is_64 ? INSN_BIS : INSN_ADDL, dest, TMP_REG1, dest);
+    }
+}
+
+/* The low bit of these entries indicates that the result of
+   the comparison must be inverted.  This bit should not be
+   output with the rest of the instruction.  */
+static const int cmp_opc[] = {
+    [TCG_COND_EQ] = INSN_CMPEQ,
+    [TCG_COND_NE] = INSN_CMPEQ | 1,
+    [TCG_COND_LT] = INSN_CMPLT,
+    [TCG_COND_GE] = INSN_CMPLT | 1,
+    [TCG_COND_LE] = INSN_CMPLE,
+    [TCG_COND_GT] = INSN_CMPLE | 1,
+    [TCG_COND_LTU] = INSN_CMPULT,
+    [TCG_COND_GEU] = INSN_CMPULT | 1,
+    [TCG_COND_LEU] = INSN_CMPULE,
+    [TCG_COND_GTU] = INSN_CMPULE | 1
+};
+
+static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg dest,
+                            TCGReg c1, TCGArg c2, int c2const)
+{
+    AlphaOpcode opc = cmp_opc[cond] & ~1;
+
+    if (c2const) {
+        tcg_out_fmt_opi(s, opc, c1, c2, dest);
+    } else {
+        tcg_out_fmt_opr(s, opc, c1, c2, dest);
+    }
+
+    if (cmp_opc[cond] & 1) {
+        tcg_out_fmt_opi(s, INSN_XOR, dest, 1, dest);
+    }
+}
+
+static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg dest,
+                            TCGReg c1, TCGArg c2, int c2const,
+                            TCGArg v1, int v1const)
+{
+    /* Note that unsigned comparisons are not present here, which means
+       that their entries will contain zeros.  */
+    static const AlphaOpcode cmov_opc[] = {
+        [TCG_COND_EQ] = INSN_CMOVEQ,
+        [TCG_COND_NE] = INSN_CMOVNE,
+        [TCG_COND_LT] = INSN_CMOVLT,
+        [TCG_COND_GE] = INSN_CMOVGE,
+        [TCG_COND_LE] = INSN_CMOVLE,
+        [TCG_COND_GT] = INSN_CMOVGT
+    };
+
+    AlphaOpcode opc = 0;
+
+    /* Notice signed comparisons vs zero.  These are handled by the
+       cmov instructions directly.  */
+    if (c2 == 0) {
+        opc = cmov_opc[cond];
+    }
+
+    /* Otherwise, generate a comparison into a temporary.  */
+    if (opc == 0) {
+        opc = cmp_opc[cond] & ~1;
+        if (c2const) {
+            tcg_out_fmt_opi(s, opc, c1, c2, TMP_REG1);
+        } else {
+            tcg_out_fmt_opr(s, opc, c1, c2, TMP_REG1);
+        }
+
+        opc = (cmp_opc[cond] & 1 ? INSN_CMOVEQ : INSN_CMOVNE);
+        c1 = TMP_REG1;
+    }
+
+    if (v1const) {
+        tcg_out_fmt_opi(s, opc, c1, v1, dest);
+    } else {
+        tcg_out_fmt_opr(s, opc, c1, v1, dest);
+    }
+}
+
+static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
+                           TCGArg arg2, int const_arg2, int label_index)
+{
+    /* Note that unsigned comparisons are not present here, which means
+       that their entries will contain zeros.  */
+    static const AlphaOpcode br_opc[] = {
+        [TCG_COND_EQ] = INSN_BEQ,
+        [TCG_COND_NE] = INSN_BNE,
+        [TCG_COND_LT] = INSN_BLT,
+        [TCG_COND_GE] = INSN_BGE,
+        [TCG_COND_LE] = INSN_BLE,
+        [TCG_COND_GT] = INSN_BGT
+    };
+
+    AlphaOpcode opc = 0;
+
+    /* Notice signed comparisons vs zero.  These are handled by the
+       branch instructions directly.  */
+    if (arg2 == 0) {
+        opc = br_opc[cond];
+    }
+
+    /* Otherwise, generate a comparison into a temporary.  */
+    if (opc == 0) {
+        opc = cmp_opc[cond] & ~1;
+        if (const_arg2) {
+            tcg_out_fmt_opi(s, opc, arg1, arg2, TMP_REG1);
+        } else {
+            tcg_out_fmt_opr(s, opc, arg1, arg2, TMP_REG1);
+        }
+
+        opc = (cmp_opc[cond] & 1 ? INSN_BEQ : INSN_BNE);
+        arg1 = TMP_REG1;
+    }
+
+    tcg_out_br(s, opc, arg1, label_index);
+}
+
+/* Note that these functions don't have normal C calling conventions.  */
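+/* The dividend and divisor are passed in $24 (t10) and $25 (t11),
+   the result is returned in $27 (pv), and the return address is
+   expected in $23 (t9); hence the 'a', 'b' and 'c' constraints above
+   and the choice of TMP_REG2.  */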
+typedef long divfn(long, long);
+extern divfn __divl, __divlu, __reml, __remlu;
+extern divfn __divq, __divqu, __remq, __remqu;
+
+static void tcg_out_div(TCGContext *s, int sizeop)
+{
+    static divfn * const libc_div[16] = {
+        [2] = __divlu,
+        [2 | 8] = __remlu,
+        [2 | 4] = __divl,
+        [2 | 4 | 8] = __reml,
+
+        [3] = __divqu,
+        [3 | 8] = __remqu,
+        [3 | 4] = __divq,
+        [3 | 4 | 8] = __remq,
+    };
+
+    tcg_target_long val, disp;
+
+    val = (tcg_target_long) libc_div[sizeop];
+    assert(val != 0);
+
+    disp = val - ((tcg_target_long)s->code_ptr + 4);
+    if (disp >= -0x400000 && disp < 0x400000) {
+        tcg_out_fmt_br(s, INSN_BSR, TCG_REG_T9, disp >> 2);
+    } else {
+        tcg_out_movi(s, TCG_TYPE_PTR, TMP_REG1, val);
+        tcg_out_fmt_jmp(s, INSN_JSR, TCG_REG_T9, TMP_REG1, val);
+    }
+}
+
+#if defined(CONFIG_SOFTMMU)
+
+#include "../../softmmu_defs.h"
+
+static void *qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+static void *qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+
+static void tgen_andi_tmp(TCGContext *s, TCGReg ra, uint64_t val, TCGReg rc)
+{
+    if (!tcg_match_andi(val)) {
+        tcg_out_movi(s, TCG_TYPE_I64, TMP_REG1, val);
+        tcg_out_fmt_opr(s, INSN_AND, ra, TMP_REG1, rc);
+    } else {
+        tgen_andi(s, ra, val, rc);
+    }
+}
+
+static void tcg_out_tlb_cmp(TCGContext *s, int sizeop, TCGReg r0, TCGReg r1,
+                            TCGReg addr_reg, int label1, long tlb_offset)
+{
+    int addrsizeop = TARGET_LONG_BITS == 32 ? 2 : 3;
+    unsigned long val;
+
+    /* Mask the page, plus the low bits of the access, into TMP_REG1.  Note
+       that the low bits are added in order to catch unaligned accesses,
+       as those bits won't be set in the TLB entry.  For 32-bit targets,
+       force the high bits of the mask to be zero, as the high bits of
+       the input register are garbage.  */
+    val = TARGET_PAGE_MASK | ((1 << (sizeop & 3)) - 1);
+    if (TARGET_LONG_BITS == 32) {
+        val &= 0xfffffffful;
+    }
+    tgen_andi_tmp(s, addr_reg, val, TMP_REG1);
+
+    /* Compute the index into the TLB into R1.  Again, note that the
+       high bits of a 32-bit address must be cleared.  */
+    tcg_out_fmt_opi(s, INSN_SRL, addr_reg,
+                    TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS, r1);
+
+    val = (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS;
+    if (TARGET_LONG_BITS == 32) {
+        val &= 0xfffffffful >> (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+    }
+    tgen_andi_tmp(s, r1, val, r1);
+
+    /* Load the word at (R1 + CPU_ENV + TLB_OFFSET).  Note that we
+       arrange for a 32-bit load to be zero-extended.  */
+    tcg_out_fmt_opr(s, INSN_ADDQ, r1, TCG_AREG0, r1);
+    tcg_out_ld_sz(s, addrsizeop, TMP_REG2, r1, tlb_offset);
+
+    /* Copy the original address into R0.  This is needed on the
+       slow path through the helper function.  */
+    tgen_extend(s, addrsizeop, addr_reg, r0);
+
+    /* Compare TMP1 with the value loaded from the TLB.  */
+    tcg_out_brcond(s, TCG_COND_NE, TMP_REG2, TMP_REG1, 0, label1);
+}
+#endif /* SOFTMMU */
+
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop)
+{
+    TCGReg addr_reg, data_reg, r0;
+    long ofs;
+    int bswap;
+#if defined(CONFIG_SOFTMMU)
+    TCGReg r1;
+    int label1, label2, mem_index;
+#endif
+
+    data_reg = *args++;
+    addr_reg = *args++;
+
+#if defined(CONFIG_SOFTMMU)
+    mem_index = *args;
+    r0 = TCG_REG_A1;
+    r1 = TCG_REG_A0;
+
+    label1 = gen_new_label();
+    label2 = gen_new_label();
+
+    tcg_out_tlb_cmp(s, sizeop, r0, r1, addr_reg, label1,
+                    offsetof(CPUArchState, tlb_table[mem_index][0].addr_read));
+
+    /* TLB Hit.  Note that Alpha statically predicts forward branches as
+       not taken, so arrange for the fallthru to be the common case.
+
+       R0 contains the guest address, and R1 contains the pointer
+       to CPU_ENV plus the TLB entry offset.  */
+
+    tcg_out_ld(s, TCG_TYPE_I64, r1, r1,
+               offsetof(CPUArchState, tlb_table[mem_index][0].addend));
+    tcg_out_fmt_opr(s, INSN_ADDQ, r0, r1, r0);
+    ofs = 0;
+#else
+    if (TARGET_LONG_BITS == 32) {
+        r0 = TCG_REG_A1;
+        tgen_ext32u(s, addr_reg, r0);
+    } else {
+        r0 = addr_reg;
+    }
+    if (USE_GUEST_BASE_REG) {
+        tcg_out_fmt_opr(s, INSN_ADDQ, r0, TCG_GUEST_BASE_REG, TCG_REG_A1);
+        r0 = TCG_REG_A1;
+        ofs = 0;
+    } else {
+        ofs = GUEST_BASE;
+    }
+#endif
+
+#if defined(TARGET_WORDS_BIGENDIAN)
+    /* Signal byte swap necessary.  */
+    bswap = 8;
+#else
+    bswap = 0;
+#endif
+
+    /* Perform the actual load.  */
+    tcg_out_ld_sz(s, sizeop | bswap, data_reg, r0, ofs);
+
+#if defined(CONFIG_SOFTMMU)
+    tcg_out_br(s, INSN_BR, TCG_REG_ZERO, label2);
+
+    /* TLB miss.  Call the helper function.  */
+    tcg_out_label(s, label1, s->code_ptr);
+    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
+    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_A2, mem_index);
+
+    tcg_out_const_call(s, (tcg_target_long)qemu_ld_helpers[sizeop & 3]);
+
+    /* The helper routines have no defined data extension.
+       Properly extend the result to whatever data type we need.  */
+    tgen_extend(s, sizeop, TCG_REG_V0, data_reg);
+
+    tcg_out_label(s, label2, s->code_ptr);
+#endif
+}
+
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int sizeop)
+{
+    TCGReg addr_reg, data_reg, r0;
+    long ofs;
+#if defined(CONFIG_SOFTMMU)
+    TCGReg r1;
+    int label1, label2, mem_index;
+#endif
+
+    data_reg = *args++;
+    addr_reg = *args++;
+
+#if defined(CONFIG_SOFTMMU)
+    mem_index = *args;
+    r0 = TCG_REG_A1;
+    r1 = TCG_REG_A0;
+
+    label1 = gen_new_label();
+    label2 = gen_new_label();
+
+    tcg_out_tlb_cmp(s, sizeop, r0, r1, addr_reg, label1,
+                    offsetof(CPUArchState,
+                             tlb_table[mem_index][0].addr_write));
+
+    /* TLB Hit.  Note that Alpha statically predicts forward branches as
+       not taken, so arrange for the fallthru to be the common case.
+
+       R0 contains the guest address, and R1 contains the pointer
+       to CPU_ENV plus the TLB entry offset.  */
+
+    tcg_out_ld(s, TCG_TYPE_I64, r1, r1,
+               offsetof(CPUArchState, tlb_table[mem_index][0].addend));
+    tcg_out_fmt_opr(s, INSN_ADDQ, r0, r1, r0);
+    ofs = 0;
+#else
+    if (TARGET_LONG_BITS == 32) {
+        r0 = TCG_REG_A1;
+        tgen_ext32u(s, addr_reg, r0);
+    } else {
+        r0 = addr_reg;
+    }
+    if (USE_GUEST_BASE_REG) {
+        tcg_out_fmt_opr(s, INSN_ADDQ, r0, TCG_GUEST_BASE_REG, TCG_REG_A1);
+        r0 = TCG_REG_A1;
+        ofs = 0;
+    } else {
+        ofs = GUEST_BASE;
+    }
+#endif
+
+#if defined(TARGET_WORDS_BIGENDIAN)
+    /* Byte swap if necessary.  */
+    if ((sizeop & 3) > 0) {
+        tgen_bswap(s, sizeop & 3, data_reg, TCG_REG_A0);
+        data_reg = TCG_REG_A0;
+    }
+#endif
+
+    /* Perform the actual store.  */
+    tcg_out_st_sz(s, sizeop, data_reg, r0, ofs);
+
+#if defined(CONFIG_SOFTMMU)
+    tcg_out_br(s, INSN_BR, TCG_REG_ZERO, label2);
+
+    /* TLB miss.  Call the helper function.  */
+    tcg_out_label(s, label1, s->code_ptr);
+    tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_A0, TCG_AREG0);
+    tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_A2, data_reg);
+    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_A3, mem_index);
+
+    tcg_out_const_call(s, (tcg_target_long)qemu_st_helpers[sizeop & 3]);
+
+    tcg_out_label(s, label2, s->code_ptr);
+#endif
+}
+
+static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
+                              const TCGArg *args, const int *const_args)
+{
+    TCGArg arg0, arg1, arg2;
+    AlphaOpcode insn;
+    int c;
+
+    arg0 = args[0];
+    arg1 = args[1];
+    arg2 = args[2];
+
+    switch (opc) {
+    case INDEX_op_exit_tb:
+        tcg_out_ld(s, TCG_TYPE_PTR, TMP_REG1, TCG_REG_SP, TB_RET_OFS);
+        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_V0, arg0);
+        tcg_out_fmt_jmp(s, INSN_RET, TCG_REG_ZERO, TMP_REG1, 0);
+        break;
+
+    case INDEX_op_goto_tb:
+        if (s->tb_jmp_offset) {
+            /* Direct jump method.  In the general case we output:
+                 br   $at,.+4
+                 ldah $at,hi($at)
+                 lda  $at,lo($at)
+                 jmp  $31,($at),0
+               We need to modify two instructions to set the link.
+               We want that modification to be atomic, so we arrange
+               for the ldah+lda pair to be 8-byte aligned.  Which
+               means that the first branch should be 4 mod 8.  */
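+            /* tb_set_jmp_target1 can then update the pair with a
+               single aligned 8-byte store.  */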
+            if (((uintptr_t)s->code_ptr & 7) == 0) {
+                tcg_out32(s, INSN_NOP);
+            }
+            tcg_out_fmt_br(s, INSN_BR, TMP_REG1, 0);
+            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
+            s->code_ptr += 8;
+        } else {
+            /* Indirect jump method.  */
+            tcg_out_ld(s, TCG_TYPE_PTR, TMP_REG1, TCG_REG_ZERO,
+                       (tcg_target_long)(s->tb_next + arg0));
+        }
+        tcg_out_fmt_jmp(s, INSN_JMP, TCG_REG_ZERO, TMP_REG1, 0);
+        s->tb_next_offset[arg0] = s->code_ptr - s->code_buf;
+        break;
+
+    case INDEX_op_call:
+        if (const_args[0]) {
+            tcg_out_const_call(s, arg0);
+        } else {
+            tcg_out_fmt_jmp(s, INSN_JSR, TCG_REG_RA, TCG_REG_PV, 0);
+        }
+        break;
+
+    case INDEX_op_jmp:
+        tcg_out_fmt_jmp(s, INSN_JMP, TCG_REG_ZERO, arg0, 0);
+        break;
+
+    case INDEX_op_br:
+        tcg_out_br(s, INSN_BR, TCG_REG_ZERO, arg0);
+        break;
+
+    case INDEX_op_ld8u_i32:
+    case INDEX_op_ld8u_i64:
+        c = 0;
+        goto do_load;
+    case INDEX_op_ld8s_i32:
+    case INDEX_op_ld8s_i64:
+        c = 0 | 4;
+        goto do_load;
+    case INDEX_op_ld16u_i32:
+    case INDEX_op_ld16u_i64:
+        c = 1;
+        goto do_load;
+    case INDEX_op_ld16s_i32:
+    case INDEX_op_ld16s_i64:
+        c = 1 | 4;
+        goto do_load;
+    case INDEX_op_ld32u_i64:
+        c = 2;
+        goto do_load;
+    case INDEX_op_ld_i32:
+    case INDEX_op_ld32s_i64:
+        c = 2 | 4;
+        goto do_load;
+    case INDEX_op_ld_i64:
+        c = 3;
+    do_load:
+        tcg_out_ld_sz(s, c, arg0, arg1, arg2);
+        break;
+
+    case INDEX_op_st8_i32:
+    case INDEX_op_st8_i64:
+        c = 0;
+        goto do_store;
+    case INDEX_op_st16_i32:
+    case INDEX_op_st16_i64:
+        c = 1;
+        goto do_store;
+    case INDEX_op_st_i32:
+    case INDEX_op_st32_i64:
+        c = 2;
+        goto do_store;
+    case INDEX_op_st_i64:
+        c = 3;
+    do_store:
+        tcg_out_st_sz(s, c, arg0, arg1, arg2);
+        break;
+
+    case INDEX_op_sub_i32:
+        if (const_args[2]) {
+            arg2 = -arg2;
+        } else {
+            insn = INSN_SUBL;
+            goto do_arith;
+        }
+        /* FALLTHRU */
+
+    case INDEX_op_add_i32:
+        if (const_args[2]) {
+            if ((int32_t)arg2 >= 0) {
+                tcg_out_fmt_opi(s, INSN_ADDL, arg1, (int32_t)arg2, arg0);
+            } else {
+                tcg_out_fmt_opi(s, INSN_SUBL, arg1, -(int32_t)arg2, arg0);
+            }
+        } else {
+            insn = INSN_ADDL;
+            goto do_arith;
+        }
+        break;
+
+    case INDEX_op_sub_i64:
+        if (const_args[2]) {
+            arg2 = -arg2;
+        } else {
+            insn = INSN_SUBQ;
+            goto do_arith;
+        }
+        /* FALLTHRU */
+
+    case INDEX_op_add_i64:
+        if (const_args[2]) {
+            tcg_out_mem_long(s, INSN_LDA, arg0, arg1, arg2);
+        } else {
+            insn = INSN_ADDQ;
+            goto do_arith;
+        }
+        break;
+
+    case INDEX_op_mul_i32:
+        insn = INSN_MULL;
+        goto do_arith;
+
+    case INDEX_op_mul_i64:
+        insn = INSN_MULQ;
+        goto do_arith;
+
+    case INDEX_op_and_i32:
+    case INDEX_op_and_i64:
+        if (const_args[2]) {
+            if (opc == INDEX_op_and_i32) {
+                arg2 = (int32_t)arg2;
+            }
+            tgen_andi(s, arg1, arg2, arg0);
+            break;
+        }
+        insn = INSN_AND;
+        goto do_arith;
+
+    case INDEX_op_andc_i32:
+    case INDEX_op_andc_i64:
+        if (const_args[2]) {
+            if (opc == INDEX_op_andc_i32) {
+                arg2 = (int32_t)arg2;
+            }
+            tgen_andi(s, arg1, ~arg2, arg0);
+            break;
+        }
+        insn = INSN_BIC;
+        goto do_arith;
+
+    case INDEX_op_or_i32:
+    case INDEX_op_or_i64:
+        insn = INSN_BIS;
+        goto do_arith;
+
+    case INDEX_op_orc_i32:
+    case INDEX_op_orc_i64:
+        insn = INSN_ORNOT;
+        goto do_arith;
+
+    case INDEX_op_xor_i32:
+    case INDEX_op_xor_i64:
+        insn = INSN_XOR;
+        goto do_arith;
+
+    case INDEX_op_eqv_i32:
+    case INDEX_op_eqv_i64:
+        insn = INSN_EQV;
+        goto do_arith;
+
+    case INDEX_op_shl_i32:
+        /* Make sure to preserve the sign-extension in the result.
+           Thus the special casing of shifts by 1, 2 and 3.  */
+        if (const_args[2]) {
+            arg2 &= 31;
+            switch (arg2) {
+            case 0:
+                tcg_out_mov(s, TCG_TYPE_I32, arg0, arg1);
+                break;
+            case 1:
+                tcg_out_fmt_opr(s, INSN_ADDL, arg1, arg1, arg0);
+                break;
+            case 2:
+                tcg_out_fmt_opr(s, INSN_S4ADDL, arg1, TCG_REG_ZERO, arg0);
+                break;
+            case 3:
+                tcg_out_fmt_opr(s, INSN_S8ADDL, arg1, TCG_REG_ZERO, arg0);
+                break;
+            default:
+                tcg_out_fmt_opi(s, INSN_SLL, arg1, arg2, arg0);
+                tgen_ext32s(s, arg0, arg0);
+                break;
+            }
+        } else {
+            /* ??? TCG has no requirement to truncate the shift yet.  */
+            tcg_out_fmt_opr(s, INSN_SLL, arg1, arg2, arg0);
+            tgen_ext32s(s, arg0, arg0);
+        }
+        break;
+
+    case INDEX_op_shl_i64:
+        insn = INSN_SLL;
+        goto do_arith;
+
+    case INDEX_op_shr_i32:
+        /* Recall that the input is sign-extended, which means that we
+           need to mask the high bits that we'll be shifting in.  There
+           are three common cases that can perform the shift+mask in
+           one instruction.  Otherwise, we'll need a separate mask.  */
+        if (const_args[2]) {
+            arg2 &= 31;
+            switch (arg2) {
+            case 0:
+                tcg_out_mov(s, TCG_TYPE_I32, arg0, arg1);
+                break;
+            case 8:
+                tcg_out_fmt_opi(s, INSN_INSLH, arg1, 7, arg0);
+                break;
+            case 16:
+                tcg_out_fmt_opi(s, INSN_EXTWL, arg1, 2, arg0);
+                break;
+            case 24:
+                tcg_out_fmt_opi(s, INSN_EXTBL, arg1, 3, arg0);
+                break;
+            case 25 ... 31:
+                tcg_out_fmt_opi(s, INSN_SRL, arg1, arg2, arg0);
+                tcg_out_fmt_opi(s, INSN_AND, arg0,
+                                (1 << (32 - arg2)) - 1, arg0);
+                break;
+            default:
+                tgen_ext32u(s, arg1, arg0);
+                tcg_out_fmt_opi(s, INSN_SRL, arg0, arg2, arg0);
+                break;
+            }
+        } else {
+            /* Here we need to be careful about a shift of zero,
+               for which we'd need to re-sign-extend the output.  */
+            tgen_ext32u(s, arg1, TMP_REG1);
+            tcg_out_fmt_opr(s, INSN_SRL, TMP_REG1, arg2, arg0);
+            tgen_ext32s(s, arg0, arg0);
+        }
+        break;
+
+    case INDEX_op_shr_i64:
+        insn = INSN_SRL;
+        goto do_arith;
+
+    case INDEX_op_sar_i32:
+        /* Note that since the input is already sign-extended,
+           we need not do anything special here.  */
+    case INDEX_op_sar_i64:
+        insn = INSN_SRA;
+        goto do_arith;
+
+    do_arith:
+        if (const_args[2]) {
+            tcg_out_fmt_opi(s, insn, arg1, arg2, arg0);
+        } else {
+            tcg_out_fmt_opr(s, insn, arg1, arg2, arg0);
+        }
+        break;
+
+    case INDEX_op_not_i32:
+    case INDEX_op_not_i64:
+        if (const_args[1]) {
+            tcg_out_fmt_opi(s, INSN_ORNOT, TCG_REG_ZERO, arg1, arg0);
+        } else {
+            tcg_out_fmt_opr(s, INSN_ORNOT, TCG_REG_ZERO, arg1, arg0);
+        }
+        break;
+
+    case INDEX_op_deposit_i32:
+        tcg_out_deposit(s, arg0, arg1, arg2, args[3], args[4], 0);
+        break;
+    case INDEX_op_deposit_i64:
+        tcg_out_deposit(s, arg0, arg1, arg2, args[3], args[4], 1);
+        break;
+
+    case INDEX_op_brcond_i32:
+    case INDEX_op_brcond_i64:
+        tcg_out_brcond(s, arg2, arg0, arg1, const_args[1], args[3]);
+        break;
+
+    case INDEX_op_setcond_i32:
+    case INDEX_op_setcond_i64:
+        tcg_out_setcond(s, args[3], arg0, arg1, arg2, const_args[2]);
+        break;
+
+    case INDEX_op_movcond_i32:
+    case INDEX_op_movcond_i64:
+        tcg_out_movcond(s, args[5], arg0, arg1, arg2, const_args[2],
+                        args[3], const_args[3]);
+        break;
+
+    case INDEX_op_ext8s_i32:
+    case INDEX_op_ext8s_i64:
+        c = 0 | 4;
+        goto do_sign_extend;
+    case INDEX_op_ext16s_i32:
+    case INDEX_op_ext16s_i64:
+        c = 1 | 4;
+        goto do_sign_extend;
+    case INDEX_op_ext32s_i64:
+        c = 2 | 4;
+    do_sign_extend:
+        tgen_extend(s, c, arg1, arg0);
+        break;
+
+    case INDEX_op_div_i32:
+        c = 2 | 4;
+        goto do_div;
+    case INDEX_op_rem_i32:
+        c = 2 | 4 | 8;
+        goto do_div;
+    case INDEX_op_divu_i32:
+        c = 2;
+        goto do_div;
+    case INDEX_op_remu_i32:
+        c = 2 | 8;
+        goto do_div;
+    case INDEX_op_div_i64:
+        c = 3 | 4;
+        goto do_div;
+    case INDEX_op_rem_i64:
+        c = 3 | 4 | 8;
+        goto do_div;
+    case INDEX_op_divu_i64:
+        c = 3;
+        goto do_div;
+    case INDEX_op_remu_i64:
+        c = 3 | 8;
+    do_div:
+        tcg_out_div(s, c);
+        break;
+
+    case INDEX_op_bswap16_i32:
+    case INDEX_op_bswap16_i64:
+        c = 1;
+        goto do_bswap;
+    case INDEX_op_bswap32_i32:
+        c = 2 | 4;
+        goto do_bswap;
+    case INDEX_op_bswap32_i64:
+        c = 2;
+        goto do_bswap;
+    case INDEX_op_bswap64_i64:
+        c = 3;
+    do_bswap:
+        tgen_bswap(s, c, arg1, arg0);
+        break;
+
+    case INDEX_op_qemu_ld8u:
+        c = 0;
+        goto do_qemu_load;
+    case INDEX_op_qemu_ld8s:
+        c = 0 | 4;
+        goto do_qemu_load;
+    case INDEX_op_qemu_ld16u:
+        c = 1;
+        goto do_qemu_load;
+    case INDEX_op_qemu_ld16s:
+        c = 1 | 4;
+        goto do_qemu_load;
+    case INDEX_op_qemu_ld32:
+    case INDEX_op_qemu_ld32s:
+        c = 2 | 4;
+        goto do_qemu_load;
+    case INDEX_op_qemu_ld32u:
+        c = 2;
+        goto do_qemu_load;
+    case INDEX_op_qemu_ld64:
+        c = 3;
+    do_qemu_load:
+        tcg_out_qemu_ld(s, args, c);
+        break;
+
+    case INDEX_op_qemu_st8:
+        c = 0;
+        goto do_qemu_store;
+    case INDEX_op_qemu_st16:
+        c = 1;
+        goto do_qemu_store;
+    case INDEX_op_qemu_st32:
+        c = 2;
+        goto do_qemu_store;
+    case INDEX_op_qemu_st64:
+        c = 3;
+    do_qemu_store:
+        tcg_out_qemu_st(s, args, c);
+        break;
+
+    case INDEX_op_mov_i32:
+    case INDEX_op_mov_i64:
+    case INDEX_op_movi_i32:
+    case INDEX_op_movi_i64:
+        /* These four are handled by tcg.c directly.  */
+    default:
+        tcg_abort();
+    }
+}
+
+static const TCGTargetOpDef alpha_op_defs[] = {
+    { INDEX_op_exit_tb,         { } },
+    { INDEX_op_goto_tb,         { } },
+    { INDEX_op_call,            { "ci" } },
+    { INDEX_op_jmp,             { "r" } },
+    { INDEX_op_br,              { } },
+
+    { INDEX_op_mov_i32,         { "r", "r" } },
+    { INDEX_op_movi_i32,        { "r" } },
+
+    { INDEX_op_ld8u_i32,        { "r", "r" } },
+    { INDEX_op_ld8s_i32,        { "r", "r" } },
+    { INDEX_op_ld16u_i32,       { "r", "r" } },
+    { INDEX_op_ld16s_i32,       { "r", "r" } },
+    { INDEX_op_ld_i32,          { "r", "r" } },
+    { INDEX_op_st8_i32,         { "rJ", "r" } },
+    { INDEX_op_st16_i32,        { "rJ", "r" } },
+    { INDEX_op_st_i32,          { "rJ", "r" } },
+
+    { INDEX_op_add_i32,         { "r", "rJ", "rK" } },
+    { INDEX_op_mul_i32,         { "r", "rJ", "rI" } },
+    { INDEX_op_sub_i32,         { "r", "rJ", "rK" } },
+    { INDEX_op_and_i32,         { "r", "rJ", "rM" } },
+    { INDEX_op_or_i32,          { "r", "rJ", "rI" } },
+    { INDEX_op_xor_i32,         { "r", "rJ", "rI" } },
+    { INDEX_op_andc_i32,        { "r", "rJ", "rM" } },
+    { INDEX_op_orc_i32,         { "r", "rJ", "rI" } },
+    { INDEX_op_eqv_i32,         { "r", "rJ", "rI" } },
+    { INDEX_op_not_i32,         { "r", "rI" } },
+
+    { INDEX_op_shl_i32,         { "r", "rJ", "rI" } },
+    { INDEX_op_shr_i32,         { "r", "rJ", "rI" } },
+    { INDEX_op_sar_i32,         { "r", "rJ", "rI" } },
+
+    { INDEX_op_deposit_i32,     { "r", "rJ", "rJ" } },
+
+    { INDEX_op_div_i32,         { "c", "a", "b" } },
+    { INDEX_op_rem_i32,         { "c", "a", "b" } },
+    { INDEX_op_divu_i32,        { "c", "a", "b" } },
+    { INDEX_op_remu_i32,        { "c", "a", "b" } },
+
+    { INDEX_op_brcond_i32,      { "rJ", "rI" } },
+    { INDEX_op_setcond_i32,     { "r", "rJ", "rI" } },
+    { INDEX_op_movcond_i32,     { "r", "rJ", "rI", "rI", "0" } },
+
+    { INDEX_op_mov_i64,         { "r", "r" } },
+    { INDEX_op_movi_i64,        { "r" } },
+
+    { INDEX_op_ld8u_i64,        { "r", "r" } },
+    { INDEX_op_ld8s_i64,        { "r", "r" } },
+    { INDEX_op_ld16u_i64,       { "r", "r" } },
+    { INDEX_op_ld16s_i64,       { "r", "r" } },
+    { INDEX_op_ld32u_i64,       { "r", "r" } },
+    { INDEX_op_ld32s_i64,       { "r", "r" } },
+    { INDEX_op_ld_i64,          { "r", "r" } },
+    { INDEX_op_st8_i64,         { "rJ", "r" } },
+    { INDEX_op_st16_i64,        { "rJ", "r" } },
+    { INDEX_op_st32_i64,        { "rJ", "r" } },
+    { INDEX_op_st_i64,          { "rJ", "r" } },
+
+    { INDEX_op_add_i64,         { "r", "rJ", "ri" } },
+    { INDEX_op_mul_i64,         { "r", "rJ", "rI" } },
+    { INDEX_op_sub_i64,         { "r", "rJ", "ri" } },
+    { INDEX_op_and_i64,         { "r", "rJ", "rM" } },
+    { INDEX_op_or_i64,          { "r", "rJ", "rI" } },
+    { INDEX_op_xor_i64,         { "r", "rJ", "rI" } },
+    { INDEX_op_andc_i64,        { "r", "rJ", "rM" } },
+    { INDEX_op_orc_i64,         { "r", "rJ", "rI" } },
+    { INDEX_op_eqv_i64,         { "r", "rJ", "rI" } },
+    { INDEX_op_not_i64,         { "r", "rI" } },
+
+    { INDEX_op_shl_i64,         { "r", "rJ", "rI" } },
+    { INDEX_op_shr_i64,         { "r", "rJ", "rI" } },
+    { INDEX_op_sar_i64,         { "r", "rJ", "rI" } },
+
+    { INDEX_op_deposit_i64,     { "r", "rJ", "rJ" } },
+
+    { INDEX_op_div_i64,         { "c", "a", "b" } },
+    { INDEX_op_rem_i64,         { "c", "a", "b" } },
+    { INDEX_op_divu_i64,        { "c", "a", "b" } },
+    { INDEX_op_remu_i64,        { "c", "a", "b" } },
+
+    { INDEX_op_brcond_i64,      { "rJ", "rI" } },
+    { INDEX_op_setcond_i64,     { "r", "rJ", "rI" } },
+    { INDEX_op_movcond_i64,     { "r", "rJ", "rI", "rI", "0" } },
+
+    { INDEX_op_ext8s_i32,       { "r", "rJ" } },
+    { INDEX_op_ext16s_i32,      { "r", "rJ" } },
+    { INDEX_op_ext8s_i64,       { "r", "rJ" } },
+    { INDEX_op_ext16s_i64,      { "r", "rJ" } },
+    { INDEX_op_ext32s_i64,      { "r", "rJ" } },
+
+    { INDEX_op_bswap16_i32,     { "r", "rJ" } },
+    { INDEX_op_bswap32_i32,     { "r", "rJ" } },
+    { INDEX_op_bswap16_i64,     { "r", "rJ" } },
+    { INDEX_op_bswap32_i64,     { "r", "rJ" } },
+    { INDEX_op_bswap64_i64,     { "r", "rJ" } },
+
+    { INDEX_op_qemu_ld8u,       { "r", "L" } },
+    { INDEX_op_qemu_ld8s,       { "r", "L" } },
+    { INDEX_op_qemu_ld16u,      { "r", "L" } },
+    { INDEX_op_qemu_ld16s,      { "r", "L" } },
+    { INDEX_op_qemu_ld32,       { "r", "L" } },
+    { INDEX_op_qemu_ld32u,      { "r", "L" } },
+    { INDEX_op_qemu_ld32s,      { "r", "L" } },
+    { INDEX_op_qemu_ld64,       { "r", "L" } },
+
+    { INDEX_op_qemu_st8,        { "L", "L" } },
+    { INDEX_op_qemu_st16,       { "L", "L" } },
+    { INDEX_op_qemu_st32,       { "L", "L" } },
+    { INDEX_op_qemu_st64,       { "L", "L" } },
+    { -1 },
+};
+
+
+/*
+ * Generate global QEMU prologue and epilogue code
+ */
+void tcg_target_qemu_prologue(TCGContext *s)
+{
+    static const TCGReg save_regs[] = {
+        TCG_REG_RA,
+        TCG_REG_S0,
+        TCG_REG_S1,
+        TCG_REG_S2,
+        TCG_REG_S3,
+        TCG_REG_S4,
+        /* TCG_REG_S5 -- currently used for the global env.  */
+        TCG_REG_S6,
+    };
+
+    long i, frame_size, save_ofs;
+    uint8_t *ret_loc, *ent_loc;
+
+    /* The shape of the stack frame is:
+       input sp
+         [ Register save area ]
+         [ TB return address ]
+         [ CPU_TEMP_BUF_NLONGS ]
+         [ TCG_STATIC_CALL_ARGS_SIZE ]
+       sp
+    */
+
+    save_ofs = TB_RET_OFS + 8;
+    frame_size = save_ofs + ARRAY_SIZE(save_regs) * 8;
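+    /* Round the frame size up to the required stack alignment.  */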
+    frame_size += TCG_TARGET_STACK_ALIGN - 1;
+    frame_size &= -TCG_TARGET_STACK_ALIGN;
+
+    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
+                  CPU_TEMP_BUF_NLONGS * sizeof(long));
+
+    /* TB Prologue.  */
+    ent_loc = s->code_ptr;
+
+    /* Allocate the stack frame.  */
+    tcg_out_fmt_mem(s, INSN_LDA, TCG_REG_SP, TCG_REG_SP, -frame_size);
+
+    /* Save all callee saved registers.  */
+    for (i = 0; i < ARRAY_SIZE(save_regs); i++) {
+        tcg_out_fmt_mem(s, INSN_STQ, save_regs[i], TCG_REG_SP, save_ofs + i*8);
+    }
+
+    /* Compute and store the return address of the TB.  The LDA
+       displacement is patched below, once the JSR has been emitted.  */
+    ret_loc = s->code_ptr;
+    tcg_out_fmt_mem(s, INSN_LDA, TMP_REG1, TCG_REG_PV, 0);
+    tcg_out_fmt_mem(s, INSN_STQ, TMP_REG1, TCG_REG_SP, TB_RET_OFS);
+
+    /* Copy the ENV pointer into place.  */
+    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_A0);
+
+    /* Set up TCG_GUEST_BASE_REG if desired.  */
+    if (USE_GUEST_BASE_REG) {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE);
+        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
+    }
+
+    /* Invoke the TB.  */
+    tcg_out_fmt_jmp(s, INSN_JSR, TCG_REG_ZERO, TCG_REG_A1, 0);
+
+    /* Fill in the displacement for the TB return address.  On entry, PV
+       holds the prologue start, so the offset of the insn following the
+       JSR is the address to which the TB returns.  */
+    i = s->code_ptr - ent_loc;
+    assert(i == (int16_t)i);
+    *(int16_t *)ret_loc = i;
+
+    /* TB epilogue. */
+
+    /* Restore all callee saved registers.  */
+    for (i = 0; i < ARRAY_SIZE(save_regs); i++) {
+        tcg_out_fmt_mem(s, INSN_LDQ, save_regs[i], TCG_REG_SP, save_ofs + i*8);
+    }
+
+    /* Deallocate the stack frame.  */
+    tcg_out_fmt_mem(s, INSN_LDA, TCG_REG_SP, TCG_REG_SP, frame_size);
+
+    tcg_out_fmt_jmp(s, INSN_RET, TCG_REG_ZERO, TCG_REG_RA, 0);
+}
+
+
+void tcg_target_init(TCGContext *s)
+{
+#if !defined(CONFIG_USER_ONLY)
+    /* fail safe */
+    assert((1 << CPU_TLB_ENTRY_BITS) == sizeof(CPUTLBEntry));
+#endif
+
+    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
+    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
+
+    tcg_regset_set32(tcg_target_call_clobber_regs, 0, 0xffffffff);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S0);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S1);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S2);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S3);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S4);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S5);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S6);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_GP);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_SP);
+
+    tcg_regset_clear(s->reserved_regs);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO);
+    tcg_regset_set_reg(s->reserved_regs, TMP_REG1);
+    tcg_regset_set_reg(s->reserved_regs, TMP_REG2);
+
+    tcg_add_target_add_op_defs(alpha_op_defs);
+}
+
+void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
+{
+    long disp, hi, lo, insn1, insn2;
+
+    /* Try a direct branch first.  */
+    disp = addr - (jmp_addr + 4);
+    if (disp >= -0x400000 && disp < 0x400000) {
+        insn1 = INSN_BR | INSN_RA(TCG_REG_ZERO) | INSN_DISP21(disp >> 2);
+        /* The second insn is dead code, but don't leave the memory totally
+           uninitialized: if the stale bytes happen to form an illegal insn,
+           the prefetch unit may flush the pipeline before it can prove the
+           insn is never executed.  */
+        insn2 = INSN_NOP;
+    } else {
+        /* Failing that, use an ldah+lda pair to cover the distance.
+           Since the code buffer is limited to 2GB, a 32-bit displacement
+           always suffices.  */
+        disp = addr - jmp_addr;
+        lo = (int16_t)disp;
+        hi = (int16_t)((disp - lo) >> 16);
+        assert((hi << 16) + lo == disp);
+        insn1 = INSN_LDAH | INSN_RA(TMP_REG1)
+                | INSN_RB(TMP_REG1) | INSN_DISP16(hi);
+        insn2 = INSN_LDA | INSN_RA(TMP_REG1)
+                | INSN_RB(TMP_REG1) | INSN_DISP16(lo);
+    }
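+    /* Alpha is little-endian, so insn1 lands at jmp_addr and insn2 at
+       jmp_addr + 4; the single aligned quadword store below updates both
+       instructions of the jump sequence at once.  */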
+    *(uint64_t *)jmp_addr = insn1 + (insn2 << 32);
+
+    flush_icache_range(jmp_addr, jmp_addr + 8);
+}
diff --git a/tcg/alpha/tcg-target.h b/tcg/alpha/tcg-target.h
new file mode 100644
index 0000000..3611687
--- /dev/null
+++ b/tcg/alpha/tcg-target.h
@@ -0,0 +1,142 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#define TCG_TARGET_ALPHA 1
+
+#define TCG_TARGET_NB_REGS 32
+
+/* Having the zero register ($31) == 0 within TCG simplifies a few things.
+   Thus we have a mapping between TCG regno and hardware regno.  */
+#define HW_TO_TCG_REGNO(x)      ((x) ^ 0x1f)
+#define TCG_TO_HW_REGNO(x)      ((x) ^ 0x1f)
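+/* For example, hw $31 (the zero register) becomes TCG regno 0, while
+   hw $0 (v0) becomes TCG regno 31.  */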
+
+typedef enum TCGReg {
+    TCG_REG_V0 = HW_TO_TCG_REGNO(0),
+
+    TCG_REG_T0 = HW_TO_TCG_REGNO(1),
+    TCG_REG_T1 = HW_TO_TCG_REGNO(2),
+    TCG_REG_T2 = HW_TO_TCG_REGNO(3),
+    TCG_REG_T3 = HW_TO_TCG_REGNO(4),
+    TCG_REG_T4 = HW_TO_TCG_REGNO(5),
+    TCG_REG_T5 = HW_TO_TCG_REGNO(6),
+    TCG_REG_T6 = HW_TO_TCG_REGNO(7),
+    TCG_REG_T7 = HW_TO_TCG_REGNO(8),
+
+    TCG_REG_S0 = HW_TO_TCG_REGNO(9),
+    TCG_REG_S1 = HW_TO_TCG_REGNO(10),
+    TCG_REG_S2 = HW_TO_TCG_REGNO(11),
+    TCG_REG_S3 = HW_TO_TCG_REGNO(12),
+    TCG_REG_S4 = HW_TO_TCG_REGNO(13),
+    TCG_REG_S5 = HW_TO_TCG_REGNO(14),
+    TCG_REG_S6 = HW_TO_TCG_REGNO(15),
+
+    TCG_REG_A0 = HW_TO_TCG_REGNO(16),
+    TCG_REG_A1 = HW_TO_TCG_REGNO(17),
+    TCG_REG_A2 = HW_TO_TCG_REGNO(18),
+    TCG_REG_A3 = HW_TO_TCG_REGNO(19),
+    TCG_REG_A4 = HW_TO_TCG_REGNO(20),
+    TCG_REG_A5 = HW_TO_TCG_REGNO(21),
+
+    TCG_REG_T8 = HW_TO_TCG_REGNO(22),
+    TCG_REG_T9 = HW_TO_TCG_REGNO(23),
+    TCG_REG_T10 = HW_TO_TCG_REGNO(24),
+    TCG_REG_T11 = HW_TO_TCG_REGNO(25),
+
+    TCG_REG_RA = HW_TO_TCG_REGNO(26),
+    TCG_REG_PV = HW_TO_TCG_REGNO(27),
+    TCG_REG_AT = HW_TO_TCG_REGNO(28),
+    TCG_REG_GP = HW_TO_TCG_REGNO(29),
+    TCG_REG_SP = HW_TO_TCG_REGNO(30),
+
+    TCG_REG_ZERO = HW_TO_TCG_REGNO(31)
+} TCGReg;
+
+/* Used for function call generation.  */
+#define TCG_REG_CALL_STACK TCG_REG_SP
+#define TCG_TARGET_STACK_ALIGN 16
+#define TCG_TARGET_CALL_STACK_OFFSET 0
+
+/* We have signed extension instructions.  */
+#define TCG_TARGET_HAS_ext8s_i32        1
+#define TCG_TARGET_HAS_ext16s_i32       1
+#define TCG_TARGET_HAS_ext8s_i64        1
+#define TCG_TARGET_HAS_ext16s_i64       1
+#define TCG_TARGET_HAS_ext32s_i64       1
+
+/* We have single-output division routines.  */
+#define TCG_TARGET_HAS_div_i32          1
+#define TCG_TARGET_HAS_div_i64          1
+
+/* We have conditional move.  */
+#define TCG_TARGET_HAS_movcond_i32      1
+#define TCG_TARGET_HAS_movcond_i64      1
+
+/* We have optimized bswap routines.  */
+#define TCG_TARGET_HAS_bswap16_i32      1
+#define TCG_TARGET_HAS_bswap32_i32      1
+#define TCG_TARGET_HAS_bswap16_i64      1
+#define TCG_TARGET_HAS_bswap32_i64      1
+#define TCG_TARGET_HAS_bswap64_i64      1
+
+/* We have NOT via ORNOT.  */
+#define TCG_TARGET_HAS_not_i32          1
+#define TCG_TARGET_HAS_not_i64          1
+
+/* We have some compound logical instructions.  */
+#define TCG_TARGET_HAS_andc_i32         1
+#define TCG_TARGET_HAS_andc_i64         1
+#define TCG_TARGET_HAS_orc_i32          1
+#define TCG_TARGET_HAS_orc_i64          1
+#define TCG_TARGET_HAS_eqv_i32          1
+#define TCG_TARGET_HAS_eqv_i64          1
+#define TCG_TARGET_HAS_nand_i32         0
+#define TCG_TARGET_HAS_nand_i64         0
+#define TCG_TARGET_HAS_nor_i32          0
+#define TCG_TARGET_HAS_nor_i64          0
+
+/* We can do better for specific cases of deposit.  */
+#define TCG_TARGET_HAS_deposit_i32      1
+#define TCG_TARGET_HAS_deposit_i64      1
+
+#define TCG_TARGET_deposit_i32_valid(ofs, len) \
+  (((ofs) & 7) == 0 && ((len) == 8 || (len) == 16 || (len) == 32))
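+/* I.e. the byte-aligned cases that map onto the Alpha byte-insert
+   instructions (insbl/inswl/insll).  */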
+
+/* The default implementations of these are fine.  */
+#define TCG_TARGET_HAS_neg_i32          0
+#define TCG_TARGET_HAS_neg_i64          0
+#define TCG_TARGET_HAS_ext8u_i32        0
+#define TCG_TARGET_HAS_ext16u_i32       0
+#define TCG_TARGET_HAS_ext8u_i64        0
+#define TCG_TARGET_HAS_ext16u_i64       0
+#define TCG_TARGET_HAS_ext32u_i64       0
+#define TCG_TARGET_HAS_rot_i32          0
+#define TCG_TARGET_HAS_rot_i64          0
+
+#define TCG_TARGET_HAS_GUEST_BASE
+
+#define TCG_AREG0 TCG_REG_S6
+
+static inline void flush_icache_range(unsigned long start, unsigned long stop)
+{
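+    /* PALcode entry 0x86 is "imb", the instruction memory barrier, which
+       makes all prior stores visible to the instruction stream; the
+       start/stop arguments are therefore unused.  */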
+    __asm__ __volatile__ ("call_pal 0x86");
+}
-- 
1.7.11.4



