[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [patch] gcc4 host support
From: |
Paul Brook |
Subject: |
[Qemu-devel] [patch] gcc4 host support |
Date: |
Wed, 11 May 2005 22:04:09 +0100 |
User-agent: |
KMail/1.7.2 |
The attached patch adds support for gcc4 x86 and x86_64 hosts.
The main problem with gcc4 is that we can no longer force gcc to place the
return from each function at the end of the function.
My solution is to search the function for "ret" instructions and replace
them with a jmp to the next block of code. On RISC targets this would be
easy. On x86/x86_64 it's significantly harder because instructions are
variable length. This creates two problems:
- Identify the return instruction: Dyngen traces through the code following
any branches. When generating a "ret" to exit the translation block, or a
"jmp" which will be patched at runtime we generate a privileged instruction
instead. dyngen recognises these and replaces them with the correct value.
For simplicity we still require a single ret instruction. This is easy to
achieve with the existing FORCE_RET markers.
- Replacing the ret with a jmp: If the ret is not the last instruction we need
to replace it with a jmp to the next op. Unfortunately a jmp instruction is 2
or 5 bytes, whereas a ret is just one byte long. To do the replacement we
need to move some of the surrounding code out of the way. I've made the
FORCE_RET macro insert 4 bytes of nops. This guarantees that we always have 4
bytes we can move without having to redirect any jmps. In almost all cases
dyngen can strip these nop instructions, so they never make it into the
generated code.
The ppc target code used its own RETURN macro. I've replaced this with the
standard FORCE_RET macro (to get the necessary nops), and changed dyngen so
that it can insert nops after each op for debugging purposes.
I've successfully booted the nbench floppy under an i386-softmmu guest on
i686-linux, x86_64-linux and windows hosts with this patch, and verified that
arm-user emulation still works. I also compiled op.c with -freorder-blocks,
and it still worked.
On x86-64 the gcc4 compiled qemu runs a few percent faster than with the
redhat 3.2.3 system compiler.
On x86 I've had to hack round other bugs (gcc doesn't like doing 64-bit
arithmetic with only three 32-bit registers), so it is noticeably slower. These
hacks also slow down gcc3.3 by a similar amount.
Depending on the optimization options used it's also necessary to add
FORCE_RET markers to more functions in op.c. I'll submit those separately.
Paul
Index: target-i386/op.c
===================================================================
RCS file: /cvsroot/qemu/qemu/target-i386/op.c,v
retrieving revision 1.37
diff -u -p -r1.37 op.c
--- target-i386/op.c 26 Apr 2005 20:38:17 -0000 1.37
+++ target-i386/op.c 9 May 2005 01:33:04 -0000
@@ -1008,6 +1008,7 @@ void OPPROTO op_aaa(void)
}
EAX = (EAX & ~0xffff) | al | (ah << 8);
CC_SRC = eflags;
+ FORCE_RET();
}
void OPPROTO op_aas(void)
@@ -1032,6 +1033,7 @@ void OPPROTO op_aas(void)
}
EAX = (EAX & ~0xffff) | al | (ah << 8);
CC_SRC = eflags;
+ FORCE_RET();
}
void OPPROTO op_daa(void)
@@ -1059,6 +1061,7 @@ void OPPROTO op_daa(void)
eflags |= parity_table[al]; /* pf */
eflags |= (al & 0x80); /* sf */
CC_SRC = eflags;
+ FORCE_RET();
}
void OPPROTO op_das(void)
@@ -1089,6 +1092,7 @@ void OPPROTO op_das(void)
eflags |= parity_table[al]; /* pf */
eflags |= (al & 0x80); /* sf */
CC_SRC = eflags;
+ FORCE_RET();
}
/* segment handling */
@@ -1608,6 +1612,7 @@ void OPPROTO op_flds_FT0_A0(void)
#else
FT0 = ldfl(A0);
#endif
+ FORCE_RET();
}
void OPPROTO op_fldl_FT0_A0(void)
@@ -1618,6 +1623,7 @@ void OPPROTO op_fldl_FT0_A0(void)
#else
FT0 = ldfq(A0);
#endif
+ FORCE_RET();
}
/* helpers are needed to avoid static constant reference. XXX: find a better
way */
@@ -1663,6 +1669,7 @@ void OPPROTO op_fild_FT0_A0(void)
#else
FT0 = (CPU86_LDouble)ldsw(A0);
#endif
+ FORCE_RET();
}
void OPPROTO op_fildl_FT0_A0(void)
@@ -1673,6 +1680,7 @@ void OPPROTO op_fildl_FT0_A0(void)
#else
FT0 = (CPU86_LDouble)((int32_t)ldl(A0));
#endif
+ FORCE_RET();
}
void OPPROTO op_fildll_FT0_A0(void)
@@ -1683,6 +1691,7 @@ void OPPROTO op_fildll_FT0_A0(void)
#else
FT0 = (CPU86_LDouble)((int64_t)ldq(A0));
#endif
+ FORCE_RET();
}
#endif
@@ -2229,6 +2238,7 @@ void OPPROTO op_fldcw_A0(void)
{
env->fpuc = lduw(A0);
update_fp_status();
+ FORCE_RET();
}
void OPPROTO op_fclex(void)
Index: target-i386/ops_mem.h
===================================================================
RCS file: /cvsroot/qemu/qemu/target-i386/ops_mem.h,v
retrieving revision 1.6
diff -u -p -r1.6 ops_mem.h
--- target-i386/ops_mem.h 13 Mar 2005 09:52:09 -0000 1.6
+++ target-i386/ops_mem.h 9 May 2005 01:33:04 -0000
@@ -1,51 +1,61 @@
void OPPROTO glue(glue(op_ldub, MEMSUFFIX), _T0_A0)(void)
{
T0 = glue(ldub, MEMSUFFIX)(A0);
+ FORCE_RET();
}
void OPPROTO glue(glue(op_ldsb, MEMSUFFIX), _T0_A0)(void)
{
T0 = glue(ldsb, MEMSUFFIX)(A0);
+ FORCE_RET();
}
void OPPROTO glue(glue(op_lduw, MEMSUFFIX), _T0_A0)(void)
{
T0 = glue(lduw, MEMSUFFIX)(A0);
+ FORCE_RET();
}
void OPPROTO glue(glue(op_ldsw, MEMSUFFIX), _T0_A0)(void)
{
T0 = glue(ldsw, MEMSUFFIX)(A0);
+ FORCE_RET();
}
void OPPROTO glue(glue(op_ldl, MEMSUFFIX), _T0_A0)(void)
{
T0 = (uint32_t)glue(ldl, MEMSUFFIX)(A0);
+ FORCE_RET();
}
void OPPROTO glue(glue(op_ldub, MEMSUFFIX), _T1_A0)(void)
{
T1 = glue(ldub, MEMSUFFIX)(A0);
+ FORCE_RET();
}
void OPPROTO glue(glue(op_ldsb, MEMSUFFIX), _T1_A0)(void)
{
T1 = glue(ldsb, MEMSUFFIX)(A0);
+ FORCE_RET();
}
void OPPROTO glue(glue(op_lduw, MEMSUFFIX), _T1_A0)(void)
{
T1 = glue(lduw, MEMSUFFIX)(A0);
+ FORCE_RET();
}
void OPPROTO glue(glue(op_ldsw, MEMSUFFIX), _T1_A0)(void)
{
T1 = glue(ldsw, MEMSUFFIX)(A0);
+ FORCE_RET();
}
void OPPROTO glue(glue(op_ldl, MEMSUFFIX), _T1_A0)(void)
{
T1 = glue(ldl, MEMSUFFIX)(A0);
+ FORCE_RET();
}
void OPPROTO glue(glue(op_stb, MEMSUFFIX), _T0_A0)(void)
@@ -92,6 +102,7 @@ void OPPROTO glue(glue(op_ldq, MEMSUFFIX
uint64_t *p;
p = (uint64_t *)((char *)env + PARAM1);
*p = glue(ldq, MEMSUFFIX)(A0);
+ FORCE_RET();
}
void OPPROTO glue(glue(op_stq, MEMSUFFIX), _env_A0)(void)
@@ -108,6 +119,7 @@ void OPPROTO glue(glue(op_ldo, MEMSUFFIX
p = (XMMReg *)((char *)env + PARAM1);
p->XMM_Q(0) = glue(ldq, MEMSUFFIX)(A0);
p->XMM_Q(1) = glue(ldq, MEMSUFFIX)(A0 + 8);
+ FORCE_RET();
}
void OPPROTO glue(glue(op_sto, MEMSUFFIX), _env_A0)(void)
@@ -123,21 +135,25 @@ void OPPROTO glue(glue(op_sto, MEMSUFFIX
void OPPROTO glue(glue(op_ldsl, MEMSUFFIX), _T0_A0)(void)
{
T0 = (int32_t)glue(ldl, MEMSUFFIX)(A0);
+ FORCE_RET();
}
void OPPROTO glue(glue(op_ldsl, MEMSUFFIX), _T1_A0)(void)
{
T1 = (int32_t)glue(ldl, MEMSUFFIX)(A0);
+ FORCE_RET();
}
void OPPROTO glue(glue(op_ldq, MEMSUFFIX), _T0_A0)(void)
{
T0 = glue(ldq, MEMSUFFIX)(A0);
+ FORCE_RET();
}
void OPPROTO glue(glue(op_ldq, MEMSUFFIX), _T1_A0)(void)
{
T1 = glue(ldq, MEMSUFFIX)(A0);
+ FORCE_RET();
}
void OPPROTO glue(glue(op_stq, MEMSUFFIX), _T0_A0)(void)
Index: target-i386/ops_template_mem.h
===================================================================
RCS file: /cvsroot/qemu/qemu/target-i386/ops_template_mem.h,v
retrieving revision 1.5
diff -u -p -r1.5 ops_template_mem.h
--- target-i386/ops_template_mem.h 3 Mar 2005 01:14:55 -0000 1.5
+++ target-i386/ops_template_mem.h 9 May 2005 01:33:04 -0000
@@ -284,6 +284,7 @@ void OPPROTO glue(glue(op_shld, MEM_SUFF
#endif
CC_SRC = tmp;
CC_DST = T0;
+ FORCE_RET();
}
void OPPROTO glue(glue(op_shld, MEM_SUFFIX), _T0_T1_ECX_cc)(void)
@@ -326,6 +327,7 @@ void OPPROTO glue(glue(op_shrd, MEM_SUFF
#endif
CC_SRC = tmp;
CC_DST = T0;
+ FORCE_RET();
}
@@ -369,6 +371,7 @@ void OPPROTO glue(glue(op_shld, MEM_SUFF
#endif
CC_SRC = tmp;
CC_DST = T0;
+ FORCE_RET();
}
void OPPROTO glue(glue(op_shld, MEM_SUFFIX), _T0_T1_ECX_cc)(void)
@@ -407,6 +410,7 @@ void OPPROTO glue(glue(op_shrd, MEM_SUFF
#endif
CC_SRC = tmp;
CC_DST = T0;
+ FORCE_RET();
}
@@ -445,6 +449,7 @@ void OPPROTO glue(glue(op_adc, MEM_SUFFI
CC_SRC = T1;
CC_DST = T0;
CC_OP = CC_OP_ADDB + SHIFT + cf * 4;
+ FORCE_RET();
}
void OPPROTO glue(glue(op_sbb, MEM_SUFFIX), _T0_T1_cc)(void)
@@ -458,6 +463,7 @@ void OPPROTO glue(glue(op_sbb, MEM_SUFFI
CC_SRC = T1;
CC_DST = T0;
CC_OP = CC_OP_SUBB + SHIFT + cf * 4;
+ FORCE_RET();
}
void OPPROTO glue(glue(op_cmpxchg, MEM_SUFFIX), _T0_T1_EAX_cc)(void)
Index: target-ppc/exec.h
===================================================================
RCS file: /cvsroot/qemu/qemu/target-ppc/exec.h,v
retrieving revision 1.10
diff -u -p -r1.10 exec.h
--- target-ppc/exec.h 13 Mar 2005 17:01:22 -0000 1.10
+++ target-ppc/exec.h 9 May 2005 01:33:04 -0000
@@ -33,11 +33,7 @@ register uint32_t T2 asm(AREG3);
#define FT1 (env->ft1)
#define FT2 (env->ft2)
-#if defined (DEBUG_OP)
-#define RETURN() __asm__ __volatile__("nop");
-#else
-#define RETURN() __asm__ __volatile__("");
-#endif
+#define RETURN() FORCE_RET()
#include "cpu.h"
#include "exec-all.h"
Index: target-ppc/op.c
===================================================================
RCS file: /cvsroot/qemu/qemu/target-ppc/op.c,v
retrieving revision 1.16
diff -u -p -r1.16 op.c
--- target-ppc/op.c 13 Mar 2005 17:01:22 -0000 1.16
+++ target-ppc/op.c 9 May 2005 01:33:04 -0000
@@ -489,11 +489,13 @@ PPC_OP(test_ctr)
PPC_OP(test_ctr_true)
{
T0 = (regs->ctr != 0 && (T0 & PARAM(1)) != 0);
+ FORCE_RET();
}
PPC_OP(test_ctr_false)
{
T0 = (regs->ctr != 0 && (T0 & PARAM(1)) == 0);
+ FORCE_RET();
}
PPC_OP(test_ctrz)
@@ -504,11 +506,13 @@ PPC_OP(test_ctrz)
PPC_OP(test_ctrz_true)
{
T0 = (regs->ctr == 0 && (T0 & PARAM(1)) != 0);
+ FORCE_RET();
}
PPC_OP(test_ctrz_false)
{
T0 = (regs->ctr == 0 && (T0 & PARAM(1)) == 0);
+ FORCE_RET();
}
PPC_OP(test_true)
@@ -1335,9 +1339,10 @@ PPC_OP(fnabs)
}
/* fneg */
+void do_fneg (void);
PPC_OP(fneg)
{
- FT0 = -FT0;
+ do_fneg();
RETURN();
}
Index: target-ppc/op_helper.c
===================================================================
RCS file: /cvsroot/qemu/qemu/target-ppc/op_helper.c,v
retrieving revision 1.12
diff -u -p -r1.12 op_helper.c
--- target-ppc/op_helper.c 13 Mar 2005 17:01:22 -0000 1.12
+++ target-ppc/op_helper.c 9 May 2005 01:33:04 -0000
@@ -428,6 +428,11 @@ void do_fnabs (void)
FT0 = p.d;
}
+void do_fneg (void)
+{
+ FT0 = -FT0;
+}
+
/* Instruction cache invalidation helper */
#define ICACHE_LINE_SIZE 32
Index: target-sparc/exec.h
===================================================================
RCS file: /cvsroot/qemu/qemu/target-sparc/exec.h,v
retrieving revision 1.10
diff -u -p -r1.10 exec.h
--- target-sparc/exec.h 13 Feb 2005 19:02:42 -0000 1.10
+++ target-sparc/exec.h 9 May 2005 01:33:04 -0000
@@ -34,6 +34,7 @@ void set_cwp(int new_cwp);
void do_fitos(void);
void do_fitod(void);
void do_fabss(void);
+void do_fnegs(void);
void do_fsqrts(void);
void do_fsqrtd(void);
void do_fcmps(void);
Index: target-sparc/op.c
===================================================================
RCS file: /cvsroot/qemu/qemu/target-sparc/op.c,v
retrieving revision 1.15
diff -u -p -r1.15 op.c
--- target-sparc/op.c 13 Mar 2005 09:55:49 -0000 1.15
+++ target-sparc/op.c 9 May 2005 01:33:04 -0000
@@ -871,7 +871,7 @@ void OPPROTO op_flush_T0(void)
void OPPROTO op_fnegs(void)
{
- FT0 = -FT1;
+ do_fnegs();
}
void OPPROTO op_fabss(void)
Index: target-sparc/op_helper.c
===================================================================
RCS file: /cvsroot/qemu/qemu/target-sparc/op_helper.c,v
retrieving revision 1.12
diff -u -p -r1.12 op_helper.c
--- target-sparc/op_helper.c 6 Apr 2005 20:44:48 -0000 1.12
+++ target-sparc/op_helper.c 9 May 2005 01:33:04 -0000
@@ -25,6 +25,11 @@ void do_fabss(void)
FT0 = float32_abs(FT1);
}
+void do_fnegs(void)
+{
+ FT0 = float32_chs(FT1);
+}
+
void do_fsqrts(void)
{
FT0 = float32_sqrt(FT1, &env->fp_status);
Index: target-sparc/op_mem.h
===================================================================
RCS file: /cvsroot/qemu/qemu/target-sparc/op_mem.h,v
retrieving revision 1.5
diff -u -p -r1.5 op_mem.h
--- target-sparc/op_mem.h 30 Jan 2005 22:39:04 -0000 1.5
+++ target-sparc/op_mem.h 9 May 2005 01:33:04 -0000
@@ -3,12 +3,14 @@
void OPPROTO glue(glue(op_, name), MEMSUFFIX)(void) \
{ \
T1 = glue(qp, MEMSUFFIX)(T0); \
+ FORCE_RET(); \
}
#define SPARC_ST_OP(name, op) \
void OPPROTO glue(glue(op_, name), MEMSUFFIX)(void) \
{ \
glue(op, MEMSUFFIX)(T0, T1); \
+ FORCE_RET() \
}
SPARC_LD_OP(ld, ldl);
@@ -26,12 +28,14 @@ void OPPROTO glue(op_std, MEMSUFFIX)(voi
{
glue(stl, MEMSUFFIX)(T0, T1);
glue(stl, MEMSUFFIX)((T0 + 4), T2);
+ FORCE_RET();
}
void OPPROTO glue(op_ldstub, MEMSUFFIX)(void)
{
T1 = glue(ldub, MEMSUFFIX)(T0);
glue(stb, MEMSUFFIX)(T0, 0xff); /* XXX: Should be Atomically */
+ FORCE_RET();
}
void OPPROTO glue(op_swap, MEMSUFFIX)(void)
@@ -39,33 +43,39 @@ void OPPROTO glue(op_swap, MEMSUFFIX)(vo
target_ulong tmp = glue(ldl, MEMSUFFIX)(T0);
glue(stl, MEMSUFFIX)(T0, T1); /* XXX: Should be Atomically */
T1 = tmp;
+ FORCE_RET();
}
void OPPROTO glue(op_ldd, MEMSUFFIX)(void)
{
T1 = glue(ldl, MEMSUFFIX)(T0);
T0 = glue(ldl, MEMSUFFIX)((T0 + 4));
+ FORCE_RET();
}
/*** Floating-point store ***/
void OPPROTO glue(op_stf, MEMSUFFIX) (void)
{
glue(stfl, MEMSUFFIX)(T0, FT0);
+ FORCE_RET();
}
void OPPROTO glue(op_stdf, MEMSUFFIX) (void)
{
glue(stfq, MEMSUFFIX)(T0, DT0);
+ FORCE_RET();
}
/*** Floating-point load ***/
void OPPROTO glue(op_ldf, MEMSUFFIX) (void)
{
FT0 = glue(ldfl, MEMSUFFIX)(T0);
+ FORCE_RET();
}
void OPPROTO glue(op_lddf, MEMSUFFIX) (void)
{
DT0 = glue(ldfq, MEMSUFFIX)(T0);
+ FORCE_RET();
}
#undef MEMSUFFIX