qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [patch] make qemu work with GCC 4


From: Michael Matz
Subject: [Qemu-devel] [patch] make qemu work with GCC 4
Date: Tue, 28 Aug 2007 21:57:56 +0200 (CEST)

Hi,

[please keep me CCed, I'm not on this list]

the below patch lets qemu be compiled by GCC 4.2 (probably also 4.1 and 
others) for most hosts (i386,x86_64,ia64,ppc).  s390 as host is missing, 
and needs a compiler change to emit the literal store inline again, as the 
literal pool at the end fundamentally breaks the assumption that qemu can 
paste together the code snippets by patching out the return.  I have no 
HOST_{ARM,MIPS*,ALPHA,SPARC*,M68K} machines to compile for that.

It specifically changes these things:

* ppc: adds -fno-section-anchors to OP_CFLAGS, as dyngen isn't prepared
       to deal with the relocs resulting from using section anchors
* ppc: on target-alpha op_reset_FT GCC4 uses a floating point constant 0.0
       to reset the ft regs, which in turn is loaded from the data 
       section.  The reloc for that is unhandled.  Using -ffast-math would 
       work around this, but I chose to be conservative and change only
       the op.c snippet in question.  See the comment there.
* i386: well, most of you will know that GCC4 doesn't compile qemu because 
       of reload.  The inherent problem is that qemu uses 64bit
       entities in some places (sometimes structs), which GCC (4.x) 
       manages to place in registers, i.e. needs 2 hardregs.  But it 
       sometimes just so happens that an instruction needing such DImode
       reg also has a memory operand with an indexed address (reg plus 
       reg), hence two hardregs more.  But qemu by default leaves just 
       three free registers for compiling op.c --> boom.  This is somewhat 
       hard to work around in GCC (trust me :) ).

       I solved that by placing one of the T[012] operands into memory
       for HOST_I386, thereby freeing one reg.  Here's some justification 
       of why that doesn't really cost performance: with three free regs
       GCC is already spilling like mad in the snippets; we just trade one
       of those memory accesses (to stack) for another memory access to
       the cpu_state structure, which will be in cache.

       Additionally I made sure that I put the least used Tx global into 
       memory.  I haven't done many performance measurements but noticed 
       no glaring problems.

       Two more obvious changes in an inline asm in softmmu_header.h were 
       necessary too.

A qemu with that patch was tested on HOST_I386 with all images from the 
qemu homepage (i.e. arm, mips, mipsel, coldfire (that doesn't work) and 
sparc), some i386 boot isos and a freedos image.  It also was tested on 
HOST_X86_64 installing openSUSE beta-something for i386 and x86_64.  I 
haven't yet tested it on ia64 or ppc hosts.

The whole patch is against a 0.9.0-cvs version from 2007-07-09 (Alex might 
know the exact checkout date), so chances are that it still applies :)


Ciao,
Michael.

diff -urp qemu-0.9.0.cvs.orig/softmmu_header.h qemu-0.9.0.cvs/softmmu_header.h
--- qemu-0.9.0.cvs.orig/softmmu_header.h        2007-08-21 18:58:00.000000000 
+0200
+++ qemu-0.9.0.cvs/softmmu_header.h     2007-08-21 20:40:23.000000000 +0200
@@ -254,14 +254,18 @@ static inline void glue(glue(st, SUFFIX)
                   : "r" (ptr), 
 /* NOTE: 'q' would be needed as constraint, but we could not use it
    with T1 ! */
+#if DATA_SIZE == 1 || DATA_SIZE == 2
+                 "q" (v),
+#else
                   "r" (v), 
+#endif
                   "i" ((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS), 
                   "i" (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), 
                   "i" (TARGET_PAGE_MASK | (DATA_SIZE - 1)),
                   "m" (*(uint32_t *)offsetof(CPUState, 
tlb_table[CPU_MEM_INDEX][0].addr_write)),
                   "i" (CPU_MEM_INDEX),
                   "m" (*(uint8_t *)&glue(glue(__st, SUFFIX), MMUSUFFIX))
-                  : "%eax", "%ecx", "%edx", "memory", "cc");
+                  : "%eax", "%edx", "memory", "cc");
 }
 
 #else
diff -urp qemu-0.9.0.cvs.orig/target-alpha/cpu.h 
qemu-0.9.0.cvs/target-alpha/cpu.h
--- qemu-0.9.0.cvs.orig/target-alpha/cpu.h      2007-06-03 23:02:37.000000000 
+0200
+++ qemu-0.9.0.cvs/target-alpha/cpu.h   2007-08-22 00:54:15.000000000 +0200
@@ -278,6 +278,8 @@ struct CPUAlphaState {
      * used to emulate 64 bits target on 32 bits hosts
      */ 
     target_ulong t0, t1, t2;
+#elif defined(HOST_I386)
+    target_ulong t2;
 #endif
     /* */
     double ft0, ft1, ft2;
diff -urp qemu-0.9.0.cvs.orig/target-alpha/exec.h 
qemu-0.9.0.cvs/target-alpha/exec.h
--- qemu-0.9.0.cvs.orig/target-alpha/exec.h     2007-06-03 19:44:36.000000000 
+0200
+++ qemu-0.9.0.cvs/target-alpha/exec.h  2007-08-21 21:28:58.000000000 +0200
@@ -40,7 +40,11 @@ register struct CPUAlphaState *env asm(A
 
 register uint64_t T0 asm(AREG1);
 register uint64_t T1 asm(AREG2);
+#ifndef HOST_I386
 register uint64_t T2 asm(AREG3);
+#else
+#define T2 (env->t2)
+#endif
 
 #endif /* TARGET_LONG_BITS > HOST_LONG_BITS */
 
diff -urp qemu-0.9.0.cvs.orig/target-arm/cpu.h qemu-0.9.0.cvs/target-arm/cpu.h
--- qemu-0.9.0.cvs.orig/target-arm/cpu.h        2007-06-24 14:09:48.000000000 
+0200
+++ qemu-0.9.0.cvs/target-arm/cpu.h     2007-08-21 21:38:36.000000000 +0200
@@ -52,6 +52,9 @@ typedef uint32_t ARMReadCPFunc(void *opa
  */
 
 typedef struct CPUARMState {
+#if defined(HOST_I386)
+    uint32_t t1;
+#endif
     /* Regs for current mode.  */
     uint32_t regs[16];
     /* Frequently accessed CPSR bits are stored separately for efficiently.
diff -urp qemu-0.9.0.cvs.orig/target-arm/exec.h qemu-0.9.0.cvs/target-arm/exec.h
--- qemu-0.9.0.cvs.orig/target-arm/exec.h       2007-06-03 19:44:36.000000000 
+0200
+++ qemu-0.9.0.cvs/target-arm/exec.h    2007-08-21 21:48:48.000000000 +0200
@@ -23,7 +23,12 @@
 register struct CPUARMState *env asm(AREG0);
 register uint32_t T0 asm(AREG1);
 register uint32_t T1 asm(AREG2);
+#ifndef HOST_I386
 register uint32_t T2 asm(AREG3);
+#else
+#define T2 (env->t1)
+#endif
+
 
 /* TODO: Put these in FP regs on targets that have such things.  */
 /* It is ok for FT0s and FT0d to overlap.  Likewise FT1s and FT1d.  */
diff -urp qemu-0.9.0.cvs.orig/target-i386/cpu.h qemu-0.9.0.cvs/target-i386/cpu.h
--- qemu-0.9.0.cvs.orig/target-i386/cpu.h       2007-06-03 23:02:37.000000000 
+0200
+++ qemu-0.9.0.cvs/target-i386/cpu.h    2007-08-21 21:16:47.000000000 +0200
@@ -427,6 +427,8 @@ typedef struct CPUX86State {
 #if TARGET_LONG_BITS > HOST_LONG_BITS
     /* temporaries if we cannot store them in host registers */
     target_ulong t0, t1, t2;
+#elif defined(HOST_I386)
+    target_ulong t1;
 #endif
 
     /* standard registers */
diff -urp qemu-0.9.0.cvs.orig/target-i386/exec.h 
qemu-0.9.0.cvs/target-i386/exec.h
--- qemu-0.9.0.cvs.orig/target-i386/exec.h      2007-06-26 10:35:18.000000000 
+0200
+++ qemu-0.9.0.cvs/target-i386/exec.h   2007-08-21 21:30:40.000000000 +0200
@@ -44,7 +44,11 @@ register struct CPUX86State *env asm(ARE
 /* XXX: use unsigned long instead of target_ulong - better code will
    be generated for 64 bit CPUs */
 register target_ulong T0 asm(AREG1);
+#ifndef HOST_I386
 register target_ulong T1 asm(AREG2);
+#else
+#define T1 (env->t1)
+#endif
 register target_ulong T2 asm(AREG3);
 
 /* if more registers are available, we define some registers too */
diff -urp qemu-0.9.0.cvs.orig/target-mips/cpu.h qemu-0.9.0.cvs/target-mips/cpu.h
--- qemu-0.9.0.cvs.orig/target-mips/cpu.h       2007-06-23 20:04:11.000000000 
+0200
+++ qemu-0.9.0.cvs/target-mips/cpu.h    2007-08-21 21:17:23.000000000 +0200
@@ -60,7 +60,10 @@ struct CPUMIPSState {
     target_ulong t0;
     target_ulong t1;
     target_ulong t2;
+#elif defined(HOST_I386)
+    target_ulong t1;
 #endif
+    
     target_ulong HI, LO;
     /* Floating point registers */
     fpr_t fpr[32];
diff -urp qemu-0.9.0.cvs.orig/target-mips/exec.h 
qemu-0.9.0.cvs/target-mips/exec.h
--- qemu-0.9.0.cvs.orig/target-mips/exec.h      2007-06-03 19:44:36.000000000 
+0200
+++ qemu-0.9.0.cvs/target-mips/exec.h   2007-08-21 21:37:06.000000000 +0200
@@ -17,7 +17,11 @@ register struct CPUMIPSState *env asm(AR
 #else
 register target_ulong T0 asm(AREG1);
 register target_ulong T1 asm(AREG2);
+#ifndef HOST_I386
 register target_ulong T2 asm(AREG3);
+#else
+#define T2 (env->t1)
+#endif
 #endif
 
 #if defined (USE_HOST_FLOAT_REGS)
diff -urp qemu-0.9.0.cvs.orig/target-ppc/cpu.h qemu-0.9.0.cvs/target-ppc/cpu.h
--- qemu-0.9.0.cvs.orig/target-ppc/cpu.h        2007-06-23 18:02:43.000000000 
+0200
+++ qemu-0.9.0.cvs/target-ppc/cpu.h     2007-08-21 21:19:57.000000000 +0200
@@ -694,6 +694,8 @@ struct CPUPPCState {
      * used to emulate 64 bits target on 32 bits hosts
      */ 
     target_ulong t0, t1, t2;
+#elif defined(HOST_I386)
+    target_ulong t1;
 #endif
     ppc_avr_t t0_avr, t1_avr, t2_avr;
 
diff -urp qemu-0.9.0.cvs.orig/target-ppc/exec.h qemu-0.9.0.cvs/target-ppc/exec.h
--- qemu-0.9.0.cvs.orig/target-ppc/exec.h       2007-06-03 19:44:36.000000000 
+0200
+++ qemu-0.9.0.cvs/target-ppc/exec.h    2007-08-21 21:41:41.000000000 +0200
@@ -40,7 +40,11 @@ register struct CPUPPCState *env asm(ARE
 #else
 register unsigned long T0 asm(AREG1);
 register unsigned long T1 asm(AREG2);
+#ifndef HOST_I386
 register unsigned long T2 asm(AREG3);
+#else
+#define T2 (env->t1)
+#endif
 #endif
 /* We may, sometime, need 64 bits registers on 32 bits target */
 #if defined(TARGET_PPC64) || defined(TARGET_PPCEMB) || (HOST_LONG_BITS == 64)
diff -urp qemu-0.9.0.cvs.orig/target-sparc/exec.h 
qemu-0.9.0.cvs/target-sparc/exec.h
--- qemu-0.9.0.cvs.orig/target-sparc/exec.h     2007-06-03 19:44:37.000000000 
+0200
+++ qemu-0.9.0.cvs/target-sparc/exec.h  2007-08-21 21:26:38.000000000 +0200
@@ -32,9 +32,13 @@ register uint32_t T2 asm(AREG4);
 
 #else
 #define REGWPTR env->regwptr
+#ifndef HOST_I386
 register uint32_t T2 asm(AREG3);
-#endif
 #define reg_T2
+#else
+#define T2 (env->t2)
+#endif
+#endif
 #endif
 
 #define FT0 (env->ft0)
diff -urp qemu-0.9.0.cvs.orig/target-z80/cpu.h qemu-0.9.0.cvs/target-z80/cpu.h
--- qemu-0.9.0.cvs.orig/target-z80/cpu.h        2007-08-21 18:58:00.000000000 
+0200
+++ qemu-0.9.0.cvs/target-z80/cpu.h     2007-08-21 21:14:37.000000000 +0200
@@ -147,6 +147,8 @@ typedef struct CPUZ80State {
 #if TARGET_LONG_BITS > HOST_LONG_BITS
     /* temporaries if we cannot store them in host registers */
     target_ulong t0, t1, t2;
+#elif defined(HOST_I386)
+    target_ulong t1;
 #endif
 
     /* Z80 registers */
diff -urp qemu-0.9.0.cvs.orig/target-z80/exec.h qemu-0.9.0.cvs/target-z80/exec.h
--- qemu-0.9.0.cvs.orig/target-z80/exec.h       2007-08-21 18:58:00.000000000 
+0200
+++ qemu-0.9.0.cvs/target-z80/exec.h    2007-08-21 21:13:47.000000000 +0200
@@ -40,7 +40,11 @@ register struct CPUZ80State *env asm(ARE
 /* XXX: use unsigned long instead of target_ulong - better code will
    be generated for 64 bit CPUs */
 register target_ulong T0 asm(AREG1);
+#ifdef HOST_I386
+#define T1 (env->t1)
+#else
 register target_ulong T1 asm(AREG2);
+#endif
 register target_ulong T2 asm(AREG3);
 
 /* if more registers are available, we define some registers too */
--- qemu-0.9.0.cvs.orig/Makefile.target 2007-08-21 18:58:01.000000000 +0200
+++ qemu-0.9.0.cvs/Makefile.target      2007-08-22 02:02:55.000000000 +0200
@@ -127,6 +127,7 @@ endif
 
 ifeq ($(ARCH),ppc)
 CPPFLAGS+= -D__powerpc__
+OP_CFLAGS+= -fno-section-anchors
 ifdef CONFIG_LINUX_USER
 BASE_LDFLAGS+=-Wl,-T,$(SRC_PATH)/$(ARCH).ld
 endif
--- qemu-0.9.0.cvs/target-alpha/op_template.h.mm        2007-08-22 
03:17:57.000000000 +0000
+++ qemu-0.9.0.cvs/target-alpha/op_template.h   2007-08-22 03:15:49.000000000 
+0000
@@ -28,7 +28,26 @@ void OPPROTO glue(op_reset_T, REG) (void
 
 void OPPROTO glue(op_reset_FT, REG) (void)
 {
+#ifdef HOST_PPC
+    /* We have a problem with HOST_PPC here:
+       We want this code:
+         glue(FT, REG) = 0;
+       unfortunately GCC4 notices that this stores (double)0.0 into
+       env->ft0 and emits that constant into the .rodata, and instructions
+       to load that zero from there.  But that construct can't be parsed by dyngen.
+       We could add -ffast-math for compiling op.c, that would just make it generate
+       two stores of zeros into both words of ft0.  But -ffast-math may have other
+       side-effects regarding the emulation.  We could use __builtin_memset,
+       which perhaps would be the sanest.  That relies on -O2 and our other options
+       to inline that memset, which currently it does, but who knows for how long.
+       So, we simply do that by hand, and a barely typesafe way :-/  */
+    union baeh { double d; unsigned int i[2];};
+    union baeh *p = (union baeh*)&(glue(FT, REG));
+    p->i[0] = 0;
+    p->i[1] = 0;
+#else
     glue(FT, REG) = 0;
+#endif
     RETURN();
 }
 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]