[Qemu-devel] [patch] Arm VFP

qemu-devel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [patch] Arm VFP

From:	Paul Brook
Subject:	[Qemu-devel] [patch] Arm VFP
Date:	Fri, 11 Feb 2005 01:17:41 +0000
User-agent:	KMail/1.7.2

The attached patch adds target support for the Arm VFP (Vector Floating Point) 
coprocessor.

The vector bits of this chip are kinda interesting, but other than that it's 
fairly straightforward.

I've successfully run nbench compiled for vfp, and got a fairly significant 
speedup.

Paul

Index: Makefile.target
===================================================================
RCS file: /cvsroot/qemu/qemu/Makefile.target,v
retrieving revision 1.57
diff -u -p -r1.57 Makefile.target
--- Makefile.target     10 Feb 2005 21:48:51 -0000      1.57
+++ Makefile.target     11 Feb 2005 00:27:39 -0000
@@ -259,6 +259,10 @@ ifeq ($(TARGET_BASE_ARCH), sparc)
 LIBOBJS+= op_helper.o helper.o
 endif
 
+ifeq ($(TARGET_BASE_ARCH), arm)
+LIBOBJS+= op_helper.o
+endif
+
 # NOTE: the disassembler code is only needed for debugging
 LIBOBJS+=disas.o 
 ifeq ($(findstring i386, $(TARGET_ARCH) $(ARCH)),i386)
Index: cpu-exec.c
===================================================================
RCS file: /cvsroot/qemu/qemu/cpu-exec.c,v
retrieving revision 1.48
diff -u -p -r1.48 cpu-exec.c
--- cpu-exec.c  10 Feb 2005 22:04:41 -0000      1.48
+++ cpu-exec.c  11 Feb 2005 00:27:40 -0000
@@ -347,7 +347,8 @@ int cpu_exec(CPUState *env1)
                 cs_base = env->segs[R_CS].base;
                 pc = cs_base + env->eip;
 #elif defined(TARGET_ARM)
-                flags = env->thumb;
+                flags = env->thumb | (env->vfp.vec_len << 1)
+                        | (env->vfp.vec_stride << 4);
                 cs_base = 0;
                 pc = env->regs[15];
 #elif defined(TARGET_SPARC)
@@ -620,6 +621,7 @@ int cpu_exec(CPUState *env1)
 #endif
 #elif defined(TARGET_ARM)
     env->cpsr = compute_cpsr();
+    /* XXX: Save/restore host fpu exception state?.  */
 #elif defined(TARGET_SPARC)
 #elif defined(TARGET_PPC)
 #else
Index: target-arm/cpu.h
===================================================================
RCS file: /cvsroot/qemu/qemu/target-arm/cpu.h,v
retrieving revision 1.6
diff -u -p -r1.6 cpu.h
--- target-arm/cpu.h    7 Feb 2005 23:10:07 -0000       1.6
+++ target-arm/cpu.h    11 Feb 2005 00:27:42 -0000
@@ -29,6 +29,14 @@
 #define EXCP_PREFETCH_ABORT  3
 #define EXCP_DATA_ABORT      4
 
+/* We currently assume float and double are IEEE single and double
+   precision respectively.
+   Doing runtime conversions is tricky because VFP registers may contain
+   integer values (eg. as the result of a FTOSI instruction).
+   A double precision register load/store must also load/store the
+   corresponding single precision pair, although it is undefined how
+   these overlap.  */
+
 typedef struct CPUARMState {
     uint32_t regs[16];
     uint32_t cpsr;
@@ -50,6 +58,7 @@ typedef struct CPUARMState {
     int interrupt_request;
     struct TranslationBlock *current_tb;
     int user_mode_only;
+    uint32_t address;
 
     /* in order to avoid passing too many arguments to the memory
        write helpers, we store some rarely used information in the CPU
@@ -58,6 +67,25 @@ typedef struct CPUARMState {
                                    written */
     unsigned long mem_write_vaddr; /* target virtual addr at which the
                                       memory was written */
+    /* VFP coprocessor state.  */
+    struct {
+        union {
+            float s[32];
+            double d[16];
+        } regs;
+
+        /* We store these fpcsr fields separately for convenience.  */
+        int vec_len;
+        int vec_stride;
+
+        uint32_t fpscr;
+
+        /* Temporary variables if we don't have spare fp regs.  */
+        float tmp0s, tmp1s;
+        double tmp0d, tmp1d;
+
+    } vfp;
+
     /* user data */
     void *opaque;
 } CPUARMState;
Index: target-arm/exec.h
===================================================================
RCS file: /cvsroot/qemu/qemu/target-arm/exec.h,v
retrieving revision 1.4
diff -u -p -r1.4 exec.h
--- target-arm/exec.h   7 Feb 2005 23:10:07 -0000       1.4
+++ target-arm/exec.h   11 Feb 2005 00:27:42 -0000
@@ -24,13 +24,16 @@ register uint32_t T0 asm(AREG1);
 register uint32_t T1 asm(AREG2);
 register uint32_t T2 asm(AREG3);
 
+/* TODO: Put these in FP regs on targets that have such things.  */
+/* It is ok for FT0s and FT0d to overlap.  Likewise FT1s and FT1d.  */
+#define FT0s env->vfp.tmp0s
+#define FT1s env->vfp.tmp1s
+#define FT0d env->vfp.tmp0d
+#define FT1d env->vfp.tmp1d
+
 #include "cpu.h"
 #include "exec-all.h"
 
-void cpu_lock(void);
-void cpu_unlock(void);
-void cpu_loop_exit(void);
-
 /* Implemented CPSR bits.  */
 #define CACHED_CPSR_BITS 0xf8000000
 static inline int compute_cpsr(void)
@@ -51,3 +54,24 @@ static inline void regs_to_env(void)
 
 int cpu_arm_handle_mmu_fault (CPUState *env, target_ulong address, int rw,
                               int is_user, int is_softmmu);
+
+/* In op_helper.c */
+
+void cpu_lock(void);
+void cpu_unlock(void);
+void cpu_loop_exit(void);
+
+void raise_exception(int);
+
+void do_vfp_abss(void);
+void do_vfp_absd(void);
+void do_vfp_negs(void);
+void do_vfp_negd(void);
+void do_vfp_sqrts(void);
+void do_vfp_sqrtd(void);
+void do_vfp_cmps(void);
+void do_vfp_cmpd(void);
+void do_vfp_cmpes(void);
+void do_vfp_cmped(void);
+void do_vfp_set_fpscr(void);
+void do_vfp_get_fpscr(void);
Index: target-arm/op.c
===================================================================
RCS file: /cvsroot/qemu/qemu/target-arm/op.c,v
retrieving revision 1.8
diff -u -p -r1.8 op.c
--- target-arm/op.c     7 Feb 2005 12:42:35 -0000       1.8
+++ target-arm/op.c     11 Feb 2005 00:27:43 -0000
@@ -1,7 +1,7 @@
 /*
  *  ARM micro operations
  * 
- *  Copyright (c) 2003 Fabrice Bellard
+ *  Copyright (c) 2003-2005 Fabrice Bellard, Paul Brook.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -857,17 +857,284 @@ void OPPROTO op_undef_insn(void)
     cpu_loop_exit();
 }
 
-/* thread support */
+/* VFP support.  We follow the convention used for VFP instrunctions:
+   Single precition routines have a "s" suffix, double precision a
+   "d" suffix.  */
 
-spinlock_t global_cpu_lock = SPIN_LOCK_UNLOCKED;
+#define VFP_OP(name, p) void OPPROTO op_vfp_##name##p(void)
 
-void cpu_lock(void)
+#define VFP_BINOP(name, op) \
+VFP_OP(name, s)             \
+{                           \
+    FT0s = FT0s op FT1s;    \
+}                           \
+VFP_OP(name, d)             \
+{                           \
+    FT0d = FT0d op FT1d;    \
+}
+VFP_BINOP(add, +)
+VFP_BINOP(sub, -)
+VFP_BINOP(mul, *)
+VFP_BINOP(div, /)
+#undef VFP_BINOP
+
+#define VFP_HELPER(name)  \
+VFP_OP(name, s)           \
+{                         \
+    do_vfp_##name##s();    \
+}                         \
+VFP_OP(name, d)           \
+{                         \
+    do_vfp_##name##d();    \
+}
+VFP_HELPER(abs)
+VFP_HELPER(sqrt)
+VFP_HELPER(cmp)
+VFP_HELPER(cmpe)
+#undef VFP_HELPER
+
+/* XXX: Will this do the right thing for NANs.  Should invert the signbit
+   without looking at the rest of the value.  */
+VFP_OP(neg, s)
+{
+    FT0s = -FT0s;
+}
+
+VFP_OP(neg, d)
+{
+    FT0d = -FT0d;
+}
+
+VFP_OP(F1_ld0, s)
+{
+    FT1s = 0.0f;
+}
+
+VFP_OP(F1_ld0, d)
+{
+    FT1d = 0.0;
+}
+
+/* Helper routines to perform bitwise copies between float and int.  */
+static inline float vfp_itos(uint32_t i)
+{
+    union {
+        uint32_t i;
+        float s;
+    } v;
+
+    v.i = i;
+    return v.s;
+}
+
+static inline uint32_t vfp_stoi(float s)
+{
+    union {
+        uint32_t i;
+        float s;
+    } v;
+
+    v.s = s;
+    return v.i;
+}
+
+/* Integer to float conversion.  */
+VFP_OP(uito, s)
+{
+    FT0s = (float)(uint32_t)vfp_stoi(FT0s);
+}
+
+VFP_OP(uito, d)
 {
-    spin_lock(&global_cpu_lock);
+    FT0d = (double)(uint32_t)vfp_stoi(FT0s);
 }
 
-void cpu_unlock(void)
+VFP_OP(sito, s)
 {
-    spin_unlock(&global_cpu_lock);
+    FT0s = (float)(int32_t)vfp_stoi(FT0s);
 }
 
+VFP_OP(sito, d)
+{
+    FT0d = (double)(int32_t)vfp_stoi(FT0s);
+}
+
+/* Float to integer conversion.  */
+VFP_OP(toui, s)
+{
+    FT0s = vfp_itos((uint32_t)FT0s);
+}
+
+VFP_OP(toui, d)
+{
+    FT0s = vfp_itos((uint32_t)FT0d);
+}
+
+VFP_OP(tosi, s)
+{
+    FT0s = vfp_itos((int32_t)FT0s);
+}
+
+VFP_OP(tosi, d)
+{
+    FT0s = vfp_itos((int32_t)FT0d);
+}
+
+/* TODO: Set rounding mode properly.  */
+VFP_OP(touiz, s)
+{
+    FT0s = vfp_itos((uint32_t)FT0s);
+}
+
+VFP_OP(touiz, d)
+{
+    FT0s = vfp_itos((uint32_t)FT0d);
+}
+
+VFP_OP(tosiz, s)
+{
+    FT0s = vfp_itos((int32_t)FT0s);
+}
+
+VFP_OP(tosiz, d)
+{
+    FT0s = vfp_itos((int32_t)FT0d);
+}
+
+/* floating point conversion */
+VFP_OP(fcvtd, s)
+{
+    FT0d = (double)FT0s;
+}
+
+VFP_OP(fcvts, d)
+{
+    FT0s = (float)FT0d;
+}
+
+/* VFP register transfer */
+#define VFP_GET(t, p, n)                    \
+void OPPROTO op_vfp_mov_F##t##_##p##n(void) \
+{                                           \
+    FT##t##p = env->vfp.regs.p[n];          \
+}
+
+#define VFP_SET(t, p, n)                    \
+void OPPROTO op_vfp_mov_##p##n##_F##t(void) \
+{                                           \
+    env->vfp.regs.p[n] = FT##t##p;          \
+}
+
+#define VFP_GET_SET_s(n)  \
+    VFP_SET(0, s, n)      \
+    VFP_GET(0, s, n)      \
+    VFP_GET(1, s, n)
+
+#define VFP_GET_SET_BOTH(n) \
+    VFP_GET_SET_s(n)        \
+    VFP_SET(0, d, n)        \
+    VFP_GET(0, d, n)        \
+    VFP_GET(1, d, n)
+
+VFP_GET_SET_BOTH(0)
+VFP_GET_SET_BOTH(1)
+VFP_GET_SET_BOTH(2)
+VFP_GET_SET_BOTH(3)
+VFP_GET_SET_BOTH(4)
+VFP_GET_SET_BOTH(5)
+VFP_GET_SET_BOTH(6)
+VFP_GET_SET_BOTH(7)
+VFP_GET_SET_BOTH(8)
+VFP_GET_SET_BOTH(9)
+VFP_GET_SET_BOTH(10)
+VFP_GET_SET_BOTH(11)
+VFP_GET_SET_BOTH(12)
+VFP_GET_SET_BOTH(13)
+VFP_GET_SET_BOTH(14)
+VFP_GET_SET_BOTH(15)
+VFP_GET_SET_s(16)
+VFP_GET_SET_s(17)
+VFP_GET_SET_s(18)
+VFP_GET_SET_s(19)
+VFP_GET_SET_s(20)
+VFP_GET_SET_s(21)
+VFP_GET_SET_s(22)
+VFP_GET_SET_s(23)
+VFP_GET_SET_s(24)
+VFP_GET_SET_s(25)
+VFP_GET_SET_s(26)
+VFP_GET_SET_s(27)
+VFP_GET_SET_s(28)
+VFP_GET_SET_s(29)
+VFP_GET_SET_s(30)
+VFP_GET_SET_s(31)
+#undef VFP_GET
+#undef VFP_SET
+#undef VFP_GET_SET_BOTH
+#undef VFP_GET_SET_s
+
+void OPPROTO op_vfp_movl_T0_fpscr(void)
+{
+    do_vfp_get_fpscr ();
+}
+
+void OPPROTO op_vfp_movl_T0_fpscr_flags(void)
+{
+    T0 = env->vfp.fpscr & (0xf << 28);
+}
+
+void OPPROTO op_vfp_movl_fpscr_T0(void)
+{
+    do_vfp_set_fpscr();
+}
+
+/* Move between FT0s to T0  */
+void OPPROTO op_vfp_mrs(void)
+{
+    T0 = vfp_stoi(FT0s);
+}
+
+void OPPROTO op_vfp_msr(void)
+{
+    FT0s = vfp_itos(T0);
+}
+
+/* Move between FT0d and {T0,T1} */
+void OPPROTO op_vfp_mrrd(void)
+{
+    CPU_DoubleU u;
+    
+    u.d = FT0d;
+    T0 = u.l.lower;
+    T1 = u.l.upper;
+}
+
+void OPPROTO op_vfp_mdrr(void)
+{
+    CPU_DoubleU u;
+    
+    u.l.lower = T0;
+    u.l.upper = T1;
+    FT0d = u.d;
+}
+
+/* Floating point load/store.  Address is in T1 */
+void OPPROTO op_vfp_lds(void)
+{
+    FT0s = ldfl((void *)T1);
+}
+
+void OPPROTO op_vfp_ldd(void)
+{
+    FT0d = ldfq((void *)T1);
+}
+
+void OPPROTO op_vfp_sts(void)
+{
+    stfl((void *)T1, FT0s);
+}
+
+void OPPROTO op_vfp_std(void)
+{
+    stfq((void *)T1, FT0d);
+}
Index: target-arm/op_helper.c
===================================================================
RCS file: target-arm/op_helper.c
diff -N target-arm/op_helper.c
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ target-arm/op_helper.c      11 Feb 2005 00:27:43 -0000
@@ -0,0 +1,229 @@
+/*
+ *  ARM helper routines
+ * 
+ *  Copyright (c) 2005 Paul Brook
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <math.h>
+#include <fenv.h>
+#include "exec.h"
+
+/* If the host doesn't define C99 math intrinsics then use the normal
+   operators.  This may generate excess exceptions, but it's probably
+   near enough for most things.  */
+#ifndef isless
+#define isless(x, y) (x < y)
+#endif
+#ifndef isgreater
+#define isgreater(x, y) (x > y)
+#endif
+#ifndef isunordered
+#define isunordered(x, y) (!((x < y) || (x >= y)))
+#endif
+
+void raise_exception(int tt)
+{
+    env->exception_index = tt;
+    cpu_loop_exit();
+}
+
+/* thread support */
+
+spinlock_t global_cpu_lock = SPIN_LOCK_UNLOCKED;
+
+void cpu_lock(void)
+{
+    spin_lock(&global_cpu_lock);
+}
+
+void cpu_unlock(void)
+{
+    spin_unlock(&global_cpu_lock);
+}
+
+/* VFP support.  */
+
+void do_vfp_abss(void)
+{
+  FT0s = fabsf(FT0s);
+}
+
+void do_vfp_absd(void)
+{
+  FT0d = fabs(FT0d);
+}
+
+void do_vfp_sqrts(void)
+{
+  FT0s = sqrtf(FT0s);
+}
+
+void do_vfp_sqrtd(void)
+{
+  FT0d = sqrt(FT0d);
+}
+
+/* We use an == operator first to generate teh correct floating point
+   exception.  Subsequent comparisons use the exception-safe macros.  */
+#define DO_VFP_cmp(p)                     \
+void do_vfp_cmp##p(void)                  \
+{                                         \
+    uint32_t flags;                       \
+    if (FT0##p == FT1##p)                 \
+        flags = 0xc;                      \
+    else if (isless (FT0##p, FT1##p))     \
+        flags = 0x8;                      \
+    else if (isgreater (FT0##p, FT1##p))  \
+        flags = 0x2;                      \
+    else /* unordered */                  \
+        flags = 0x3;                      \
+    env->vfp.fpscr = (flags << 28) | (env->vfp.fpscr & 0x0fffffff); \
+    FORCE_RET();                          \
+}
+DO_VFP_cmp(s)
+DO_VFP_cmp(d)
+#undef DO_VFP_cmp
+
+/* We use a > operator first to get FP exceptions right.  */
+#define DO_VFP_cmpe(p)                      \
+void do_vfp_cmpe##p(void)                   \
+{                                           \
+    uint32_t flags;                         \
+    if (FT0##p > FT1##p)                    \
+        flags = 0x2;                        \
+    else if (isless (FT0##p, FT1##p))       \
+        flags = 0x8;                        \
+    else if (isunordered (FT0##p, FT1##p))  \
+        flags = 0x3;                        \
+    else /* equal */                        \
+        flags = 0xc;                        \
+    env->vfp.fpscr = (flags << 28) | (env->vfp.fpscr & 0x0fffffff); \
+    FORCE_RET();                            \
+}
+DO_VFP_cmpe(s)
+DO_VFP_cmpe(d)
+#undef DO_VFP_cmpe
+
+/* Convert host exception flags to vfp form.  */
+int vfp_exceptbits_from_host(int host_bits)
+{
+    int target_bits = 0;
+
+#ifdef FE_INVALID
+    if (host_bits & FE_INVALID)
+        target_bits |= 1;
+#endif
+#ifdef FE_DIVBYZERO
+    if (host_bits & FE_DIVBYZERO)
+        target_bits |= 2;
+#endif
+#ifdef FE_OVERFLOW
+    if (host_bits & FE_OVERFLOW)
+        target_bits |= 4;
+#endif
+#ifdef FE_UNDERFLOW
+    if (host_bits & FE_UNDERFLOW)
+        target_bits |= 8;
+#endif
+#ifdef FE_INEXACT
+    if (host_bits & FE_INEXACT)
+        target_bits |= 0x10;
+#endif
+    /* C doesn't define an inexact exception.  */
+    return target_bits;
+}
+
+/* Convert vfp exception flags to target form.  */
+int vfp_host_exceptbits_to_host(int target_bits)
+{
+    int host_bits = 0;
+
+#ifdef FE_INVALID
+    if (target_bits & 1)
+        host_bits |= FE_INVALID;
+#endif
+#ifdef FE_DIVBYZERO
+    if (target_bits & 2)
+        host_bits |= FE_DIVBYZERO;
+#endif
+#ifdef FE_OVERFLOW
+    if (target_bits & 4)
+        host_bits |= FE_OVERFLOW;
+#endif
+#ifdef FE_UNDERFLOW
+    if (target_bits & 8)
+        host_bits |= FE_UNDERFLOW;
+#endif
+#ifdef FE_INEXACT
+    if (target_bits & 0x10)
+        host_bits |= FE_INEXACT;
+#endif
+    return host_bits;
+}
+
+void do_vfp_set_fpscr(void)
+{
+    int i;
+    uint32_t changed;
+
+    changed = env->vfp.fpscr;
+    env->vfp.fpscr = (T0 & 0xffc8ffff);
+    env->vfp.vec_len = (T0 >> 16) & 7;
+    env->vfp.vec_stride = (T0 >> 20) & 3;
+
+    changed ^= T0;
+    if (changed & (3 << 22)) {
+        i = (T0 >> 22) & 3;
+        switch (i) {
+        case 0:
+            i = FE_TONEAREST;
+            break;
+        case 1:
+            i = FE_UPWARD;
+            break;
+        case 2:
+            i = FE_DOWNWARD;
+            break;
+        case 3:
+            i = FE_TOWARDZERO;
+            break;
+        }
+        fesetround (i);
+    }
+
+    /* Clear host exception flags.  */
+    feclearexcept(FE_ALL_EXCEPT);
+
+#ifdef feenableexcept
+    if (changed & 0x1f00) {
+        i = vfp_exceptbits_to_host((T0 >> 8) & 0x1f);
+        feenableexcept (i);
+        fedisableexcept (FE_ALL_EXCEPT & ~i);
+    }
+#endif
+    /* XXX: FZ and DN are not implemented.  */
+}
+
+void do_vfp_get_fpscr(void)
+{
+    int i;
+
+    T0 = (env->vfp.fpscr & 0xffc8ffff) | (env->vfp.vec_len << 16)
+          | (env->vfp.vec_stride << 20);
+    i = fetestexcept(FE_ALL_EXCEPT);
+    T0 |= vfp_exceptbits_from_host(i);
+}
Index: target-arm/translate.c
===================================================================
RCS file: /cvsroot/qemu/qemu/target-arm/translate.c,v
retrieving revision 1.17
diff -u -p -r1.17 translate.c
--- target-arm/translate.c      7 Feb 2005 23:10:07 -0000       1.17
+++ target-arm/translate.c      11 Feb 2005 00:27:43 -0000
@@ -1,7 +1,7 @@
 /*
  *  ARM translation
  * 
- *  Copyright (c) 2003 Fabrice Bellard
+ *  Copyright (c) 2003-2005 Fabrice Bellard, Paul Brook
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -354,7 +354,561 @@ static inline void gen_add_datah_offset(
     }
 }
 
-static void disas_arm_insn(DisasContext *s)
+#define VFP_OP(name)                      \
+static inline void gen_vfp_##name(int dp) \
+{                                         \
+    if (dp)                               \
+        gen_op_vfp_##name##d();           \
+    else                                  \
+        gen_op_vfp_##name##s();           \
+}
+
+VFP_OP(add)
+VFP_OP(sub)
+VFP_OP(mul)
+VFP_OP(div)
+VFP_OP(neg)
+VFP_OP(abs)
+VFP_OP(sqrt)
+VFP_OP(cmp)
+VFP_OP(cmpe)
+VFP_OP(F1_ld0)
+VFP_OP(uito)
+VFP_OP(sito)
+VFP_OP(toui)
+VFP_OP(touiz)
+VFP_OP(tosi)
+VFP_OP(tosiz)
+VFP_OP(ld)
+VFP_OP(st)
+
+#undef VFP_OP
+
+#define F(t) \
+    F1(t,  0), F1(t,  1), F1(t,  2), F1(t,  3), \
+    F1(t,  4), F1(t,  5), F1(t,  6), F1(t,  7), \
+    F1(t,  8), F1(t,  9), F1(t, 10), F1(t, 11), \
+    F1(t, 12), F1(t, 13), F1(t, 14), F1(t, 15), \
+    F1(t, 16), F1(t, 17), F1(t, 18), F1(t, 19), \
+    F1(t, 20), F1(t, 21), F1(t, 22), F1(t, 23), \
+    F1(t, 24), F1(t, 25), F1(t, 26), F1(t, 27), \
+    F1(t, 28), F1(t, 29), F1(t, 30), F1(t, 31)
+#define F1(t, n) gen_op_vfp_mov_F##t##_s##n
+static GenOpFunc *gen_op_movs_F0_vreg[32] = { F(0) };
+static GenOpFunc *gen_op_movs_F1_vreg[32] = { F(1) };
+#undef F1
+
+#define F1(t, n) gen_op_vfp_mov_s##n##_F##t
+static GenOpFunc *gen_op_movs_vreg_F0[32] = { F(0) };
+#undef F1
+#undef F
+
+#define F(t) \
+    F1(t,  0), F1(t,  1), F1(t , 2), F1(t,  3), \
+    F1(t,  4), F1(t,  5), F1(t,  6), F1(t,  7), \
+    F1(t,  8), F1(t,  9), F1(t, 10), F1(t, 11), \
+    F1(t, 12), F1(t, 13), F1(t, 14), F1(t, 15)
+#define F1(t, n) gen_op_vfp_mov_F##t##_d##n
+static GenOpFunc *gen_op_movd_F0_vreg[16] = { F(0) };
+static GenOpFunc *gen_op_movd_F1_vreg[16] = { F(1) };
+#undef F1
+
+#define F1(t, n) gen_op_vfp_mov_d##n##_F##t
+static GenOpFunc *gen_op_movd_vreg_F0[16] = { F(0) };
+#undef F1
+#undef F
+
+static inline void gen_mov_F0_vreg(int dp, int reg)
+{
+  if (dp)
+      gen_op_movd_F0_vreg[reg]();
+  else
+      gen_op_movs_F0_vreg[reg]();
+}
+
+static inline void gen_mov_F1_vreg(int dp, int reg)
+{
+  if (dp)
+      gen_op_movd_F1_vreg[reg]();
+  else
+      gen_op_movs_F1_vreg[reg]();
+}
+
+static inline void gen_mov_vreg_F0(int dp, int reg)
+{
+  if (dp)
+      gen_op_movd_vreg_F0[reg]();
+  else
+      gen_op_movs_vreg_F0[reg]();
+}
+
+/* Disassemble a VFP instruction.  Returns nonzero if an error occured
+   (ie. an undefined instruction).  */
+static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
+{
+    uint32_t rd, rn, rm, op, i, n, offset, delta_d, delta_m, bank_mask;
+    int dp, veclen;
+
+    dp = ((insn & 0xf00) == 0xb00);
+    switch ((insn >> 24) & 0xf) {
+    case 0xe:
+        if (insn & (1 << 4)) {
+            /* single register transfer */
+            if ((insn & 0x6f) != 0x00)
+                return 1;
+            rd = (insn >> 12) & 0xf;
+            if (dp) {
+                if (insn & 0x80)
+                    return 1;
+                rn = (insn >> 16) & 0xf;
+                /* Get the existing value even for arm->vfp moves because
+                   we only set half the register.  */
+                gen_mov_F0_vreg(1, rn);
+                gen_op_vfp_mrrd();
+                if (insn & (1 << 20)) {
+                    /* vfp->arm */
+                    if (insn & (1 << 21))
+                        gen_movl_reg_T1(s, rd);
+                    else
+                        gen_movl_reg_T0(s, rd);
+                } else {
+                    /* arm->vfp */
+                    if (insn & (1 << 21))
+                        gen_movl_T1_reg(s, rd);
+                    else
+                        gen_movl_T0_reg(s, rd);
+                    gen_op_vfp_mdrr();
+                    gen_mov_vreg_F0(dp, rn);
+                }
+            } else {
+                rn = ((insn >> 15) & 0x1e) | ((insn >> 7) & 1);
+                if (insn & (1 << 20)) {
+                    /* vfp->arm */
+                    if (insn & (1 << 21)) {
+                        /* system register */
+                        switch (rn) {
+                        case 0: /* fpsid */
+                            n = 0x0091A0000;
+                            break;
+                        case 2: /* fpscr */
+                           if (rd == 15)
+                               gen_op_vfp_movl_T0_fpscr_flags();
+                           else
+                               gen_op_vfp_movl_T0_fpscr();
+                            break;
+                        default:
+                            return 1;
+                        }
+                    } else {
+                        gen_mov_F0_vreg(0, rn);
+                        gen_op_vfp_mrs();
+                    }
+                    if (rd == 15) {
+                        /* This will only set the 4 flag bits */
+                        gen_op_movl_psr_T0();
+                    } else
+                        gen_movl_reg_T0(s, rd);
+                } else {
+                    /* arm->vfp */
+                    gen_movl_T0_reg(s, rd);
+                    if (insn & (1 << 21)) {
+                        /* system register */
+                        switch (rn) {
+                        case 0: /* fpsid */
+                            /* Writes are ignored.  */
+                            break;
+                        case 2: /* fpscr */
+                            gen_op_vfp_movl_fpscr_T0();
+                            /* This could change vector settings, so jump to
+                               the next instuction.  */
+                            gen_op_movl_T0_im(s->pc);
+                            gen_movl_reg_T0(s, 15);
+                            s->is_jmp = DISAS_UPDATE;
+                            break;
+                        default:
+                            return 1;
+                        }
+                    } else {
+                        gen_op_vfp_msr();
+                        gen_mov_vreg_F0(0, rn);
+                    }
+                }
+            }
+        } else {
+            /* data processing */
+            /* The opcode is in bits 23, 21, 20 and 6.  */
+            op = ((insn >> 20) & 8) | ((insn >> 19) & 6) | ((insn >> 6) & 1);
+            if (dp) {
+                if (op == 15) {
+                    /* rn is opcode */
+                    rn = ((insn >> 15) & 0x1e) | ((insn >> 7) & 1);
+                } else {
+                    /* rn is register number */
+                    if (insn & (1 << 7))
+                        return 1;
+                    rn = (insn >> 16) & 0xf;
+                }
+
+                if (op == 15 && (rn == 15 || rn > 17)) {
+                    /* Integer or single precision destination.  */
+                    rd = ((insn >> 11) & 0x1e) | ((insn >> 22) & 1);
+                } else {
+                    if (insn & (1 << 22))
+                        return 1;
+                    rd = (insn >> 12) & 0xf;
+                }
+
+                if (op == 15 && (rn == 16 || rn == 17)) {
+                    /* Integer source.  */
+                    rm = ((insn << 1) & 0x1e) | ((insn >> 5) & 1);
+                } else {
+                    if (insn & (1 << 5))
+                        return 1;
+                    rm = insn & 0xf;
+                }
+            } else {
+                rn = ((insn >> 15) & 0x1e) | ((insn >> 7) & 1);
+                if (op == 15 && rn == 15) {
+                    /* Double precision destination.  */
+                    if (insn & (1 << 22))
+                        return 1;
+                    rd = (insn >> 12) & 0xf;
+                } else
+                    rd = ((insn >> 11) & 0x1e) | ((insn >> 22) & 1);
+                rm = ((insn << 1) & 0x1e) | ((insn >> 5) & 1);
+            }
+
+            veclen = env->vfp.vec_len;
+            if (op == 15 && rn > 3)
+                veclen = 0;
+
+            /* Shut up compiler warnings.  */
+            delta_m = 0;
+            delta_d = 0;
+            bank_mask = 0;
+            
+            if (veclen > 0) {
+                if (dp)
+                    bank_mask = 0xc;
+                else
+                    bank_mask = 0x18;
+
+                /* Figure out what type of vector operation this is.  */
+                if ((rd & bank_mask) == 0) {
+                    /* scalar */
+                    veclen = 0;
+                } else {
+                    if (dp)
+                        delta_d = (env->vfp.vec_stride >> 1) + 1;
+                    else
+                        delta_d = env->vfp.vec_stride + 1;
+
+                    if ((rm & bank_mask) == 0) {
+                        /* mixed scalar/vector */
+                        delta_m = 0;
+                    } else {
+                        /* vector */
+                        delta_m = delta_d;
+                    }
+                }
+            }
+
+            /* Load the initial operands.  */
+            if (op == 15) {
+                switch (rn) {
+                case 16:
+                case 17:
+                    /* Integer source */
+                    gen_mov_F0_vreg(0, rm);
+                    break;
+                case 8:
+                case 9:
+                    /* Compare */
+                    gen_mov_F0_vreg(dp, rd);
+                    gen_mov_F1_vreg(dp, rm);
+                    break;
+                case 10:
+                case 11:
+                    /* Compare with zero */
+                    gen_mov_F0_vreg(dp, rd);
+                    gen_vfp_F1_ld0(dp);
+                    break;
+                default:
+                    /* One source operand.  */
+                    gen_mov_F0_vreg(dp, rm);
+                }
+            } else {
+                /* Two source operands.  */
+                gen_mov_F0_vreg(dp, rn);
+                gen_mov_F1_vreg(dp, rm);
+            }
+
+            for (;;) {
+                /* Perform the calculation.  */
+                switch (op) {
+                case 0: /* mac: fd + (fn * fm) */
+                    gen_vfp_mul(dp);
+                    gen_mov_F1_vreg(dp, rd);
+                    gen_vfp_add(dp);
+                    break;
+                case 1: /* nmac: fd - (fn * fm) */
+                    gen_vfp_mul(dp);
+                    gen_vfp_neg(dp);
+                    gen_mov_F1_vreg(dp, rd);
+                    gen_vfp_add(dp);
+                    break;
+                case 2: /* msc: -fd + (fn * fm) */
+                    gen_vfp_mul(dp);
+                    gen_mov_F1_vreg(dp, rd);
+                    gen_vfp_sub(dp);
+                    break;
+                case 3: /* nmsc: -fd - (fn * fm)  */
+                    gen_vfp_mul(dp);
+                    gen_mov_F1_vreg(dp, rd);
+                    gen_vfp_add(dp);
+                    gen_vfp_neg(dp);
+                    break;
+                case 4: /* mul: fn * fm */
+                    gen_vfp_mul(dp);
+                    break;
+                case 5: /* nmul: -(fn * fm) */
+                    gen_vfp_mul(dp);
+                    gen_vfp_neg(dp);
+                    break;
+                case 6: /* add: fn + fm */
+                    gen_vfp_add(dp);
+                    break;
+                case 7: /* sub: fn - fm */
+                    gen_vfp_sub(dp);
+                    break;
+                case 8: /* div: fn / fm */
+                    gen_vfp_div(dp);
+                    break;
+                case 15: /* extension space */
+                    switch (rn) {
+                    case 0: /* cpy */
+                        /* no-op */
+                        break;
+                    case 1: /* abs */
+                        gen_vfp_abs(dp);
+                        break;
+                    case 2: /* neg */
+                        gen_vfp_neg(dp);
+                        break;
+                    case 3: /* sqrt */
+                        gen_vfp_sqrt(dp);
+                        break;
+                    case 8: /* cmp */
+                        gen_vfp_cmp(dp);
+                        break;
+                    case 9: /* cmpe */
+                        gen_vfp_cmpe(dp);
+                        break;
+                    case 10: /* cmpz */
+                        gen_vfp_cmp(dp);
+                        break;
+                    case 11: /* cmpez */
+                        gen_vfp_F1_ld0(dp);
+                        gen_vfp_cmpe(dp);
+                        break;
+                    case 15: /* single<->double conversion */
+                        if (dp)
+                            gen_op_vfp_fcvtsd();
+                        else
+                            gen_op_vfp_fcvtds();
+                        break;
+                    case 16: /* fuito */
+                        gen_vfp_uito(dp);
+                        break;
+                    case 17: /* fsito */
+                        gen_vfp_sito(dp);
+                        break;
+                    case 24: /* ftoui */
+                        gen_vfp_toui(dp);
+                        break;
+                    case 25: /* ftouiz */
+                        gen_vfp_touiz(dp);
+                        break;
+                    case 26: /* ftosi */
+                        gen_vfp_tosi(dp);
+                        break;
+                    case 27: /* ftosiz */
+                        gen_vfp_tosiz(dp);
+                        break;
+                    default: /* undefined */
+                        printf ("rn:%d\n", rn);
+                        return 1;
+                    }
+                    break;
+                default: /* undefined */
+                    printf ("op:%d\n", op);
+                    return 1;
+                }
+
+                /* Write back the result.  */
+                if (op == 15 && (rn >= 8 && rn <= 11))
+                    ; /* Comparison, do nothing.  */
+                else if (op == 15 && rn > 17)
+                    /* Integer result.  */
+                    gen_mov_vreg_F0(0, rd);
+                else if (op == 15 && rn == 15)
+                    /* conversion */
+                    gen_mov_vreg_F0(!dp, rd);
+                else
+                    gen_mov_vreg_F0(dp, rd);
+
+                /* break out of the loop if we have finished  */
+                if (veclen == 0)
+                    break;
+
+                if (op == 15 && delta_m == 0) {
+                    /* single source one-many */
+                    while (veclen--) {
+                        rd = ((rd + delta_d) & (bank_mask - 1))
+                             | (rd & bank_mask);
+                        gen_mov_vreg_F0(dp, rd);
+                    }
+                    break;
+                }
+                /* Setup the next operands.  */
+                veclen--;
+                rd = ((rd + delta_d) & (bank_mask - 1))
+                     | (rd & bank_mask);
+
+                if (op == 15) {
+                    /* One source operand.  */
+                    rm = ((rm + delta_m) & (bank_mask - 1))
+                         | (rm & bank_mask);
+                    gen_mov_F0_vreg(dp, rm);
+                } else {
+                    /* Two source operands.  */
+                    rn = ((rn + delta_d) & (bank_mask - 1))
+                         | (rn & bank_mask);
+                    gen_mov_F0_vreg(dp, rn);
+                    if (delta_m) {
+                        rm = ((rm + delta_m) & (bank_mask - 1))
+                             | (rm & bank_mask);
+                        gen_mov_F1_vreg(dp, rm);
+                    }
+                }
+            }
+        }
+        break;
+    case 0xc:
+    case 0xd:
+        if (dp && (insn & (1 << 22))) {
+            /* two-register transfer */
+            rn = (insn >> 16) & 0xf;
+            rd = (insn >> 12) & 0xf;
+            if (dp) {
+                if (insn & (1 << 5))
+                    return 1;
+                rm = insn & 0xf;
+            } else
+                rm = ((insn << 1) & 0x1e) | ((insn >> 5) & 1);
+
+            if (insn & (1 << 20)) {
+                /* vfp->arm */
+                if (dp) {
+                    gen_mov_F0_vreg(1, rm);
+                    gen_op_vfp_mrrd();
+                    gen_movl_reg_T0(s, rd);
+                    gen_movl_reg_T1(s, rn);
+                } else {
+                    gen_mov_F0_vreg(0, rm);
+                    gen_op_vfp_mrs();
+                    gen_movl_reg_T0(s, rn);
+                    gen_mov_F0_vreg(0, rm + 1);
+                    gen_op_vfp_mrs();
+                    gen_movl_reg_T0(s, rd);
+                }
+            } else {
+                /* arm->vfp */
+                if (dp) {
+                    gen_movl_T0_reg(s, rd);
+                    gen_movl_T1_reg(s, rn);
+                    gen_op_vfp_mdrr();
+                    gen_mov_vreg_F0(1, rm);
+                } else {
+                    gen_movl_T0_reg(s, rn);
+                    gen_op_vfp_msr();
+                    gen_mov_vreg_F0(0, rm);
+                    gen_movl_T0_reg(s, rd);
+                    gen_op_vfp_msr();
+                    gen_mov_vreg_F0(0, rm + 1);
+                }
+            }
+        } else {
+            /* Load/store */
+            rn = (insn >> 16) & 0xf;
+            if (dp)
+                rd = (insn >> 12) & 0xf;
+            else
+                rd = ((insn >> 11) & 0x1e) | ((insn >> 22) & 1);
+            gen_movl_T1_reg(s, rn);
+            if ((insn & 0x01200000) == 0x01000000) {
+                /* Single load/store */
+                offset = (insn & 0xff) << 2;
+                if ((insn & (1 << 23)) == 0)
+                    offset = -offset;
+                gen_op_addl_T1_im(offset);
+                if (insn & (1 << 20)) {
+                    gen_vfp_ld(dp);
+                    gen_mov_vreg_F0(dp, rd);
+                } else {
+                    gen_mov_F0_vreg(dp, rd);
+                    gen_vfp_st(dp);
+                }
+            } else {
+                /* load/store multiple */
+                if (dp)
+                    n = (insn >> 1) & 0x7f;
+                else
+                    n = insn & 0xff;
+
+                if (insn & (1 << 24)) /* pre-decrement */
+                    gen_op_addl_T1_im(-((insn & 0xff) << 2));
+
+                if (dp)
+                    offset = 8;
+                else
+                    offset = 4;
+                for (i = 0; i < n; i++) {
+                    if (insn & (1 << 20)) {
+                        /* load */
+                        gen_vfp_ld(dp);
+                        gen_mov_vreg_F0(dp, rd + i);
+                    } else {
+                        /* store */
+                        gen_mov_F0_vreg(dp, rd + i);
+                        gen_vfp_st(dp);
+                    }
+                    gen_op_addl_T1_im(offset);
+                }
+                if (insn & (1 << 21)) {
+                    /* writeback */
+                    if (insn & (1 << 24))
+                        offset = -offset * n;
+                    else if (dp && (insn & 1))
+                        offset = 4;
+                    else
+                        offset = 0;
+
+                    if (offset != 0)
+                        gen_op_addl_T1_im(offset);
+                    gen_movl_reg_T1(s, rn);
+                }
+            }
+        }
+        break;
+    default:
+        /* Should never happen.  */
+        return 1;
+    }
+    return 0;
+}
+
+static void disas_arm_insn(CPUState * env, DisasContext *s)
 {
     unsigned int cond, insn, val, op1, i, shift, rm, rs, rn, rd, sh;
     
@@ -363,6 +917,7 @@ static void disas_arm_insn(DisasContext 
     
     cond = insn >> 28;
     if (cond == 0xf){
+        /* Unconditional instructions.  */
         if ((insn & 0x0d70f000) == 0x0550f000)
             return; /* PLD */
         else if ((insn & 0x0e000000) == 0x0a000000) {
@@ -381,6 +936,10 @@ static void disas_arm_insn(DisasContext 
             gen_op_movl_T0_im(val);
             gen_bx(s);
             return;
+        } else if ((insn & 0x0fe00000) == 0x0c400000) {
+            /* Coprocessor double register transfer.  */
+        } else if ((insn & 0x0f000010) == 0x0e000010) {
+            /* Additional coprocessor register transfer.  */
         }
         goto illegal_op;
     }
@@ -934,6 +1493,22 @@ static void disas_arm_insn(DisasContext 
                 s->is_jmp = DISAS_TB_JUMP;
             }
             break;
+        case 0xc:
+        case 0xd:
+        case 0xe:
+            /* Coprocessor.  */
+            op1 = (insn >> 8) & 0xf;
+            switch (op1) {
+            case 10:
+            case 11:
+                if (disas_vfp_insn (env, s, insn))
+                    goto illegal_op;
+                break;
+            default:
+                /* unknown coprocessor.  */
+                goto illegal_op;
+            }
+            break;
         case 0xf:
             /* swi */
             gen_op_movl_T0_im((long)s->pc);
@@ -1484,7 +2059,7 @@ static inline int gen_intermediate_code_
         if (env->thumb)
           disas_thumb_insn(dc);
         else
-          disas_arm_insn(dc);
+          disas_arm_insn(env, dc);
     } while (!dc->is_jmp && gen_opc_ptr < gen_opc_end && 
              (dc->pc - pc_start) < (TARGET_PAGE_SIZE - 32));
     switch(dc->is_jmp) {
@@ -1557,6 +2132,11 @@ void cpu_dump_state(CPUState *env, FILE 
                     int flags)
 {
     int i;
+    struct {
+        uint32_t i;
+        float s;
+    } s0, s1;
+    CPU_DoubleU d;
 
     for(i=0;i<16;i++) {
         cpu_fprintf(f, "R%02d=%08x", i, env->regs[i]);
@@ -1566,11 +2146,23 @@ void cpu_dump_state(CPUState *env, FILE 
             cpu_fprintf(f, " ");
     }
     cpu_fprintf(f, "PSR=%08x %c%c%c%c\n", 
-            env->cpsr, 
+             env->cpsr, 
             env->cpsr & (1 << 31) ? 'N' : '-',
             env->cpsr & (1 << 30) ? 'Z' : '-',
             env->cpsr & (1 << 29) ? 'C' : '-',
             env->cpsr & (1 << 28) ? 'V' : '-');
+
+    for (i = 0; i < 16; i++) {
+        s0.s = env->vfp.regs.s[i * 2];
+        s1.s = env->vfp.regs.s[i * 2 + 1];
+        d.d = env->vfp.regs.d[i];
+        cpu_fprintf(f, "s%02d=%08x(%8f) s%02d=%08x(%8f) d%02d=%08x%08x(%8f)\n",
+                    i * 2, (int)s0.i, s0.s,
+                    i * 2 + 1, (int)s0.i, s0.s,
+                    i, (int)(uint32_t)d.l.upper, (int)(uint32_t)d.l.lower,
+                    d.d);
+        cpu_fprintf(f, "FPSCR: %08x\n", (int)env->vfp.fpscr);
+    }
 }
 
 target_ulong cpu_get_phys_page_debug(CPUState *env, target_ulong addr)

[Prev in Thread]

Current Thread

[Next in Thread]

[Qemu-devel] [patch] Arm VFP, Paul Brook <=

Prev by Date: Re: [Qemu-devel] The QEMU Accelerator Module
Next by Date: [Qemu-devel] [PATCH] Re: KQEMU error, unable to get major 254
Previous by thread: [Qemu-devel] KQEMU error, unable to get major 254
Next by thread: [Qemu-devel] Arm benchmarks
Index(es):
- Date
- Thread