qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] PATCH for bugs 661696 and 1248376: target-i386: x87 exception pointers using TCG.


From: Jaume Martí
Subject: Re: [Qemu-devel] PATCH for bugs 661696 and 1248376: target-i386: x87 exception pointers using TCG.
Date: Sat, 19 Jul 2014 02:36:51 +0200

Hello,

I attach a patch with the fix for the issues pointed out by Richard.
Maybe it would be useful to have the option to disable this feature
at compile time, for performance reasons.
Please review and apply.

Best regards,
Jaume

Signed-off-by: Jaume Marti Farriol (address@hidden)
diff --git a/linux-user/signal.c b/linux-user/signal.c
index 1141054..73f8f6b 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -865,7 +865,7 @@ static void setup_sigcontext(struct target_sigcontext *sc,
     __put_user(env->regs[R_ESP], &sc->esp_at_signal);
     __put_user(env->segs[R_SS].selector, (unsigned int *)&sc->ss);

-        cpu_x86_fsave(env, fpstate_addr, 1);
+        cpu_x86_fsave(env, fpstate_addr);
         fpstate->status = fpstate->sw;
         magic = 0xffff;
     __put_user(magic, &fpstate->magic);
@@ -1068,7 +1068,7 @@ restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc, int *peax)
                 if (!access_ok(VERIFY_READ, fpstate_addr,
                                sizeof(struct target_fpstate)))
                         goto badframe;
-                cpu_x86_frstor(env, fpstate_addr, 1);
+                cpu_x86_frstor(env, fpstate_addr);
  }

         *peax = tswapl(sc->eax);
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index e634d83..4274ce3 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -819,10 +819,11 @@ typedef struct CPUX86State {
     uint16_t fpuc;
     uint8_t fptags[8];   /* 0 = valid, 1 = empty */
     FPReg fpregs[8];
-    /* KVM-only so far */
-    uint16_t fpop;
+    uint32_t fpop;
     uint64_t fpip;
     uint64_t fpdp;
+    uint32_t fpcs;
+    uint32_t fpds;

     /* emulator internal variables */
     float_status fp_status;
@@ -1067,8 +1068,8 @@ floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper);
 /* the following helpers are only usable in user mode simulation as
    they can trigger unexpected exceptions */
 void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector);
-void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32);
-void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32);
+void cpu_x86_fsave(CPUX86State *s, target_ulong ptr);
+void cpu_x86_frstor(CPUX86State *s, target_ulong ptr);

 /* you can call this signal handler from your SIGBUS and SIGSEGV
    signal handlers to inform the virtual CPU of exceptions. non zero
diff --git a/target-i386/fpu_helper.c b/target-i386/fpu_helper.c
index 1b2900d..6886031 100644
--- a/target-i386/fpu_helper.c
+++ b/target-i386/fpu_helper.c
@@ -56,6 +56,8 @@
 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)

+#define FPUS(env) ((env->fpus & ~0x3800) | ((env->fpstt & 0x7) << 11))
+
 static inline void fpush(CPUX86State *env)
 {
     env->fpstt = (env->fpstt - 1) & 7;
@@ -604,6 +606,10 @@ void helper_fninit(CPUX86State *env)
     env->fptags[5] = 1;
     env->fptags[6] = 1;
     env->fptags[7] = 1;
+    env->fpip = 0;
+    env->fpcs = 0;
+    env->fpdp = 0;
+    env->fpds = 0;
 }

 /* BCD ops */
@@ -961,13 +967,13 @@ void helper_fxam_ST0(CPUX86State *env)
     }
 }

-void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
+void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32,
+                   int protected_mode)
 {
-    int fpus, fptag, exp, i;
+    int fptag, exp, i;
     uint64_t mant;
     CPU_LDoubleU tmp;

-    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
     fptag = 0;
     for (i = 7; i >= 0; i--) {
         fptag <<= 2;
@@ -987,83 +993,150 @@ void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
             }
         }
     }
+
     if (data32) {
         /* 32 bit */
-        cpu_stl_data(env, ptr, env->fpuc);
-        cpu_stl_data(env, ptr + 4, fpus);
-        cpu_stl_data(env, ptr + 8, fptag);
-        cpu_stl_data(env, ptr + 12, 0); /* fpip */
-        cpu_stl_data(env, ptr + 16, 0); /* fpcs */
-        cpu_stl_data(env, ptr + 20, 0); /* fpoo */
-        cpu_stl_data(env, ptr + 24, 0); /* fpos */
+        cpu_stw_data(env, ptr, env->fpuc);
+        cpu_stw_data(env, ptr + 4, FPUS(env));
+        cpu_stw_data(env, ptr + 8, fptag);
+        if (protected_mode) {
+            cpu_stl_data(env, ptr + 12, env->fpip);
+            cpu_stl_data(env, ptr + 16,
+                        ((env->fpop & 0x7ff) << 16) | (env->fpcs & 0xffff));
+            cpu_stl_data(env, ptr + 20, env->fpdp);
+            cpu_stl_data(env, ptr + 24, env->fpds);
+        } else {
+            /* Real mode  */
+            cpu_stl_data(env, ptr + 12, env->fpip); /* fpip[15..00] */
+            cpu_stl_data(env, ptr + 16, ((((env->fpip >> 16) & 0xffff) << 12) |
+                        (env->fpop & 0x7ff))); /* fpip[31..16], fpop */
+            cpu_stl_data(env, ptr + 20, env->fpdp); /* fpdp[15..00] */
+            cpu_stl_data(env, ptr + 24,
+                        (env->fpdp >> 4) & 0xffff000); /* fpdp[31..16] */
+        }
     } else {
         /* 16 bit */
         cpu_stw_data(env, ptr, env->fpuc);
-        cpu_stw_data(env, ptr + 2, fpus);
+        cpu_stw_data(env, ptr + 2, FPUS(env));
         cpu_stw_data(env, ptr + 4, fptag);
-        cpu_stw_data(env, ptr + 6, 0);
-        cpu_stw_data(env, ptr + 8, 0);
-        cpu_stw_data(env, ptr + 10, 0);
-        cpu_stw_data(env, ptr + 12, 0);
+        if (protected_mode) {
+            cpu_stw_data(env, ptr + 6, env->fpip);
+            cpu_stw_data(env, ptr + 8, env->fpcs);
+            cpu_stw_data(env, ptr + 10, env->fpdp);
+            cpu_stw_data(env, ptr + 12, env->fpds);
+        } else {
+            /* Real mode  */
+            cpu_stw_data(env, ptr + 6, env->fpip); /* fpip[15..0] */
+            cpu_stw_data(env, ptr + 8, ((env->fpip >> 4) & 0xf000) |
+                        (env->fpop & 0x7ff)); /* fpip[19..16], fpop */
+            cpu_stw_data(env, ptr + 10, env->fpdp); /* fpdp[15..0] */
+            cpu_stw_data(env, ptr + 12,
+                        (env->fpdp >> 4) & 0xf000); /* fpdp[19..16] */
+        }
     }
+
+    env->fpip = 0;
+    env->fpcs = 0;
+    env->fpdp = 0;
+    env->fpds = 0;
 }

-void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
+void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32,
+                   int protected_mode)
 {
-    int i, fpus, fptag;
+    int tmp, i, fpus, fptag;

     if (data32) {
+        /* 32 bit */
         env->fpuc = cpu_lduw_data(env, ptr);
         fpus = cpu_lduw_data(env, ptr + 4);
         fptag = cpu_lduw_data(env, ptr + 8);
+        if (protected_mode) {
+            env->fpip = cpu_ldl_data(env, ptr + 12);
+            tmp = cpu_ldl_data(env, ptr + 16);
+            env->fpcs = tmp & 0xffff;
+            env->fpop = tmp >> 16;
+            env->fpdp = cpu_ldl_data(env, ptr + 20);
+            env->fpds = cpu_lduw_data(env, ptr + 24);
+        } else {
+            /* Real mode */
+            tmp = cpu_ldl_data(env, ptr + 16);
+            env->fpip = ((tmp & 0xffff000) << 4) |
+                        cpu_lduw_data(env, ptr + 12);
+            env->fpop = tmp & 0x7ff;
+            env->fpdp = (cpu_ldl_data(env, ptr + 24) << 4) |
+                        cpu_lduw_data(env, ptr + 20);
+        }
     } else {
+        /* 16 bit */
         env->fpuc = cpu_lduw_data(env, ptr);
         fpus = cpu_lduw_data(env, ptr + 2);
         fptag = cpu_lduw_data(env, ptr + 4);
+        if (protected_mode) {
+            /* Protected mode  */
+            env->fpip = cpu_lduw_data(env, ptr + 6);
+            env->fpcs = cpu_lduw_data(env, ptr + 8);
+            env->fpdp = cpu_lduw_data(env, ptr + 10);
+            env->fpds = cpu_lduw_data(env, ptr + 12);
+        } else {
+            /* Real mode  */
+            tmp = cpu_lduw_data(env, ptr + 8);
+            env->fpip = ((tmp & 0xf000) << 4) | cpu_lduw_data(env, ptr + 6);
+            env->fpop = tmp & 0x7ff;
+            env->fpdp = cpu_lduw_data(env, ptr + 12) << 4 |
+                        cpu_lduw_data(env, ptr + 10);
+        }
     }
+
     env->fpstt = (fpus >> 11) & 7;
     env->fpus = fpus & ~0x3800;
     for (i = 0; i < 8; i++) {
         env->fptags[i] = ((fptag & 3) == 3);
         fptag >>= 2;
     }
+
+    env->fpip &= 0xffffffff;
+    env->fpdp &= 0xffffffff;
+    if (!protected_mode) {
+        env->fpcs = 0;
+        env->fpds = 0;
+    }
 }

-void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
+void helper_fsave(CPUX86State *env, target_ulong ptr, int data32,
+                  int protected_mode)
 {
     floatx80 tmp;
     int i;

-    helper_fstenv(env, ptr, data32);
+    helper_fstenv(env, ptr, data32, protected_mode);

-    ptr += (14 << data32);
+    if (data32) {
+        ptr += 28;
+    } else {
+        ptr += 14;
+    }
     for (i = 0; i < 8; i++) {
         tmp = ST(i);
         helper_fstt(env, tmp, ptr);
         ptr += 10;
     }

-    /* fninit */
-    env->fpus = 0;
-    env->fpstt = 0;
-    env->fpuc = 0x37f;
-    env->fptags[0] = 1;
-    env->fptags[1] = 1;
-    env->fptags[2] = 1;
-    env->fptags[3] = 1;
-    env->fptags[4] = 1;
-    env->fptags[5] = 1;
-    env->fptags[6] = 1;
-    env->fptags[7] = 1;
+    helper_fninit(env);
 }

-void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
+void helper_frstor(CPUX86State *env, target_ulong ptr, int data32,
+                   int protected_mode)
 {
     floatx80 tmp;
     int i;

-    helper_fldenv(env, ptr, data32);
-    ptr += (14 << data32);
+    helper_fldenv(env, ptr, data32, protected_mode);
+    if (data32) {
+        ptr += 28;
+    } else {
+        ptr += 14;
+    }

     for (i = 0; i < 8; i++) {
         tmp = helper_fldt(env, ptr);
@@ -1072,21 +1145,22 @@ void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
     }
 }

-#if defined(CONFIG_USER_ONLY)
-void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
+#if defined(CONFIG_USER_ONLY) && defined(TARGET_I386) && TARGET_ABI_BITS == 32
+
+void cpu_x86_fsave(CPUX86State *env, target_ulong ptr)
 {
-    helper_fsave(env, ptr, data32);
+    helper_fsave(env, ptr, 1, 1);
 }

-void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
+void cpu_x86_frstor(CPUX86State *env, target_ulong ptr)
 {
-    helper_frstor(env, ptr, data32);
+    helper_frstor(env, ptr, 1, 1);
 }
 #endif

-void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
+void helper_fxsave(CPUX86State *env, target_ulong ptr, int data32, int data64)
 {
-    int fpus, fptag, i, nb_xmm_regs;
+    int i, nb_xmm_regs, fptag;
     floatx80 tmp;
     target_ulong addr;

@@ -1095,25 +1169,36 @@ void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
         raise_exception(env, EXCP0D_GPF);
     }

-    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
     fptag = 0;
     for (i = 0; i < 8; i++) {
         fptag |= (env->fptags[i] << i);
     }
+    fptag ^= 0xff;
+
     cpu_stw_data(env, ptr, env->fpuc);
-    cpu_stw_data(env, ptr + 2, fpus);
-    cpu_stw_data(env, ptr + 4, fptag ^ 0xff);
+    cpu_stw_data(env, ptr + 2, FPUS(env));
+    cpu_stw_data(env, ptr + 4, fptag & 0xff);
+    cpu_stw_data(env, ptr + 6, env->fpop);
+
 #ifdef TARGET_X86_64
     if (data64) {
-        cpu_stq_data(env, ptr + 0x08, 0); /* rip */
-        cpu_stq_data(env, ptr + 0x10, 0); /* rdp */
+        /* 64 bit */
+        cpu_stq_data(env, ptr + 8, env->fpip);
+        cpu_stq_data(env, ptr + 16, env->fpdp);
     } else
 #endif
     {
-        cpu_stl_data(env, ptr + 0x08, 0); /* eip */
-        cpu_stl_data(env, ptr + 0x0c, 0); /* sel  */
-        cpu_stl_data(env, ptr + 0x10, 0); /* dp */
-        cpu_stl_data(env, ptr + 0x14, 0); /* sel  */
+        if (data32) {
+            /* 32 bit */
+            cpu_stl_data(env, ptr + 8, env->fpip);
+            cpu_stl_data(env, ptr + 16, env->fpdp);
+        } else {
+            /* 16 bit */
+            cpu_stw_data(env, ptr + 8, env->fpip);
+            cpu_stw_data(env, ptr + 16, env->fpdp);
+        }
+        cpu_stw_data(env, ptr + 12, env->fpcs & 0xffff);
+        cpu_stw_data(env, ptr + 20, env->fpds & 0xffff);
     }

     addr = ptr + 0x20;
@@ -1146,7 +1231,7 @@ void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
     }
 }

-void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
+void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data32, int data64)
 {
     int i, fpus, fptag, nb_xmm_regs;
     floatx80 tmp;
@@ -1167,6 +1252,30 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
         env->fptags[i] = ((fptag >> i) & 1);
     }

+    env->fpop = (cpu_lduw_data(env, ptr + 6) >> 5) & 0x7ff;
+
+#ifdef TARGET_X86_64
+    if (data64) {
+        /* 64 bit */
+        env->fpip = cpu_ldq_data(env, ptr + 8);
+        env->fpdp = cpu_ldq_data(env, ptr + 16);
+    } else
+#endif
+    {
+        if (data32) {
+            /* 32 bit */
+            env->fpip = cpu_ldl_data(env, ptr + 8);
+            env->fpdp = cpu_ldl_data(env, ptr + 16);
+        } else {
+            /* 16 bit */
+            env->fpip = cpu_lduw_data(env, ptr + 8);
+            env->fpdp = cpu_lduw_data(env, ptr + 16);
+        }
+
+        env->fpcs = cpu_lduw_data(env, ptr + 12);
+        env->fpds = cpu_lduw_data(env, ptr + 20);
+    }
+
     addr = ptr + 0x20;
     for (i = 0; i < 8; i++) {
         tmp = helper_fldt(env, addr);
@@ -1195,6 +1304,11 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
             }
         }
     }
+
+    if (!data64) {
+        env->fpip &= 0xffffffff;
+        env->fpdp &= 0xffffffff;
+    }
 }

 void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
diff --git a/target-i386/helper.h b/target-i386/helper.h
index 8eb0145..9c4fd22 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -183,12 +183,12 @@ DEF_HELPER_1(frndint, void, env)
 DEF_HELPER_1(fscale, void, env)
 DEF_HELPER_1(fsin, void, env)
 DEF_HELPER_1(fcos, void, env)
-DEF_HELPER_3(fstenv, void, env, tl, int)
-DEF_HELPER_3(fldenv, void, env, tl, int)
-DEF_HELPER_3(fsave, void, env, tl, int)
-DEF_HELPER_3(frstor, void, env, tl, int)
-DEF_HELPER_3(fxsave, void, env, tl, int)
-DEF_HELPER_3(fxrstor, void, env, tl, int)
+DEF_HELPER_4(fstenv, void, env, tl, int, int)
+DEF_HELPER_4(fldenv, void, env, tl, int, int)
+DEF_HELPER_4(fsave, void, env, tl, int, int)
+DEF_HELPER_4(frstor, void, env, tl, int, int)
+DEF_HELPER_4(fxsave, void, env, tl, int, int)
+DEF_HELPER_4(fxrstor, void, env, tl, int, int)

 DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_1(ctz, TCG_CALL_NO_RWG_SE, tl, tl)
diff --git a/target-i386/machine.c b/target-i386/machine.c
index 16d2f6a..500f04f 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -397,7 +397,7 @@ static const VMStateDescription vmstate_fpop_ip_dp = {
     .version_id = 1,
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
-        VMSTATE_UINT16(env.fpop, X86CPU),
+        VMSTATE_UINT32(env.fpop, X86CPU),
         VMSTATE_UINT64(env.fpip, X86CPU),
         VMSTATE_UINT64(env.fpdp, X86CPU),
         VMSTATE_END_OF_LIST()
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 6fcd824..8e490de 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -58,6 +58,9 @@
 #endif

 //#define MACRO_TEST   1
+#define IS_PROTECTED_MODE(s) (s->pe && !s->vm86)
+#define FP_EP_VALID 0x80000000
+#define FP_EP_INVALID 0

 /* global register indexes */
 static TCGv_ptr cpu_env;
@@ -65,6 +68,11 @@ static TCGv cpu_A0;
 static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
 static TCGv_i32 cpu_cc_op;
 static TCGv cpu_regs[CPU_NB_REGS];
+static TCGv_i32 cpu_fpop;
+static TCGv cpu_fpip;
+static TCGv cpu_fpdp;
+static TCGv_i32 cpu_fpds;
+static TCGv_i32 cpu_fpcs;
 /* local temps */
 static TCGv cpu_T[2];
 /* local register indexes (only used inside old micro ops) */
@@ -74,6 +82,9 @@ static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
 static TCGv_i64 cpu_tmp1_i64;

 static uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
+static uint16_t gen_opc_fp_op[OPC_BUF_SIZE];
+static uint16_t gen_opc_fp_cs[OPC_BUF_SIZE];
+static target_ulong gen_opc_fp_ip[OPC_BUF_SIZE];

 #include "exec/gen-icount.h"

@@ -104,6 +115,10 @@ typedef struct DisasContext {
     int ss32;   /* 32 bit stack segment */
     CCOp cc_op;  /* current CC operation */
     bool cc_op_dirty;
+    uint16_t fp_op;
+    bool fp_ep_dirty;
+    target_ulong fp_ip;
+    uint16_t fp_cs;
     int addseg; /* non zero if either DS/ES/SS have a non zero base */
     int f_st;   /* currently unused */
     int vm86;   /* vm86 mode */
@@ -208,6 +223,62 @@ static const uint8_t cc_op_live[CC_OP_NB] = {
     [CC_OP_CLR] = 0,
 };

+static inline bool instr_is_x87_nc(int modrm, int b)
+{
+    int op, mod, rm;
+    switch (b) {
+    case 0xd8 ... 0xdf:
+        /* floats */
+        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
+        mod = (modrm >> 6) & 3;
+        rm = modrm & 7;
+        if (mod != 3) {
+            /* memory */
+            switch (op) {
+            case 0x0c: /* fldenv */
+            case 0x0d: /* fldcw */
+            case 0x0e: /* fstenv, fnstenv */
+            case 0x0f: /* fstcw, fnstcw */
+            case 0x2c: /* frstor */
+            case 0x2e: /* fsave, fnsave */
+            case 0x2f: /* fstsw, fnstsw */
+                return false;
+            default:
+                return true;
+            }
+        } else {
+            /* register */
+            switch (op) {
+            case 0x0a:
+                return false; /* fnop, Illegal op */
+            case 0x0e: /* fdecstp, fincstp */
+            case 0x28: /* ffree */
+                return false;
+            case 0x1c:
+                switch (rm) {
+                case 1: /* feni */
+                    return true;
+                case 2: /* fclex, fnclex */
+                case 3: /* finit, fninit */
+                    return false;
+                case 4: /* fsetpm */
+                    return true;
+                default: /* Illegal op */
+                    return false;
+                }
+            case 0x3c:
+                return false; /* fstsw, fnstsw, Illegal op */
+            default:
+                return true;
+            }
+        }
+    /*case 0x9b: // fwait, wait
+        return false;*/
+    default:
+        return false;
+    }
+}
+
 static void set_cc_op(DisasContext *s, CCOp op)
 {
     int dead;
@@ -253,6 +324,23 @@ static void gen_update_cc_op(DisasContext *s)
     }
 }

+static void set_ep(DisasContext *s, int fp_op, int fp_ip, int fp_cs) {
+    s->fp_op = FP_EP_VALID | fp_op;
+    s->fp_ip = fp_ip;
+    s->fp_cs = fp_cs;
+    s->fp_ep_dirty = true;
+}
+
+static void gen_update_ep(DisasContext *s)
+{
+    if (s->fp_ep_dirty) {
+        tcg_gen_movi_i32(cpu_fpop, s->fp_op);
+        tcg_gen_movi_tl(cpu_fpip, s->fp_ip);
+        tcg_gen_movi_i32(cpu_fpcs, s->fp_cs);
+        s->fp_ep_dirty = false;
+    }
+}
+
 #ifdef TARGET_X86_64

 #define NB_OP_SIZES 4
@@ -666,6 +754,7 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
     state_saved = 0;
     if (s->pe && (s->cpl > s->iopl || s->vm86)) {
         gen_update_cc_op(s);
+        gen_update_ep(s);
         gen_jmp_im(cur_eip);
         state_saved = 1;
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
@@ -686,6 +775,7 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
     if(s->flags & HF_SVMI_MASK) {
         if (!state_saved) {
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(cur_eip);
         }
         svm_flags |= (1 << (4 + ot));
@@ -1097,6 +1187,7 @@ static inline void gen_jcc1(DisasContext *s, int b, int l1)
     CCPrepare cc = gen_prepare_cc(s, b, cpu_T[0]);

     gen_update_cc_op(s);
+    gen_update_ep(s);
     if (cc.mask != -1) {
         tcg_gen_andi_tl(cpu_T[0], cc.reg, cc.mask);
         cc.reg = cpu_T[0];
@@ -1580,14 +1671,14 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
     t0 = tcg_const_i32(0);
     t1 = tcg_temp_new_i32();
     tcg_gen_trunc_tl_i32(t1, cpu_T[1]);
-    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
+    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
     tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
     tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
                         cpu_tmp2_i32, cpu_tmp3_i32);
     tcg_temp_free_i32(t0);
     tcg_temp_free_i32(t1);

-    /* The CC_OP value is no longer predictable.  */
+    /* The CC_OP value is no longer predictable.  */
     set_cc_op(s, CC_OP_DYNAMIC);
 }

@@ -1863,7 +1954,7 @@ static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
     }
 }

-static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
+static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm, int b)
 {
     target_long disp;
     int havesib;
@@ -1871,6 +1962,7 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
     int index;
     int scale;
     int mod, rm, code, override, must_add_seg;
+    int curr_instr_is_x87_nc;
     TCGv sum;

     override = s->override;
@@ -1950,6 +2042,13 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
             tcg_gen_addi_tl(cpu_A0, sum, disp);
         }

+        curr_instr_is_x87_nc = instr_is_x87_nc(modrm, b);
+        if (curr_instr_is_x87_nc) {
+            tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
+            if (s->aflag == MO_32) {
+                tcg_gen_ext32u_tl(cpu_fpdp, cpu_fpdp);
+            }
+        }
         if (must_add_seg) {
             if (override < 0) {
                 if (base == R_EBP || base == R_ESP) {
@@ -1961,6 +2060,12 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)

             tcg_gen_ld_tl(cpu_tmp0, cpu_env,
                           offsetof(CPUX86State, segs[override].base));
+
+            if (curr_instr_is_x87_nc) {
+                tcg_gen_ld_i32(cpu_fpds, cpu_env,
+                              offsetof(CPUX86State, segs[override].selector));
+            }
+
             if (CODE64(s)) {
                 if (s->aflag == MO_32) {
                     tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
@@ -1970,6 +2075,11 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
             }

             tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+        } else {
+            if (curr_instr_is_x87_nc) {
+                tcg_gen_ld_i32(cpu_fpds, cpu_env,
+                              offsetof(CPUX86State, segs[R_DS].selector));
+            }
         }

         if (s->aflag == MO_32) {
@@ -2039,8 +2149,22 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
                     override = R_DS;
                 }
             }
+            if (instr_is_x87_nc(modrm, b)) {
+                tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
+                tcg_gen_ld_i32(cpu_fpds, cpu_env,
+                              offsetof(CPUX86State, segs[override].selector));
+            }
             gen_op_addl_A0_seg(s, override);
+        } else {
+            if (instr_is_x87_nc(modrm, b)) {
+                tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
+                tcg_gen_ld_i32(cpu_fpds, cpu_env,
+                              offsetof(CPUX86State, segs[R_DS].selector));
+            }
         }
+#ifdef TARGET_X86_64
+        tcg_gen_andi_tl(cpu_fpdp, cpu_fpdp, 0xffffffff);
+#endif
         break;

     default:
@@ -2130,7 +2254,7 @@ static void gen_add_A0_ds_seg(DisasContext *s)
 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
    OR_TMP0 */
 static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
-                           TCGMemOp ot, int reg, int is_store)
+                           TCGMemOp ot, int reg, int is_store, int b)
 {
     int mod, rm;

@@ -2147,7 +2271,7 @@ static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
                 gen_op_mov_reg_v(ot, reg, cpu_T[0]);
         }
     } else {
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         if (is_store) {
             if (reg != OR_TMP0)
                 gen_op_mov_v_reg(ot, cpu_T[0], reg);
@@ -2250,7 +2374,7 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
 {
     CCPrepare cc;

-    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);

     cc = gen_prepare_cc(s, b, cpu_T[1]);
     if (cc.mask != -1) {
@@ -2297,6 +2421,7 @@ static void gen_movl_seg_T0(DisasContext *s, int seg_reg, target_ulong cur_eip)
     if (s->pe && !s->vm86) {
         /* XXX: optimize by finding processor state dynamically */
         gen_update_cc_op(s);
+        gen_update_ep(s);
         gen_jmp_im(cur_eip);
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
@@ -2326,6 +2451,7 @@ gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
     if (likely(!(s->flags & HF_SVMI_MASK)))
         return;
     gen_update_cc_op(s);
+    gen_update_ep(s);
     gen_jmp_im(pc_start - s->cs_base);
     gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
                                          tcg_const_i64(param));
@@ -2513,6 +2639,7 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
 static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
 {
     gen_update_cc_op(s);
+    gen_update_ep(s);
     gen_jmp_im(cur_eip);
     gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
     s->is_jmp = DISAS_TB_JUMP;
@@ -2524,6 +2651,7 @@ static void gen_interrupt(DisasContext *s, int intno,
                           target_ulong cur_eip, target_ulong next_eip)
 {
     gen_update_cc_op(s);
+    gen_update_ep(s);
     gen_jmp_im(cur_eip);
     gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
                                tcg_const_i32(next_eip - cur_eip));
@@ -2533,6 +2661,7 @@ static void gen_interrupt(DisasContext *s, int intno,
 static void gen_debug(DisasContext *s, target_ulong cur_eip)
 {
     gen_update_cc_op(s);
+    gen_update_ep(s);
     gen_jmp_im(cur_eip);
     gen_helper_debug(cpu_env);
     s->is_jmp = DISAS_TB_JUMP;
@@ -2543,6 +2672,7 @@ static void gen_debug(DisasContext *s, target_ulong cur_eip)
 static void gen_eob(DisasContext *s)
 {
     gen_update_cc_op(s);
+    gen_update_ep(s);
     if (s->tb->flags & HF_INHIBIT_IRQ_MASK) {
         gen_helper_reset_inhibit_irq(cpu_env);
     }
@@ -2564,6 +2694,7 @@ static void gen_eob(DisasContext *s)
 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
 {
     gen_update_cc_op(s);
+    gen_update_ep(s);
     set_cc_op(s, CC_OP_DYNAMIC);
     if (s->jmp_opt) {
         gen_goto_tb(s, tb_num, eip);
@@ -3043,7 +3174,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x0e7: /* movntq */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             break;
         case 0x1e7: /* movntdq */
@@ -3051,20 +3182,20 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x12b: /* movntps */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             break;
         case 0x3f0: /* lddqu */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             break;
         case 0x22b: /* movntss */
         case 0x32b: /* movntsd */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             if (b1 & 1) {
                 gen_stq_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
@@ -3076,13 +3207,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x6e: /* movd mm, ea */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0, b);
                 tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
             } else
 #endif
             {
-                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0, b);
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                                  offsetof(CPUX86State,fpregs[reg].mmx));
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
@@ -3091,15 +3222,15 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x16e: /* movd xmm, ea */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0, b);
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg]));
                 gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T[0]);
             } else
 #endif
             {
-                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0, b);
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg]));
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
@@ -3107,7 +3238,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x6f: /* movq mm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             } else {
                 rm = (modrm & 7);
@@ -3124,7 +3255,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x16f: /* movdqa xmm, ea */
         case 0x26f: /* movdqu xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3134,7 +3265,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x210: /* movss xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, MO_32, cpu_T[0], cpu_A0);
                 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
                 tcg_gen_movi_tl(cpu_T[0], 0);
@@ -3149,7 +3280,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
             break;
         case 0x310: /* movsd xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
                 tcg_gen_movi_tl(cpu_T[0], 0);
@@ -3164,7 +3295,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
         case 0x012: /* movlps */
         case 0x112: /* movlpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3176,7 +3307,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
             break;
         case 0x212: /* movsldup */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3192,7 +3323,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
             break;
         case 0x312: /* movddup */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3206,7 +3337,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
         case 0x016: /* movhps */
         case 0x116: /* movhpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(1)));
             } else {
@@ -3218,7 +3349,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
             break;
         case 0x216: /* movshdup */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3256,34 +3387,34 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
         case 0x7e: /* movd ea, mm */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                tcg_gen_ld_i64(cpu_T[0], cpu_env,
+                tcg_gen_ld_i64(cpu_T[0], cpu_env,
                                offsetof(CPUX86State,fpregs[reg].mmx));
-                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1, b);
             } else
 #endif
             {
-                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
+                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
-                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1, b);
             }
             break;
         case 0x17e: /* movd ea, xmm */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                tcg_gen_ld_i64(cpu_T[0], cpu_env,
+                tcg_gen_ld_i64(cpu_T[0], cpu_env,
                                offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
-                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1, b);
             } else
 #endif
             {
-                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
+                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
-                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1, b);
             }
             break;
         case 0x27e: /* movq xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3295,7 +3426,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
             break;
         case 0x7f: /* movq ea, mm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             } else {
                 rm = (modrm & 7);
@@ -3310,7 +3441,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
         case 0x17f: /* movdqa ea, xmm */
         case 0x27f: /* movdqu ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3320,7 +3451,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
             break;
         case 0x211: /* movss ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
                 gen_op_st_v(s, MO_32, cpu_T[0], cpu_A0);
             } else {
@@ -3331,7 +3462,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
             break;
         case 0x311: /* movsd ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3343,7 +3474,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
         case 0x013: /* movlps */
         case 0x113: /* movlpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3353,7 +3484,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
         case 0x017: /* movhps */
         case 0x117: /* movhpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(1)));
             } else {
@@ -3417,7 +3548,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
         case 0x12a: /* cvtpi2pd */
             gen_helper_enter_mmx(cpu_env);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 op2_offset = offsetof(CPUX86State,mmx_t0);
                 gen_ldq_env_A0(s, op2_offset);
             } else {
@@ -3440,7 +3571,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
         case 0x22a: /* cvtsi2ss */
         case 0x32a: /* cvtsi2sd */
             ot = mo_64_32(s->dflag);
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
             if (ot == MO_32) {
@@ -3462,7 +3593,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
         case 0x12d: /* cvtpd2pi */
             gen_helper_enter_mmx(cpu_env);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 op2_offset = offsetof(CPUX86State,xmm_t0);
                 gen_ldo_env_A0(s, op2_offset);
             } else {
@@ -3493,7 +3624,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
         case 0x32d: /* cvtsd2si */
             ot = mo_64_32(s->dflag);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 if ((b >> 8) & 1) {
                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.XMM_Q(0)));
                 } else {
@@ -3525,7 +3656,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
         case 0xc4: /* pinsrw */
         case 0x1c4:
             s->rip_offset = 1;
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
             val = cpu_ldub_code(env, s->pc++);
             if (b1) {
                 val &= 7;
@@ -3559,7 +3690,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
             break;
         case 0x1d6: /* movq ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3626,7 +3757,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                 } else {
                     op2_offset = offsetof(CPUX86State,xmm_t0);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     switch (b) {
                     case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
                     case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
@@ -3660,7 +3791,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                 } else {
                     op2_offset = offsetof(CPUX86State,mmx_t0);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_ldq_env_A0(s, op2_offset);
                 }
             }
@@ -3701,7 +3832,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
                 }

                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
                                  cpu_T[0], tcg_const_i32(8 << ot));

@@ -3729,7 +3860,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
                     ot = MO_64;
                 }

-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 if ((b & 1) == 0) {
                     tcg_gen_qemu_ld_tl(cpu_T[0], cpu_A0,
                                        s->mem_index, ot | MO_BE);
@@ -3747,7 +3878,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 tcg_gen_andc_tl(cpu_T[0], cpu_regs[s->vex_v], cpu_T[0]);
                 gen_op_mov_reg_v(ot, reg, cpu_T[0]);
                 gen_op_update1_cc();
@@ -3764,7 +3895,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
                 {
                     TCGv bound, zero;

-                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                     /* Extract START, and shift the operand.
                        Shifts larger than operand size get zeros.  */
                     tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
@@ -3801,7 +3932,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 tcg_gen_ext8u_tl(cpu_T[1], cpu_regs[s->vex_v]);
                 {
                     TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
@@ -3828,7 +3959,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 switch (ot) {
                 default:
                     tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
@@ -3854,7 +3985,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 /* Note that by zero-extending the mask operand, we
                    automatically handle zero-extending the result.  */
                 if (ot == MO_64) {
@@ -3872,7 +4003,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 /* Note that by zero-extending the mask operand, we
                    automatically handle zero-extending the result.  */
                 if (ot == MO_64) {
@@ -3892,7 +4023,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
                     int end_op;

                     ot = mo_64_32(s->dflag);
-                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);

                     /* Re-use the carry-out from a previous round.  */
                     TCGV_UNUSED(carry_in);
@@ -3971,7 +4102,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 if (ot == MO_64) {
                     tcg_gen_andi_tl(cpu_T[1], cpu_regs[s->vex_v], 63);
                 } else {
@@ -4003,7 +4134,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);

                 switch (reg & 7) {
                 case 1: /* blsr By,Ey */
@@ -4062,7 +4193,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
                 ot = mo_64_32(s->dflag);
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3)
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                 reg = ((modrm >> 3) & 7) | rex_r;
                 val = cpu_ldub_code(env, s->pc++);
                 switch (b) {
@@ -4199,7 +4330,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                 } else {
                     op2_offset = offsetof(CPUX86State,xmm_t0);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_ldo_env_A0(s, op2_offset);
                 }
             } else {
@@ -4208,7 +4339,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                 } else {
                     op2_offset = offsetof(CPUX86State,mmx_t0);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_ldq_env_A0(s, op2_offset);
                 }
             }
@@ -4242,7 +4373,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 b = cpu_ldub_code(env, s->pc++);
                 if (ot == MO_64) {
                     tcg_gen_rotri_tl(cpu_T[0], cpu_T[0], b & 63);
@@ -4278,7 +4409,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
             if (mod != 3) {
                 int sz = 4;

-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 op2_offset = offsetof(CPUX86State,xmm_t0);

                 switch (b) {
@@ -4326,7 +4457,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
         } else {
             op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 op2_offset = offsetof(CPUX86State,mmx_t0);
                 gen_ldq_env_A0(s, op2_offset);
             } else {
@@ -4404,6 +4535,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
     int modrm, reg, rm, mod, op, opreg, val;
     target_ulong next_eip, tval;
     int rex_w, rex_r;
+    int fp_op, fp_ip, fp_cs;

     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
         tcg_gen_debug_insn_start(pc_start);
@@ -4595,7 +4727,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 mod = (modrm >> 6) & 3;
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3) {
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     opreg = OR_TMP0;
                 } else if (op == OP_XORL && rm == reg) {
                 xor_zero:
@@ -4616,7 +4748,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 reg = ((modrm >> 3) & 7) | rex_r;
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3) {
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
                 } else if (op == OP_XORL && rm == reg) {
                     goto xor_zero;
@@ -4655,7 +4787,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                     s->rip_offset = 1;
                 else
                     s->rip_offset = insn_const_size(ot);
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 opreg = OR_TMP0;
             } else {
                 opreg = rm;
@@ -4698,7 +4830,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         if (mod != 3) {
             if (op == 0)
                 s->rip_offset = insn_const_size(ot);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
         } else {
             gen_op_mov_v_reg(ot, cpu_T[0], rm);
@@ -4906,7 +5038,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             }
         }
         if (mod != 3) {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             if (op >= 2 && op != 3 && op != 5)
                 gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
         } else {
@@ -4946,6 +5078,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         do_lcall:
             if (s->pe && !s->vm86) {
                 gen_update_cc_op(s);
+                gen_update_ep(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T[1],
@@ -4973,6 +5106,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         do_ljmp:
             if (s->pe && !s->vm86) {
                 gen_update_cc_op(s);
+                gen_update_ep(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T[1],
@@ -4998,7 +5132,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;

-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         gen_op_mov_v_reg(ot, cpu_T[1], reg);
         gen_op_testl_T0_T1_cc();
         set_cc_op(s, CC_OP_LOGICB + ot);
@@ -5073,7 +5207,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             s->rip_offset = insn_const_size(ot);
         else if (b == 0x6b)
             s->rip_offset = 1;
-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         if (b == 0x69) {
             val = insn_get(env, s, ot);
             tcg_gen_movi_tl(cpu_T[1], val);
@@ -5130,7 +5264,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_op_mov_reg_v(ot, reg, cpu_T[1]);
             gen_op_mov_reg_v(ot, rm, cpu_T[0]);
         } else {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_op_mov_v_reg(ot, cpu_T[0], reg);
             gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
             tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
@@ -5159,7 +5293,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_mov_v_reg(ot, t0, rm);
             } else {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 tcg_gen_mov_tl(a0, cpu_A0);
                 gen_op_ld_v(s, ot, t0, a0);
                 rm = 0; /* avoid warning */
@@ -5207,16 +5341,16 @@ static target_ulong disas_insn(CPUX86State
*env, DisasContext *s,
                 goto illegal_op;
             gen_jmp_im(pc_start - s->cs_base);
             gen_update_cc_op(s);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_helper_cmpxchg16b(cpu_env, cpu_A0);
         } else
-#endif
+#endif
         {
             if (!(s->cpuid_features & CPUID_CX8))
                 goto illegal_op;
             gen_jmp_im(pc_start - s->cs_base);
             gen_update_cc_op(s);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_helper_cmpxchg8b(cpu_env, cpu_A0);
         }
         set_cc_op(s, CC_OP_EFLAGS);
@@ -5266,7 +5400,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         } else {
             /* NOTE: order is important too for MMU exceptions */
             s->popl_esp_hack = 1 << ot;
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
             s->popl_esp_hack = 0;
             gen_pop_update(s, ot);
         }
@@ -5352,7 +5486,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         reg = ((modrm >> 3) & 7) | rex_r;

         /* generate a generic store */
-        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
+        gen_ldst_modrm(env, s, modrm, ot, reg, 1, b);
         break;
     case 0xc6:
     case 0xc7: /* mov Ev, Iv */
@@ -5361,7 +5495,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         mod = (modrm >> 6) & 3;
         if (mod != 3) {
             s->rip_offset = insn_const_size(ot);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
         }
         val = insn_get(env, s, ot);
         tcg_gen_movi_tl(cpu_T[0], val);
@@ -5377,7 +5511,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;

-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         gen_op_mov_reg_v(ot, reg, cpu_T[0]);
         break;
     case 0x8e: /* mov seg, Gv */
@@ -5385,7 +5519,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         reg = (modrm >> 3) & 7;
         if (reg >= 6 || reg == R_CS)
             goto illegal_op;
-        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
         gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
         if (reg == R_SS) {
             /* if reg == SS, inhibit interrupts/trace */
@@ -5408,7 +5542,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             goto illegal_op;
         gen_op_movl_T0_seg(reg);
         ot = mod == 3 ? dflag : MO_16;
-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
         break;

     case 0x1b6: /* movzbS Gv, Eb */
@@ -5450,7 +5584,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 }
                 gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             } else {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, s_ot, cpu_T[0], cpu_A0);
                 gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             }
@@ -5468,7 +5602,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         s->override = -1;
         val = s->addseg;
         s->addseg = 0;
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         s->addseg = val;
         gen_op_mov_reg_v(ot, reg, cpu_A0);
         break;
@@ -5558,7 +5692,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_op_mov_reg_v(ot, rm, cpu_T[0]);
             gen_op_mov_reg_v(ot, reg, cpu_T[1]);
         } else {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_op_mov_v_reg(ot, cpu_T[0], reg);
             /* for xchg, lock is implicit */
             if (!(prefixes & PREFIX_LOCK))
@@ -5593,7 +5727,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
         gen_add_A0_im(s, 1 << ot);
         /* load the segment first to handle exceptions properly */
@@ -5624,7 +5758,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 if (shift == 2) {
                     s->rip_offset = 1;
                 }
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 opreg = OR_TMP0;
             } else {
                 opreg = (modrm & 7) | REX_B(s);
@@ -5674,7 +5808,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         rm = (modrm & 7) | REX_B(s);
         reg = ((modrm >> 3) & 7) | rex_r;
         if (mod != 3) {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             opreg = OR_TMP0;
         } else {
             opreg = rm;
@@ -5705,7 +5839,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         op = ((b & 7) << 3) | ((modrm >> 3) & 7);
         if (mod != 3) {
             /* memory op */
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             switch(op) {
             case 0x00 ... 0x07: /* fxxxs */
             case 0x10 ... 0x17: /* fixxxl */
@@ -5832,7 +5966,9 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             case 0x0c: /* fldenv mem */
                 gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_fldenv(cpu_env, cpu_A0,
+                                  tcg_const_i32(dflag == MO_32),
+                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
                 break;
             case 0x0d: /* fldcw mem */
                 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
@@ -5841,8 +5977,11 @@ static target_ulong disas_insn(CPUX86State
*env, DisasContext *s,
                 break;
             case 0x0e: /* fnstenv mem */
                 gen_update_cc_op(s);
+                gen_update_ep(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_fstenv(cpu_env, cpu_A0,
+                                  tcg_const_i32(dflag == MO_32),
+                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
                 break;
             case 0x0f: /* fnstcw mem */
                 gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
@@ -5863,12 +6002,17 @@ static target_ulong disas_insn(CPUX86State
*env, DisasContext *s,
             case 0x2c: /* frstor mem */
                 gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_frstor(cpu_env, cpu_A0,
+                                  tcg_const_i32(dflag == MO_32),
+                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
                 break;
             case 0x2e: /* fnsave mem */
                 gen_update_cc_op(s);
+                gen_update_ep(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_fsave(cpu_env, cpu_A0,
+                                 tcg_const_i32(dflag == MO_32),
+                                 tcg_const_i32(IS_PROTECTED_MODE(s)));
                 break;
             case 0x2f: /* fnstsw mem */
                 gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
@@ -6209,6 +6353,12 @@ static target_ulong disas_insn(CPUX86State
*env, DisasContext *s,
                 goto illegal_op;
             }
         }
+        if (instr_is_x87_nc(modrm, b)) {
+            fp_op = ((b & 0x7) << 8) | (modrm & 0xff);
+            fp_ip = pc_start - s->cs_base;
+            fp_cs = env->segs[R_CS].selector;
+            set_ep(s, fp_op, fp_ip, fp_cs);
+        }
         break;
         /************************/
         /* string ops */
@@ -6393,6 +6543,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
     do_lret:
         if (s->pe && !s->vm86) {
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
                                       tcg_const_i32(val));
@@ -6430,6 +6581,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             }
         } else {
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
                                       tcg_const_i32(s->pc - s->cs_base));
@@ -6527,7 +6679,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
     case 0x190 ... 0x19f: /* setcc Gv */
         modrm = cpu_ldub_code(env, s->pc++);
         gen_setcc1(s, b, cpu_T[0]);
-        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
+        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1, b);
         break;
     case 0x140 ... 0x14f: /* cmov Gv, Ev */
         if (!(s->cpuid_features & CPUID_CMOV)) {
@@ -6657,7 +6809,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         rm = (modrm & 7) | REX_B(s);
         if (mod != 3) {
             s->rip_offset = 1;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
         } else {
             gen_op_mov_v_reg(ot, cpu_T[0], rm);
@@ -6688,7 +6840,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         rm = (modrm & 7) | REX_B(s);
         gen_op_mov_v_reg(MO_32, cpu_T[1], reg);
         if (mod != 3) {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             /* specific case: we need to add a displacement */
             gen_exts(ot, cpu_T[1]);
             tcg_gen_sari_tl(cpu_tmp0, cpu_T[1], 3 + ot);
@@ -6764,7 +6916,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         ot = dflag;
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         gen_extu(ot, cpu_T[0]);

         /* Note that lzcnt and tzcnt are in different extensions.  */
@@ -6884,6 +7036,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
         } else {
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_fwait(cpu_env);
         }
@@ -6903,6 +7056,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         if (CODE64(s))
             goto illegal_op;
         gen_update_cc_op(s);
+        gen_update_ep(s);
         gen_jmp_im(pc_start - s->cs_base);
         gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
         break;
@@ -6967,7 +7121,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         if (mod == 3)
             goto illegal_op;
         gen_op_mov_v_reg(ot, cpu_T[0], reg);
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         gen_jmp_im(pc_start - s->cs_base);
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         if (ot == MO_16) {
@@ -7095,6 +7249,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
             gen_eob(s);
@@ -7104,6 +7259,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
     case 0x105: /* syscall */
         /* XXX: is it usable in real mode ? */
         gen_update_cc_op(s);
+        gen_update_ep(s);
         gen_jmp_im(pc_start - s->cs_base);
         gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
         gen_eob(s);
@@ -7113,6 +7269,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
             /* condition codes are modified only in long mode */
@@ -7133,6 +7290,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
             s->is_jmp = DISAS_TB_JUMP;
@@ -7149,7 +7307,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
offsetof(CPUX86State,ldt.selector));
             ot = mod == 3 ? dflag : MO_16;
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
             break;
         case 2: /* lldt */
             if (!s->pe || s->vm86)
@@ -7158,7 +7316,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
-                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_lldt(cpu_env, cpu_tmp2_i32);
@@ -7170,7 +7328,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
offsetof(CPUX86State,tr.selector));
             ot = mod == 3 ? dflag : MO_16;
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
             break;
         case 3: /* ltr */
             if (!s->pe || s->vm86)
@@ -7179,7 +7337,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
-                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_ltr(cpu_env, cpu_tmp2_i32);
@@ -7189,7 +7347,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         case 5: /* verw */
             if (!s->pe || s->vm86)
                 goto illegal_op;
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
             gen_update_cc_op(s);
             if (op == 4) {
                 gen_helper_verr(cpu_env, cpu_T[0]);
@@ -7212,7 +7370,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             if (mod == 3)
                 goto illegal_op;
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
gdt.limit));
             gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
             gen_add_A0_im(s, 2);
@@ -7241,6 +7399,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                         s->cpl != 0)
                         goto illegal_op;
                     gen_update_cc_op(s);
+                    gen_update_ep(s);
                     gen_jmp_im(pc_start - s->cs_base);
                     gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
                     gen_eob(s);
@@ -7268,7 +7427,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 }
             } else { /* sidt */
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
offsetof(CPUX86State, idt.limit));
                 gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
                 gen_add_A0_im(s, 2);
@@ -7371,7 +7530,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             } else {
                 gen_svm_check_intercept(s, pc_start,
                                         op==2 ? SVM_EXIT_GDTR_WRITE :
SVM_EXIT_IDTR_WRITE);
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, MO_16, cpu_T[1], cpu_A0);
                 gen_add_A0_im(s, 2);
                 gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T[0], cpu_A0);
@@ -7394,14 +7553,14 @@ static target_ulong disas_insn(CPUX86State
*env, DisasContext *s,
 #else
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,cr[0]));
 #endif
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1, b);
             break;
         case 6: /* lmsw */
             if (s->cpl != 0) {
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
-                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
                 gen_helper_lmsw(cpu_env, cpu_T[0]);
                 gen_jmp_im(s->pc - s->cs_base);
                 gen_eob(s);
@@ -7413,8 +7572,9 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                     gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                 } else {
                     gen_update_cc_op(s);
+                    gen_update_ep(s);
                     gen_jmp_im(pc_start - s->cs_base);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_helper_invlpg(cpu_env, cpu_A0);
                     gen_jmp_im(s->pc - s->cs_base);
                     gen_eob(s);
@@ -7446,6 +7606,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                     if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP))
                         goto illegal_op;
                     gen_update_cc_op(s);
+                    gen_update_ep(s);
                     gen_jmp_im(pc_start - s->cs_base);
                     if (use_icount)
                         gen_io_start();
@@ -7493,7 +7654,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 }
                 gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             } else {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T[0], cpu_A0);
                 gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             }
@@ -7514,7 +7675,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             mod = (modrm >> 6) & 3;
             rm = modrm & 7;
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, ot, t0, cpu_A0);
                 a0 = tcg_temp_local_new();
                 tcg_gen_mov_tl(a0, cpu_A0);
@@ -7556,7 +7717,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             ot = dflag != MO_16 ? MO_32 : MO_16;
             modrm = cpu_ldub_code(env, s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
             t0 = tcg_temp_local_new();
             gen_update_cc_op(s);
             if (b == 0x102) {
@@ -7584,7 +7745,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         case 3: /* prefetchnt0 */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             /* nothing more to do */
             break;
         default: /* nop (multi byte) */
@@ -7624,6 +7785,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             case 4:
             case 8:
                 gen_update_cc_op(s);
+                gen_update_ep(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 if (b & 2) {
                     gen_op_mov_v_reg(ot, cpu_T[0], rm);
@@ -7696,7 +7858,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             goto illegal_op;
         reg = ((modrm >> 3) & 7) | rex_r;
         /* generate a generic store */
-        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
+        gen_ldst_modrm(env, s, modrm, ot, reg, 1, b);
         break;
     case 0x1ae:
         modrm = cpu_ldub_code(env, s->pc++);
@@ -7704,6 +7866,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         op = (modrm >> 3) & 7;
         switch(op) {
         case 0: /* fxsave */
+            gen_update_ep(s);
             if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
                 (s->prefix & PREFIX_LOCK))
                 goto illegal_op;
@@ -7711,10 +7874,13 @@ static target_ulong disas_insn(CPUX86State
*env, DisasContext *s,
                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
                 break;
             }
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_fxsave(cpu_env, cpu_A0, tcg_const_i32(dflag == MO_64));
+            gen_helper_fxsave(cpu_env, cpu_A0,
+                              tcg_const_i32(dflag == MO_32),
+                              tcg_const_i32(dflag == MO_64));
             break;
         case 1: /* fxrstor */
             if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
@@ -7724,10 +7890,12 @@ static target_ulong disas_insn(CPUX86State
*env, DisasContext *s,
                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
                 break;
             }
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_fxrstor(cpu_env, cpu_A0, tcg_const_i32(dflag == MO_64));
+            gen_helper_fxrstor(cpu_env, cpu_A0,
+                               tcg_const_i32(dflag == MO_32),
+                               tcg_const_i32(dflag == MO_64));
             break;
         case 2: /* ldmxcsr */
         case 3: /* stmxcsr */
@@ -7738,7 +7906,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK) ||
                 mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             if (op == 2) {
                 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
                                     s->mem_index, MO_LEUL);
@@ -7763,7 +7931,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 /* clflush */
                 if (!(s->cpuid_features & CPUID_CLFLUSH))
                     goto illegal_op;
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
             }
             break;
         default:
@@ -7775,7 +7943,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         /* ignore for now */
         break;
     case 0x1aa: /* rsm */
@@ -7783,6 +7951,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         if (!(s->flags & HF_SMM_MASK))
             goto illegal_op;
         gen_update_cc_op(s);
+        gen_update_ep(s);
         gen_jmp_im(s->pc - s->cs_base);
         gen_helper_rsm(cpu_env);
         gen_eob(s);
@@ -7803,7 +7972,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             ot = mo_64_32(dflag);
         }

-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         gen_helper_popcnt(cpu_T[0], cpu_env, cpu_T[0], tcg_const_i32(ot));
         gen_op_mov_reg_v(ot, reg, cpu_T[0]);

@@ -7880,6 +8049,17 @@ void optimize_flags_init(void)
     cpu_cc_src2 = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src2),
                                      "cc_src2");

+    cpu_fpop = tcg_global_mem_new_i32(TCG_AREG0,
+                                      offsetof(CPUX86State, fpop), "fpop");
+    cpu_fpip = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, fpip),
+                                     "fpip");
+    cpu_fpdp = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, fpdp),
+                                     "fpdp");
+    cpu_fpds = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUX86State, fpds),
+                                     "fpds");
+    cpu_fpcs = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUX86State, fpcs),
+                                     "fpcs");
+
     for (i = 0; i < CPU_NB_REGS; ++i) {
         cpu_regs[i] = tcg_global_mem_new(TCG_AREG0,
                                          offsetof(CPUX86State, regs[i]),
@@ -7924,6 +8104,8 @@ static inline void
gen_intermediate_code_internal(X86CPU *cpu,
     dc->singlestep_enabled = cs->singlestep_enabled;
     dc->cc_op = CC_OP_DYNAMIC;
     dc->cc_op_dirty = false;
+    dc->fp_op = FP_EP_INVALID;
+    dc->fp_ep_dirty = false;
     dc->cs_base = cs_base;
     dc->tb = tb;
     dc->popl_esp_hack = 0;
@@ -7997,6 +8179,9 @@ static inline void
gen_intermediate_code_internal(X86CPU *cpu,
             }
             tcg_ctx.gen_opc_pc[lj] = pc_ptr;
             gen_opc_cc_op[lj] = dc->cc_op;
+            gen_opc_fp_op[lj] = dc->fp_op;
+            gen_opc_fp_ip[lj] = dc->fp_ip;
+            gen_opc_fp_cs[lj] = dc->fp_cs;
             tcg_ctx.gen_opc_instr_start[lj] = 1;
             tcg_ctx.gen_opc_icount[lj] = num_insns;
         }
@@ -8080,6 +8265,7 @@ void gen_intermediate_code_pc(CPUX86State *env,
TranslationBlock *tb)
 void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb, int pc_pos)
 {
     int cc_op;
+    uint16_t fp_op;
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_OP)) {
         int i;
@@ -8099,4 +8285,10 @@ void restore_state_to_opc(CPUX86State *env,
TranslationBlock *tb, int pc_pos)
     cc_op = gen_opc_cc_op[pc_pos];
     if (cc_op != CC_OP_DYNAMIC)
         env->cc_op = cc_op;
+    fp_op = gen_opc_fp_op[pc_pos];
+    if (fp_op & FP_EP_VALID) {
+        tcg_gen_movi_i32(cpu_fpop, fp_op);
+        tcg_gen_movi_tl(cpu_fpip, gen_opc_fp_ip[pc_pos]);
+        tcg_gen_movi_i32(cpu_fpcs, gen_opc_fp_cs[pc_pos]);
+    }
 }

On Sun, Jun 22, 2014 at 9:17 PM, Jaume Martí <address@hidden> wrote:
> Thanks Richard for your feedback. I am going to correct the patch and
> resubmit it.
>
> Best regards,
> Jaume
>
> On Sun, Jun 22, 2014 at 8:55 PM, Richard Henderson <address@hidden> wrote:
>> On 06/22/2014 07:55 AM, Jaume Martí wrote:
>>> -        cpu_x86_fsave(env, fpstate_addr, 1);
>>> -        fpstate->status = fpstate->sw;
>>> -        magic = 0xffff;
>>> +    cpu_x86_fsave(env, fpstate_addr);
>>> +    fpstate->status = fpstate->sw;
>>> +    magic = 0xffff;
>>
>> This patch needs to be split into format fixes and the actual change to be
>> reviewed.
>>
>>> -    /* KVM-only so far */
>>> -    uint16_t fpop;
>>> +    union {
>>> +        uint32_t tcg;
>>> +        uint16_t kvm;
>>> +    } fpop;
>>
>> This is highly questionable.
>>
>>>      .fields = (VMStateField[]) {
>>> -        VMSTATE_UINT16(env.fpop, X86CPU),
>>> +        VMSTATE_UINT16(env.fpop.kvm, X86CPU),
>>
>> You're breaking save/restore in tcg.  KVM is not required for migration.
>>
>>> +        if (non_control_x87_instr(modrm, b)) {
>>> +            tcg_gen_movi_i32(cpu_fpop, ((b & 0x7) << 8) | (modrm & 0xff));
>>> +            tcg_gen_movi_tl(cpu_fpip, pc_start - s->cs_base);
>>> +            tcg_gen_movi_i32(cpu_fpcs, env->segs[R_CS].selector);
>>> +        }
>>
>> I strongly suspect you can implement this feature without having to add 3
>> (largely redundant) register writes to every x87 instruction executed.
>>
>> See how restore_state_to_opc works to compute the value of CC_OP during
>> translation.  You can do the same thing to recover these three values.
>>
>> You do have to sync these values before normal exits from the TB, but you
>> only have to do that once, not once for every insn executed.  See
>> gen_update_cc_op.
>>
>>
>> r~



reply via email to

[Prev in Thread] Current Thread [Next in Thread]