qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [4180] 3DNow! instruction set emulation


From: Fabrice Bellard
Subject: Re: [Qemu-devel] [4180] 3DNow! instruction set emulation
Date: Tue, 08 Apr 2008 23:52:52 +0200
User-agent: Thunderbird 1.5.0.9 (X11/20070212)

Hi,

Without a proper CPUID test for 3DNow! instruction support, this patch is
incomplete, to say the least, because it adds 3DNow! instructions for all CPUs.

Fabrice.

Aurelien Jarno wrote:
> Revision: 4180
>           http://svn.sv.gnu.org/viewvc/?view=rev&root=qemu&revision=4180
> Author:   aurel32
> Date:     2008-04-08 19:51:29 +0000 (Tue, 08 Apr 2008)
> 
> Log Message:
> -----------
> 3DNow! instruction set emulation
> 
> (Michael Tross)
> 
> Modified Paths:
> --------------
>     trunk/target-i386/cpu.h
>     trunk/target-i386/helper2.c
>     trunk/target-i386/ops_sse.h
>     trunk/target-i386/translate.c
> 
> Modified: trunk/target-i386/cpu.h
> ===================================================================
> --- trunk/target-i386/cpu.h   2008-04-08 19:51:21 UTC (rev 4179)
> +++ trunk/target-i386/cpu.h   2008-04-08 19:51:29 UTC (rev 4180)
> @@ -428,8 +428,9 @@
>  
>  typedef union {
>      uint8_t _b[8];
> -    uint16_t _w[2];
> -    uint32_t _l[1];
> +    uint16_t _w[4];
> +    uint32_t _l[2];
> +    float32 _s[2];
>      uint64_t q;
>  } MMXReg;
>  
> @@ -444,6 +445,7 @@
>  #define MMX_B(n) _b[7 - (n)]
>  #define MMX_W(n) _w[3 - (n)]
>  #define MMX_L(n) _l[1 - (n)]
> +#define MMX_S(n) _s[1 - (n)]
>  #else
>  #define XMM_B(n) _b[n]
>  #define XMM_W(n) _w[n]
> @@ -455,6 +457,7 @@
>  #define MMX_B(n) _b[n]
>  #define MMX_W(n) _w[n]
>  #define MMX_L(n) _l[n]
> +#define MMX_S(n) _s[n]
>  #endif
>  #define MMX_Q(n) q
>  
> @@ -520,6 +523,7 @@
>          int64_t i64;
>      } fp_convert;
>  
> +    float_status mmx_status; /* for 3DNow! float ops */
>      float_status sse_status;
>      uint32_t mxcsr;
>      XMMReg xmm_regs[CPU_NB_REGS];
> 
> Modified: trunk/target-i386/helper2.c
> ===================================================================
> --- trunk/target-i386/helper2.c       2008-04-08 19:51:21 UTC (rev 4179)
> +++ trunk/target-i386/helper2.c       2008-04-08 19:51:29 UTC (rev 4180)
> @@ -150,7 +150,8 @@
>              CPUID_PSE36,
>          .ext_features = CPUID_EXT_SSE3,
>          .ext2_features = (PPRO_FEATURES & 0x0183F3FF) | 
> -            CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX,
> +            CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX |
> +            CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT,
>          .ext3_features = CPUID_EXT3_SVM,
>          .xlevel = 0x8000000A,
>      },
> @@ -201,6 +202,19 @@
>          .features = 0x0383F9FF,
>          .xlevel = 0,
>      },
> +    {
> +        .name = "athlon",
> +        .level = 2,
> +        .vendor1 = 0x68747541, /* "Auth" */
> +        .vendor2 = 0x69746e65, /* "enti" */
> +        .vendor3 = 0x444d4163, /* "cAMD" */
> +        .family = 6,
> +        .model = 2,
> +        .stepping = 3,
> +        .features = PPRO_FEATURES | PPRO_FEATURES | CPUID_PSE36 | CPUID_VME 
> | CPUID_MTRR | CPUID_MCA,
> +        .ext2_features = (PPRO_FEATURES & 0x0183F3FF) | CPUID_EXT2_MMXEXT | 
> CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT,
> +        .xlevel = 0x80000008,
> +    },
>  };
>  
>  static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char 
> *cpu_model)
> 
> Modified: trunk/target-i386/ops_sse.h
> ===================================================================
> --- trunk/target-i386/ops_sse.h       2008-04-08 19:51:21 UTC (rev 4179)
> +++ trunk/target-i386/ops_sse.h       2008-04-08 19:51:29 UTC (rev 4180)
> @@ -1,5 +1,5 @@
>  /*
> - *  MMX/SSE/SSE2/PNI support
> + *  MMX/3DNow!/SSE/SSE2/SSE3/PNI support
>   *
>   *  Copyright (c) 2005 Fabrice Bellard
>   *
> @@ -409,6 +409,7 @@
>  #define FCMPEQ(a, b) (a) == (b) ? -1 : 0
>  
>  #define FMULLW(a, b) (a) * (b)
> +#define FMULHRW(a, b) ((int16_t)(a) * (int16_t)(b) + 0x8000) >> 16
>  #define FMULHUW(a, b) (a) * (b) >> 16
>  #define FMULHW(a, b) (int16_t)(a) * (int16_t)(b) >> 16
>  
> @@ -455,6 +456,9 @@
>  SSE_OP_L(op_pcmpeql, FCMPEQ)
>  
>  SSE_OP_W(op_pmullw, FMULLW)
> +#if SHIFT == 0
> +SSE_OP_W(op_pmulhrw, FMULHRW)
> +#endif
>  SSE_OP_W(op_pmulhuw, FMULHUW)
>  SSE_OP_W(op_pmulhw, FMULHW)
>  
> @@ -1383,6 +1387,175 @@
>  UNPCK_OP(l, 0)
>  UNPCK_OP(h, 1)
>  
> +/* 3DNow! float ops */
> +#if SHIFT == 0
> +void OPPROTO op_pi2fd(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    d->MMX_S(0) = int32_to_float32(s->MMX_L(0), &env->mmx_status);
> +    d->MMX_S(1) = int32_to_float32(s->MMX_L(1), &env->mmx_status);
> +}
> +
> +void OPPROTO op_pi2fw(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    d->MMX_S(0) = int32_to_float32((int16_t)s->MMX_W(0), &env->mmx_status);
> +    d->MMX_S(1) = int32_to_float32((int16_t)s->MMX_W(2), &env->mmx_status);
> +}
> +
> +void OPPROTO op_pf2id(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    d->MMX_L(0) = float32_to_int32_round_to_zero(s->MMX_S(0), 
> &env->mmx_status);
> +    d->MMX_L(1) = float32_to_int32_round_to_zero(s->MMX_S(1), 
> &env->mmx_status);
> +}
> +
> +void OPPROTO op_pf2iw(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    d->MMX_L(0) = satsw(float32_to_int32_round_to_zero(s->MMX_S(0), 
> &env->mmx_status));
> +    d->MMX_L(1) = satsw(float32_to_int32_round_to_zero(s->MMX_S(1), 
> &env->mmx_status));
> +}
> +
> +void OPPROTO op_pfacc(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    MMXReg r;
> +    r.MMX_S(0) = float32_add(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
> +    r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
> +    *d = r;
> +}
> +
> +void OPPROTO op_pfadd(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    d->MMX_S(0) = float32_add(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
> +    d->MMX_S(1) = float32_add(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
> +}
> +
> +void OPPROTO op_pfcmpeq(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    d->MMX_L(0) = float32_eq(d->MMX_S(0), s->MMX_S(0), &env->mmx_status) ? 
> -1 : 0;
> +    d->MMX_L(1) = float32_eq(d->MMX_S(1), s->MMX_S(1), &env->mmx_status) ? 
> -1 : 0;
> +}
> +
> +void OPPROTO op_pfcmpge(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    d->MMX_L(0) = float32_le(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? 
> -1 : 0;
> +    d->MMX_L(1) = float32_le(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? 
> -1 : 0;
> +}
> +
> +void OPPROTO op_pfcmpgt(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    d->MMX_L(0) = float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? 
> -1 : 0;
> +    d->MMX_L(1) = float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? 
> -1 : 0;
> +}
> +
> +void OPPROTO op_pfmax(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    if (float32_lt(d->MMX_S(0), s->MMX_S(0), &env->mmx_status))
> +        d->MMX_S(0) = s->MMX_S(0);
> +    if (float32_lt(d->MMX_S(1), s->MMX_S(1), &env->mmx_status))
> +        d->MMX_S(1) = s->MMX_S(1);
> +}
> +
> +void OPPROTO op_pfmin(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    if (float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status))
> +        d->MMX_S(0) = s->MMX_S(0);
> +    if (float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status))
> +        d->MMX_S(1) = s->MMX_S(1);
> +}
> +
> +void OPPROTO op_pfmul(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    d->MMX_S(0) = float32_mul(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
> +    d->MMX_S(1) = float32_mul(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
> +}
> +
> +void OPPROTO op_pfnacc(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    MMXReg r;
> +    r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
> +    r.MMX_S(1) = float32_sub(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
> +    *d = r;
> +}
> +
> +void OPPROTO op_pfpnacc(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    MMXReg r;
> +    r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
> +    r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
> +    *d = r;
> +}
> +
> +void OPPROTO op_pfrcp(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    d->MMX_S(0) = approx_rcp(s->MMX_S(0));
> +    d->MMX_S(1) = d->MMX_S(0);
> +}
> +
> +void OPPROTO op_pfrsqrt(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    d->MMX_L(1) = s->MMX_L(0) & 0x7fffffff;
> +    d->MMX_S(1) = approx_rsqrt(d->MMX_S(1));
> +    d->MMX_L(1) |= s->MMX_L(0) & 0x80000000;
> +    d->MMX_L(0) = d->MMX_L(1);
> +}
> +
> +void OPPROTO op_pfsub(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    d->MMX_S(0) = float32_sub(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
> +    d->MMX_S(1) = float32_sub(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
> +}
> +
> +void OPPROTO op_pfsubr(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    d->MMX_S(0) = float32_sub(s->MMX_S(0), d->MMX_S(0), &env->mmx_status);
> +    d->MMX_S(1) = float32_sub(s->MMX_S(1), d->MMX_S(1), &env->mmx_status);
> +}
> +
> +void OPPROTO op_pswapd(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    MMXReg r;
> +    r.MMX_L(0) = s->MMX_L(1);
> +    r.MMX_L(1) = s->MMX_L(0);
> +    *d = r;
> +}
> +#endif
> +
>  #undef SHIFT
>  #undef XMM_ONLY
>  #undef Reg
> 
> Modified: trunk/target-i386/translate.c
> ===================================================================
> --- trunk/target-i386/translate.c     2008-04-08 19:51:21 UTC (rev 4179)
> +++ trunk/target-i386/translate.c     2008-04-08 19:51:29 UTC (rev 4180)
> @@ -2408,12 +2408,16 @@
>  };
>  
>  #define SSE_SPECIAL ((GenOpFunc2 *)1)
> +#define SSE_DUMMY ((GenOpFunc2 *)2)
>  
>  #define MMX_OP2(x) { gen_op_ ## x ## _mmx, gen_op_ ## x ## _xmm }
>  #define SSE_FOP(x) { gen_op_ ## x ## ps, gen_op_ ## x ## pd, \
>                       gen_op_ ## x ## ss, gen_op_ ## x ## sd, }
>  
>  static GenOpFunc2 *sse_op_table1[256][4] = {
> +    /* 3DNow! extensions */
> +    [0x0e] = { SSE_DUMMY }, /* femms */
> +    [0x0f] = { SSE_DUMMY }, /* pf... */
>      /* pure SSE operations */
>      [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* 
> movups, movupd, movss, movsd */
>      [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* 
> movups, movupd, movss, movsd */
> @@ -2480,7 +2484,7 @@
>      [0x74] = MMX_OP2(pcmpeqb),
>      [0x75] = MMX_OP2(pcmpeqw),
>      [0x76] = MMX_OP2(pcmpeql),
> -    [0x77] = { SSE_SPECIAL }, /* emms */
> +    [0x77] = { SSE_DUMMY }, /* emms */
>      [0x7c] = { NULL, gen_op_haddpd, NULL, gen_op_haddps },
>      [0x7d] = { NULL, gen_op_hsubpd, NULL, gen_op_hsubps },
>      [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , 
> movq */
> @@ -2577,6 +2581,33 @@
>      SSE_FOP(cmpord),
>  };
>  
> +static GenOpFunc2 *sse_op_table5[256] = {
> +    [0x0c] = gen_op_pi2fw,
> +    [0x0d] = gen_op_pi2fd,
> +    [0x1c] = gen_op_pf2iw,
> +    [0x1d] = gen_op_pf2id,
> +    [0x8a] = gen_op_pfnacc,
> +    [0x8e] = gen_op_pfpnacc,
> +    [0x90] = gen_op_pfcmpge,
> +    [0x94] = gen_op_pfmin,
> +    [0x96] = gen_op_pfrcp,
> +    [0x97] = gen_op_pfrsqrt,
> +    [0x9a] = gen_op_pfsub,
> +    [0x9e] = gen_op_pfadd,
> +    [0xa0] = gen_op_pfcmpgt,
> +    [0xa4] = gen_op_pfmax,
> +    [0xa6] = gen_op_movq, /* pfrcpit1; no need to actually increase 
> precision */
> +    [0xa7] = gen_op_movq, /* pfrsqit1 */
> +    [0xaa] = gen_op_pfsubr,
> +    [0xae] = gen_op_pfacc,
> +    [0xb0] = gen_op_pfcmpeq,
> +    [0xb4] = gen_op_pfmul,
> +    [0xb6] = gen_op_movq, /* pfrcpit2 */
> +    [0xb7] = gen_op_pmulhrw_mmx,
> +    [0xbb] = gen_op_pswapd,
> +    [0xbf] = gen_op_pavgb_mmx /* pavgusb */
> +};
> +
>  static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
>  {
>      int b1, op1_offset, op2_offset, is_xmm, val, ot;
> @@ -2596,7 +2627,7 @@
>      sse_op2 = sse_op_table1[b][b1];
>      if (!sse_op2)
>          goto illegal_op;
> -    if (b <= 0x5f || b == 0xc6 || b == 0xc2) {
> +    if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
>          is_xmm = 1;
>      } else {
>          if (b1 == 0) {
> @@ -2618,8 +2649,8 @@
>      }
>      if (is_xmm && !(s->flags & HF_OSFXSR_MASK))
>          goto illegal_op;
> -    if (b == 0x77) {
> -        /* emms */
> +    if (b == 0x77 || b == 0x0e) {
> +        /* emms or femms */
>          gen_op_emms();
>          return;
>      }
> @@ -3151,6 +3182,13 @@
>              }
>          }
>          switch(b) {
> +        case 0x0f: /* 3DNow! data insns */
> +            val = ldub_code(s->pc++);
> +            sse_op2 = sse_op_table5[val];
> +            if (!sse_op2)
> +                goto illegal_op;
> +            sse_op2(op1_offset, op2_offset);
> +            break;
>          case 0x70: /* pshufx insn */
>          case 0xc6: /* pshufx insn */
>              val = ldub_code(s->pc++);
> @@ -6148,7 +6186,7 @@
>              gen_eob(s);
>          }
>          break;
> -    /* MMX/SSE/SSE2/PNI support */
> +    /* MMX/3DNow!/SSE/SSE2/SSE3 support */
>      case 0x1c3: /* MOVNTI reg, mem */
>          if (!(s->cpuid_features & CPUID_SSE2))
>              goto illegal_op;
> @@ -6214,6 +6252,7 @@
>          case 7: /* sfence / clflush */
>              if ((modrm & 0xc7) == 0xc0) {
>                  /* sfence */
> +                /* XXX: also check for cpuid_ext2_features & CPUID_EXT2_EMMX 
> */
>                  if (!(s->cpuid_features & CPUID_SSE))
>                      goto illegal_op;
>              } else {
> @@ -6227,8 +6266,11 @@
>              goto illegal_op;
>          }
>          break;
> -    case 0x10d: /* prefetch */
> +    case 0x10d: /* 3DNow! prefetch(w) */
>          modrm = ldub_code(s->pc++);
> +        mod = (modrm >> 6) & 3;
> +        if (mod == 3)
> +            goto illegal_op;
>          gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
>          /* ignore for now */
>          break;
> @@ -6245,6 +6287,9 @@
>          gen_op_rsm();
>          gen_eob(s);
>          break;
> +    case 0x10e ... 0x10f:
> +        /* 3DNow! instructions, ignore prefixes */
> +        s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
>      case 0x110 ... 0x117:
>      case 0x128 ... 0x12f:
>      case 0x150 ... 0x177:
> 
> 
> 
> 
> 





reply via email to

[Prev in Thread] Current Thread [Next in Thread]