[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v2 06/37] target/i386: add ALU load/writeback core
From: Paolo Bonzini
Subject: [PATCH v2 06/37] target/i386: add ALU load/writeback core
Date: Tue, 20 Sep 2022 19:24:36 +0200
Add generic code generation that takes care of preparing operands
around calls to decode.e.gen in a table-driven manner, so that ALU
operations need not take care of that.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/decode-new.c.inc | 33 ++++++-
target/i386/tcg/decode-new.h | 7 ++
target/i386/tcg/emit.c.inc | 155 +++++++++++++++++++++++++++++++
target/i386/tcg/translate.c | 18 ++++
4 files changed, 212 insertions(+), 1 deletion(-)
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index a908e8b086..be4e5705ed 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -513,6 +513,20 @@ static bool decode_insn(DisasContext *s, CPUX86State *env, X86DecodeFunc decode_
return true;
}
+static void decode_temp_free(X86DecodedOp *op)
+{
+ if (op->v_ptr) {
+ tcg_temp_free_ptr(op->v_ptr);
+ }
+}
+
+static void decode_temps_free(X86DecodedInsn *decode)
+{
+ decode_temp_free(&decode->op[0]);
+ decode_temp_free(&decode->op[1]);
+ decode_temp_free(&decode->op[2]);
+}
+
/*
* Convert one instruction. s->base.is_jmp is set if the translation must
* be stopped.
@@ -738,7 +752,24 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
if (decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea) {
gen_load_ea(s, &decode.mem);
}
- decode.e.gen(s, env, &decode);
+ if (s->prefix & PREFIX_LOCK) {
+ if (decode.op[0].unit != X86_OP_INT || !decode.op[0].has_ea) {
+ goto illegal_op;
+ }
+ gen_load(s, &decode, 2, s->T1);
+ decode.e.gen(s, env, &decode);
+ } else {
+ if (decode.op[0].unit == X86_OP_MMX) {
+ compute_mmx_offset(&decode.op[0]);
+ } else if (decode.op[0].unit == X86_OP_SSE) {
+ compute_xmm_offset(&decode.op[0]);
+ }
+ gen_load(s, &decode, 1, s->T0);
+ gen_load(s, &decode, 2, s->T1);
+ decode.e.gen(s, env, &decode);
+ gen_writeback(s, &decode, 0, s->T0);
+ }
+ decode_temps_free(&decode);
return;
illegal_op:
gen_illegal_opcode(s);
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index 2f22d4d22e..3a856b48e7 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -168,6 +168,13 @@ typedef struct X86DecodedOp {
MemOp ot; /* For b/c/d/p/s/q/v/w/y/z */
X86OpUnit unit;
bool has_ea;
+ int offset; /* For MMX and SSE */
+
+ /*
+ * This field is used internally by macros OP0_PTR/OP1_PTR/OP2_PTR,
+ * do not access directly!
+ */
+ TCGv_ptr v_ptr;
} X86DecodedOp;
struct X86DecodedInsn {
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index e86364ffc1..8f60658537 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -29,3 +29,158 @@ static void gen_load_ea(DisasContext *s, AddressParts *mem)
TCGv ea = gen_lea_modrm_1(s, *mem);
gen_lea_v_seg(s, s->aflag, ea, mem->def_seg, s->override);
}
+
+static inline int mmx_offset(MemOp ot)
+{
+ switch (ot) {
+ case MO_8:
+ return offsetof(MMXReg, MMX_B(0));
+ case MO_16:
+ return offsetof(MMXReg, MMX_W(0));
+ case MO_32:
+ return offsetof(MMXReg, MMX_L(0));
+ case MO_64:
+ return offsetof(MMXReg, MMX_Q(0));
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static inline int xmm_offset(MemOp ot)
+{
+ switch (ot) {
+ case MO_8:
+ return offsetof(ZMMReg, ZMM_B(0));
+ case MO_16:
+ return offsetof(ZMMReg, ZMM_W(0));
+ case MO_32:
+ return offsetof(ZMMReg, ZMM_L(0));
+ case MO_64:
+ return offsetof(ZMMReg, ZMM_Q(0));
+ case MO_128:
+ return offsetof(ZMMReg, ZMM_X(0));
+ case MO_256:
+ return offsetof(ZMMReg, ZMM_Y(0));
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void compute_mmx_offset(X86DecodedOp *op)
+{
+ if (!op->has_ea) {
+ op->offset = offsetof(CPUX86State, fpregs[op->n].mmx) + mmx_offset(op->ot);
+ } else {
+ op->offset = offsetof(CPUX86State, mmx_t0) + mmx_offset(op->ot);
+ }
+}
+
+static void compute_xmm_offset(X86DecodedOp *op)
+{
+ if (!op->has_ea) {
+ op->offset = ZMM_OFFSET(op->n) + xmm_offset(op->ot);
+ } else {
+ op->offset = offsetof(CPUX86State, xmm_t0) + xmm_offset(op->ot);
+ }
+}
+
+static void gen_load_sse(DisasContext *s, TCGv temp, MemOp ot, int dest_ofs, bool aligned)
+{
+ if (ot == MO_8) {
+ gen_op_ld_v(s, MO_8, temp, s->A0);
+ tcg_gen_st8_tl(temp, cpu_env, dest_ofs);
+ } else if (ot == MO_16) {
+ gen_op_ld_v(s, MO_16, temp, s->A0);
+ tcg_gen_st16_tl(temp, cpu_env, dest_ofs);
+ } else if (ot == MO_32) {
+ gen_op_ld_v(s, MO_32, temp, s->A0);
+ tcg_gen_st32_tl(temp, cpu_env, dest_ofs);
+ } else if (ot == MO_64) {
+ gen_ldq_env_A0(s, dest_ofs);
+ } else if (ot == MO_128) {
+ gen_ldo_env_A0(s, dest_ofs, aligned);
+ } else if (ot == MO_256) {
+ gen_ldy_env_A0(s, dest_ofs, aligned);
+ }
+}
+
+static void gen_load(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv v)
+{
+ X86DecodedOp *op = &decode->op[opn];
+
+ switch (op->unit) {
+ case X86_OP_SKIP:
+ return;
+ case X86_OP_SEG:
+ tcg_gen_ld32u_tl(v, cpu_env,
+ offsetof(CPUX86State,segs[op->n].selector));
+ break;
+ case X86_OP_CR:
+ tcg_gen_ld_tl(v, cpu_env, offsetof(CPUX86State, cr[op->n]));
+ break;
+ case X86_OP_DR:
+ tcg_gen_ld_tl(v, cpu_env, offsetof(CPUX86State, dr[op->n]));
+ break;
+ case X86_OP_INT:
+ if (op->has_ea) {
+ gen_op_ld_v(s, op->ot, v, s->A0);
+ } else {
+ gen_op_mov_v_reg(s, op->ot, v, op->n);
+ }
+ break;
+ case X86_OP_IMM:
+ tcg_gen_movi_tl(v, decode->immediate);
+ break;
+
+ case X86_OP_MMX:
+ compute_mmx_offset(op);
+ goto load_vector;
+
+ case X86_OP_SSE:
+ compute_xmm_offset(op);
+ load_vector:
+ if (op->has_ea) {
+ gen_load_sse(s, v, op->ot, op->offset, true);
+ }
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void gen_writeback(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv v)
+{
+ X86DecodedOp *op = &decode->op[opn];
+ switch (op->unit) {
+ case X86_OP_SKIP:
+ break;
+ case X86_OP_SEG:
+ /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp. */
+ gen_movl_seg_T0(s, op->n);
+ if (s->base.is_jmp) {
+ gen_jmp_im(s, s->pc - s->cs_base);
+ if (op->n == R_SS) {
+ s->flags &= ~HF_TF_MASK;
+ gen_eob_inhibit_irq(s, true);
+ } else {
+ gen_eob(s);
+ }
+ }
+ break;
+ case X86_OP_INT:
+ if (op->has_ea) {
+ gen_op_st_v(s, op->ot, v, s->A0);
+ } else {
+ gen_op_mov_reg_v(s, op->ot, op->n, v);
+ }
+ break;
+ case X86_OP_MMX:
+ case X86_OP_SSE:
+ break;
+ case X86_OP_CR:
+ case X86_OP_DR:
+ default:
+ g_assert_not_reached();
+ }
+}
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index ad14f56a31..e6e82f32cb 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2832,6 +2832,24 @@ static inline void gen_sto_env_A0(DisasContext *s, int offset, bool align)
tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
}
+static void gen_ldy_env_A0(DisasContext *s, int offset, bool align)
+{
+ int mem_index = s->mem_index;
+ tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index,
+ MO_LEUQ | (align ? MO_ALIGN_32 : 0));
+ tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(YMMReg, YMM_Q(0)));
+ tcg_gen_addi_tl(s->tmp0, s->A0, 8);
+ tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
+ tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(YMMReg, YMM_Q(1)));
+
+ tcg_gen_addi_tl(s->tmp0, s->A0, 16);
+ tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
+ tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(YMMReg, YMM_Q(2)));
+ tcg_gen_addi_tl(s->tmp0, s->A0, 24);
+ tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
+ tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(YMMReg, YMM_Q(3)));
+}
+
static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
{
tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(XMMReg, XMM_Q(0)));
--
2.37.2
- [PATCH v2 00/37] target/i386: new decoder + AVX implementation, Paolo Bonzini, 2022/09/20
- [PATCH v2 01/37] target/i386: Define XMMReg and access macros, align ZMM registers, Paolo Bonzini, 2022/09/20
- [PATCH v2 05/37] target/i386: add core of new i386 decoder, Paolo Bonzini, 2022/09/20
- [PATCH v2 07/37] target/i386: add CPUID[EAX=7, ECX=0].ECX to DisasContext, Paolo Bonzini, 2022/09/20
- [PATCH v2 06/37] target/i386: add ALU load/writeback core, Paolo Bonzini <=
- [PATCH v2 25/37] target/i386: clarify (un)signedness of immediates from 0F3Ah opcodes, Paolo Bonzini, 2022/09/20
- [PATCH v2 02/37] target/i386: make ldo/sto operations consistent with ldq, Paolo Bonzini, 2022/09/20
- [PATCH v2 08/37] target/i386: add CPUID feature checks to new decoder, Paolo Bonzini, 2022/09/20
- [PATCH v2 22/37] target/i386: reimplement 0x0f 0x78-0x7f, add AVX, Paolo Bonzini, 2022/09/20