qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 07/27] arc: TCG instruction definitions


From: Richard Henderson
Subject: Re: [PATCH 07/27] arc: TCG instruction definitions
Date: Wed, 7 Apr 2021 12:38:16 -0700
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Thunderbird/78.7.1

On 4/5/21 7:31 AM, cupertinomiranda@gmail.com wrote:
+void arc_gen_verifyCCFlag(const DisasCtxt *ctx, TCGv ret)

Why "verify"?  I don't see anything that verifies here...

I'll note that this can be done better, if you expose the actual comparison rather than a simple boolean. This could remove 2-3 insns from gen_cc_prologue().

See e.g. disas_jcc and DisasCompare from target/s390x.


+    { MO_UL, MO_UB, MO_UW }, /* non sign-extended */

"non sign-extended" => "zero-extended".

+void arc_gen_no_further_loads_pending(const DisasCtxt *ctx, TCGv ret)
+{
+    /* TODO: To complete on SMP support. */
+    tcg_gen_movi_tl(ret, 1);
+}
+
+void arc_gen_set_debug(const DisasCtxt *ctx, bool value)
+{
+    /* TODO: Could not find a reason to set this. */
+}

What's the point of these within the semantics? It seems like some sort of in-chip debugging thing that tcg should ignore?

+void
+arc_gen_execute_delayslot(DisasCtxt *ctx, TCGv bta, TCGv take_branch)
+{
+    assert(ctx->insn.limm_p == 0 && !ctx->in_delay_slot);
+
+    ctx->in_delay_slot = true;
+    uint32_t cpc = ctx->cpc;
+    uint32_t pcl = ctx->pcl;
+    insn_t insn = ctx->insn;
+
+    ctx->cpc = ctx->npc;
+    ctx->pcl = ctx->cpc & ((target_ulong) 0xfffffffffffffffc);
+
+    ++ctx->ds;
+
+    TCGLabel *do_not_set_bta_and_de = gen_new_label();
+    tcg_gen_brcondi_tl(TCG_COND_NE, take_branch, 1, do_not_set_bta_and_de);
+    /*
+     * In case an exception should be raised during the execution
+     * of delay slot, bta value is used to set erbta.
+     */
+    tcg_gen_mov_tl(cpu_bta, bta);
+    /* We are in a delay slot */
+    tcg_gen_mov_tl(cpu_DEf, take_branch);
+    gen_set_label(do_not_set_bta_and_de);
+
+    tcg_gen_movi_tl(cpu_is_delay_slot_instruction, 1);
+
+    /* Set the pc to the next pc */
+    tcg_gen_movi_tl(cpu_pc, ctx->npc);
+    /* Necessary for the likely call to restore_state_to_opc() */
+    tcg_gen_insn_start(ctx->npc);

This is unlikely to work reliably.
I suspect it does not work at all with icount.

+    ctx->env->enabled_interrupts = false;

Illegal, as mentioned before.

+    /*
+     * In case we might be in a situation where the delayslot is in a
+     * different MMU page. Make a fake exception to interrupt
+     * delayslot execution in the context of the branch.
+     * The delayslot will then be re-executed in isolation after the
+     * branch code has set bta and DEf status flag.
+     */
+    if ((cpc & PAGE_MASK) < 0x80000000 &&
+        (cpc & PAGE_MASK) != (ctx->cpc & PAGE_MASK)) {
+        ctx->in_delay_slot = false;
+        TCGv dpc = tcg_const_local_tl(ctx->npc);
+        tcg_gen_mov_tl(cpu_pc, dpc);
+        gen_helper_fake_exception(cpu_env, dpc);

I think you should *always* execute the delay slot separately. That's the only way the instruction count will be done right.

I'm pretty sure I asked you before to have a look at some of the other targets that implement delay slots for ideas on how to do this correctly.


+void arc_gen_get_bit(TCGv ret, TCGv a, TCGv pos)
+{
+    tcg_gen_rotr_tl(ret, a, pos);
+    tcg_gen_andi_tl(ret, ret, 1);
+}

Should be a plain shift, not a rotate, surely.

+void arc_gen_extract_bits(TCGv ret, TCGv a, TCGv start, TCGv end)
+{
+    TCGv tmp1 = tcg_temp_new();
+
+    tcg_gen_shr_tl(ret, a, end);
+
+    tcg_gen_sub_tl(tmp1, start, end);
+    tcg_gen_addi_tl(tmp1, tmp1, 1);
+    tcg_gen_shlfi_tl(tmp1, 1, tmp1);
+    tcg_gen_subi_tl(tmp1, tmp1, 1);
+
+    tcg_gen_and_tl(ret, ret, tmp1);

Doesn't work for start == 31, end == 0,
due to a shift count of 32.

You could rewrite this to

  t = 31 - start;
  ret = a << t;
  t = 31 - end;
  ret = ret >> t;

Amusingly, there's exactly one instance of extractBits that doesn't use constant arguments, and that's in ROR. And there, the extract *would* use constant arguments if the extract was from @dest instead of from lsrc. At which point you could just use tcg_gen_extract_tl.


+TCGv arc_gen_next_reg(const DisasCtxt *ctx, TCGv reg)
+{
+    int i;
+    for (i = 0; i < 64; i += 2) {
+        if (reg == cpu_r[i]) {
+            return cpu_r[i + 1];
+        }
+    }
+    /* Check if REG is an odd register. */
+    for (i = 1; i < 64; i += 2) {
+        /* If so, that is unsanctioned. */
+        if (reg == cpu_r[i]) {
+            arc_gen_excp(ctx, EXCP_INST_ERROR, 0, 0);
+            return NULL;
+        }
+    }

This is really ugly.  Surely you can do something better.

Perhaps not resolving regno to TCGv quite so early, so that it's easy to simply add one and index.

+void arc_gen_verifyCCFlag(const DisasCtxt *ctx, TCGv ret);
+#define getCCFlag(R)    arc_gen_verifyCCFlag(ctx, R)

I wonder if it would be clearer if the ruby translator simply added the context parameter itself, rather than have 99 macros to do the same.

+#define getNFlag(R)     cpu_Nf
+#define setNFlag(ELEM)  tcg_gen_shri_tl(cpu_Nf, ELEM, (TARGET_LONG_BITS - 1))

I'll note that setting of flags happens much more often than checking of flags. Therefore it is a win if the setter does less work than the getter.

That's why we normally store the N and V flags in-place, in the high bit (see arm, s390x, etc). This makes the setter a simple move, and the getter either a shift or TCG_COND_LT.

+#define setZFlag(ELEM)  \
+    tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_Zf, ELEM, 0);

Similarly, the Z flag can be set with a simple move, and the get can use the setcond.

+#define nextInsnAddressAfterDelaySlot(R) \
+  { \
+    ARCCPU *cpu = env_archcpu(ctx->env); \
+    uint16_t delayslot_buffer[2]; \
+    uint8_t delayslot_length; \
+    ctx->env->pc = ctx->cpc; \
+    ctx->env->stat.is_delay_slot_instruction = 1; \

Again, illegal to read or write env.

+#define setPC(NEW_PC)                                   \
+    do {                                                \
+        gen_goto_tb(ctx, 1, NEW_PC);                    \
+        ret = ret == DISAS_NEXT ? DISAS_NORETURN : ret; \
+    } while (0)

Why is this not unconditionally DISAS_NORETURN?
Because gen_goto_tb always exits.

+/*
+ * An enter_s may change code like below:
+ * ----
+ * r13 .. r26 <== shell opcodes
+ * sp <= pc+56
+ * enter_s
+ * ---
+ * It's not that we are promoting this type of instruction;
+ * nevertheless we must be able to emulate them. Hence, once
+ * again: ret = DISAS_UPDATE
+ */
+#define helperEnter(U6)                 \
+    do {                                \
+        gen_helper_enter(cpu_env, U6);  \
+        ret = DISAS_UPDATE;             \
+    } while (0)

Macro not used?

+/* A leave_s may jump to blink, hence the DISAS_UPDATE */
+#define helperLeave(U7)                                           \

Likewise.


r~



reply via email to

[Prev in Thread] Current Thread [Next in Thread]