qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH 2/3] sh: movca.l cancel by ocbi


From: Shin-ichiro KAWASAKI
Subject: [Qemu-devel] [PATCH 2/3] sh: movca.l cancel by ocbi
Date: Sun, 11 Jan 2009 18:14:40 +0900
User-agent: Thunderbird 2.0.0.19 (Windows/20081209)

The sh4 "movca.l" instruction is currently implemented in the same way
as "mov.l", so its write to memory cannot be canceled by the "ocbi"
cache control instruction.  This corrupts the text area when the
Linux kernel flushes the cache.

This patch delays "movca.l" execution and gives "ocbi" the chance to
cancel it.
# Thank you Edgar, for your advice!

This patch does the following:
- on executing "movca.l", just records what data movca should store
  and where.
- on executing "ocbi", finds the corresponding record made by movca
  and deletes it to cancel the store.
- lets TCG produce a "delayed_movca" instruction at the first
  instruction which is neither "movca.l" nor "ocbi".
- on executing "delayed_movca", performs the data store
  according to the record.


Signed-off-by: Shin-ichiro KAWASAKI <address@hidden>

Index: trunk/target-sh4/helper.h
===================================================================
--- trunk/target-sh4/helper.h   (revision 6133)
+++ trunk/target-sh4/helper.h   (working copy)
@@ -45,4 +45,8 @@
 DEF_HELPER_1(ftrc_FT, i32, i32)
 DEF_HELPER_1(ftrc_DT, i32, i64)
 
+DEF_HELPER_2(movca, void, i32, i32) /* helper_movca(val, addr): record a pending movca.l store */
+DEF_HELPER_1(ocbi, void, i32)       /* helper_ocbi(addr): cancel the pending store recorded for addr */
+DEF_HELPER_0(delayed_movca, void)   /* helper_delayed_movca(): perform the stores that are still pending */
+
 #include "def-helper.h"
Index: trunk/target-sh4/cpu.h
===================================================================
--- trunk/target-sh4/cpu.h      (revision 6133)
+++ trunk/target-sh4/cpu.h      (working copy)
@@ -70,6 +70,7 @@
  * It is unclear if it is permitted to modify the SR_T flag in a delay slot.
  * The use of DELAY_SLOT_TRUE flag makes us accept such SR_T modification.
  */
+#define MOVCA_DELAYED          (1 << 4)  /* used in translation context */
 
 /* XXXXX The structure could be made more compact */
 typedef struct tlb_t {
@@ -91,6 +92,13 @@
 #define UTLB_SIZE 64
 #define ITLB_SIZE 4
 
+typedef struct delayed_movca_t { /* one deferred movca.l store; entries are chained into a singly linked list */
+    uint32_t valid;  /* nonzero while this entry holds a store that was neither performed nor canceled */
+    uint32_t value;  /* 32-bit data to be stored */
+    uint32_t addr;   /* address the data is to be stored to */
+    struct delayed_movca_t * next; /* following entry; the helpers stop walking at NULL */
+} delayed_movca_t;
+
 #define NB_MMU_MODES 2
 
 enum sh_features {
@@ -143,6 +151,7 @@
     tlb_t itlb[ITLB_SIZE];     /* instruction translation table */
     void *intc_handle;
     int intr_at_halt;          /* SR_BL ignored during sleep */
+    delayed_movca_t movca_list;
 } CPUSH4State;
 
 CPUSH4State *cpu_sh4_init(const char *cpu_model);
Index: trunk/target-sh4/op_helper.c
===================================================================
--- trunk/target-sh4/op_helper.c        (revision 6133)
+++ trunk/target-sh4/op_helper.c        (working copy)
@@ -20,6 +20,7 @@
 #include <assert.h>
 #include "exec.h"
 #include "helper.h"
+void *qemu_mallocz(size_t size);
 
 #ifndef CONFIG_USER_ONLY
 
@@ -604,3 +605,57 @@
     d.ll = t0;
     return float64_to_int32_round_to_zero(d.d, &env->fp_status);
 }
+/* Record a deferred movca.l store of val to addr in env->movca_list instead of writing memory now. */
+void helper_movca(uint32_t val, uint32_t addr)
+{
+    delayed_movca_t *cur = &env->movca_list; /* list head is embedded in env, so cur starts non-NULL */
+    delayed_movca_t *prev = NULL;
+    while (cur) {
+       if (!cur->valid) { /* reuse the first free entry */
+           cur->valid = 1;
+           cur->value = val;
+           cur->addr = addr;
+           return;
+       }
+       prev = cur;
+       cur = cur->next;
+    }
+
+    /* Every existing entry is in use: append a new one after the tail (prev). */
+    prev->next = cur = qemu_mallocz(sizeof(delayed_movca_t));
+    if (cur == NULL) { /* allocation failed: the store is dropped after printing a message */
+       printf("out of memory for delayed movca. @%08x\n", addr);
+       return;
+    }
+    cur->valid = 1;
+    cur->value = val;
+    cur->addr = addr; /* NOTE(review): cur->next is never set here; presumably qemu_mallocz zero-fills - confirm */
+}
+/* Cancel a deferred movca.l store: invalidate the first valid entry whose address equals addr exactly. */
+void helper_ocbi(uint32_t addr)
+{
+    delayed_movca_t *cur = &env->movca_list;
+
+    while (cur) {
+       if (cur->valid && cur->addr == addr) { /* found! */
+           cur->valid = 0; /* entry stays linked so helper_movca can reuse it; only the first match is canceled */
+           return;
+       }
+       cur = cur->next;
+    }
+    /* No valid entry matched addr: nothing is canceled, only a diagnostic is printed. */
+    printf("invalid ocbi for address @%08x  pc=%08x\n", addr, env->pc);
+}
+/* Perform every still-valid deferred movca.l store, marking each list entry free as it is visited. */
+void helper_delayed_movca(void)
+{
+    delayed_movca_t *cur = &env->movca_list;
+
+    /* Walk the whole list; entries canceled by helper_ocbi have valid == 0 and are skipped. */
+    while (cur) {
+       if (cur->valid)
+           __stl_mmu(cur->addr, cur->value, (env->sr & SR_MD) ? 1 : 0); /* last argument is 1 when SR_MD is set in env->sr, else 0 (presumably the MMU index - confirm) */
+       cur->valid = 0;
+       cur = cur->next;
+    }
+}
Index: trunk/target-sh4/translate.c
===================================================================
--- trunk/target-sh4/translate.c        (revision 6133)
+++ trunk/target-sh4/translate.c        (working copy)
@@ -1533,7 +1533,17 @@
        }
        return;
     case 0x00c3:               /* movca.l R0,@Rm */
-       tcg_gen_qemu_st32(REG(0), REG(B11_8), ctx->memidx);
+       /*
+        * movca.l is used in two ways:
+        *  [1] to inhibit the memory fetch for faster block transfers;
+        *  [2] to flush a cache line, when used together with ocbi.
+        * In case [1], movca.l behaves the same as mov.l.
+        * In case [2], movca.l behaves the same as nop.
+        * So we need to delay the mov.l-like store to give ocbi
+        * the chance to cancel it.
+        */
+       ctx->flags |= MOVCA_DELAYED;
+       gen_helper_movca(REG(0), REG(B11_8));
        return;
     case 0x40a9:
        /* MOVUA.L @Rm,R0 (Rm) -> R0
@@ -1550,11 +1560,7 @@
        tcg_gen_andi_i32(REG(B11_8), cpu_sr, SR_T);
        return;
     case 0x0093:               /* ocbi @Rn */
-       {
-           TCGv dummy = tcg_temp_new();
-           tcg_gen_qemu_ld32s(dummy, REG(B11_8), ctx->memidx);
-           tcg_temp_free(dummy);
-       }
+       gen_helper_ocbi(REG(B11_8));
        return;
     case 0x00a3:               /* ocbp @Rn */
        {
@@ -1781,8 +1787,29 @@
 {
     uint32_t old_flags = ctx->flags;
 
+    /* pre generation */
+    if (ctx->flags & MOVCA_DELAYED) {
+       switch (ctx->opcode & 0xf0ff) {
+       case 0x00c3: /* movca.l */
+       case 0x0093: /* ocbi */
+           /*
+            * Do not generate delayed movca for these instructions, so
+            * that ocbi can cancel the execution of a delayed movca.l.
+            * This implementation assumes that movca.l and ocbi are
+            * executed consecutively to flush the cache.
+            */
+           break;
+       default:
+           /* generate delayed movca. */
+           ctx->flags &= ~MOVCA_DELAYED;
+           gen_helper_delayed_movca();
+           break;
+       }
+    }
+
     _decode_opc(ctx);
 
+    /* post generation */
     if (old_flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
         if (ctx->flags & DELAY_SLOT_CLEARME) {
             gen_store_flags(0);




reply via email to

[Prev in Thread] Current Thread [Next in Thread]