commit 1902e15049a9179ced0c924c5ce7c43f69e74001
Author: Vladimir Prus
Date: Tue Nov 11 12:29:02 2008 +0300
Fix movcal.l/ocbi emulation.
* target-sh4/cpu.h (memory_content_t): New.
(CPUSH4State): New fields movcal_backup and movcal_backup_tail.
* target-sh4/helper.h (helper_movcal)
(helper_discard_movcal_backup, helper_ocbi): New.
* target-sh4/op_helper.c (helper_movcal)
(helper_discard_movcal_backup, helper_ocbi): New.
* target-sh4/translate.c (DisasContext): New field has_movcal.
(sh4_defs): Update CVS for SH7785.
(cpu_sh4_init): Initialize env->movcal_backup_tail.
(_decode_opc): Discard movca.l-backup.
Make use of helper_movcal and helper_ocbi.
(gen_intermediate_code_internal): Initialize has_movcal to 1.
diff --git a/cpu-exec.c b/cpu-exec.c
index cf7c1fb..b0f0b5e 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -174,7 +174,7 @@ static inline TranslationBlock *tb_find_fast(void)
/* we record a subset of the CPU state. It will
always be the same before a given translated block
is executed. */
- cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+ cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
tb = env->tb_jmp_cache[tb_jmp_cache_hash_func(pc)];
if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base ||
tb->flags != flags)) {
diff --git a/target-sh4/cpu.h b/target-sh4/cpu.h
index e9eee2c..67741f6 100644
--- a/target-sh4/cpu.h
+++ b/target-sh4/cpu.h
@@ -100,6 +100,12 @@ enum sh_features {
SH_FEATURE_BCR3_AND_BCR4 = 2,
};
+typedef struct memory_content_t {
+ uint32_t address;
+ uint32_t value;
+ struct memory_content_t *next;
+} memory_content_t;
+
typedef struct CPUSH4State {
int id; /* CPU model */
@@ -149,6 +155,8 @@ typedef struct CPUSH4State {
tlb_t itlb[ITLB_SIZE]; /* instruction translation table */
void *intc_handle;
int intr_at_halt; /* SR_BL ignored during sleep */
+ memory_content_t *movcal_backup;
+ memory_content_t **movcal_backup_tail;
} CPUSH4State;
CPUSH4State *cpu_sh4_init(const char *cpu_model);
@@ -294,16 +302,19 @@ static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
env->flags = tb->flags;
}
+#define TB_FLAG_PENDING_MOVCA (1 << 4)
+
static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc,
target_ulong *cs_base, int *flags)
{
*pc = env->pc;
*cs_base = 0;
*flags = (env->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL
- | DELAY_SLOT_TRUE | DELAY_SLOT_CLEARME)) /* Bits 0- 3 */
- | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR)) /* Bits 19-21 */
+ | DELAY_SLOT_TRUE | DELAY_SLOT_CLEARME)) /* Bits 0- 3 */
+ | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR)) /* Bits 19-21 */
| (env->sr & (SR_MD | SR_RB)) /* Bits 29-30 */
- | (env->sr & SR_FD); /* Bit 15 */
+ | (env->sr & SR_FD) /* Bit 15 */
+ | (env->movcal_backup ? TB_FLAG_PENDING_MOVCA : 0); /* Bit 4 */
}
#endif /* _CPU_SH4_H */
diff --git a/target-sh4/helper.h b/target-sh4/helper.h
index e665185..4b2fcdd 100644
--- a/target-sh4/helper.h
+++ b/target-sh4/helper.h
@@ -9,6 +9,10 @@ DEF_HELPER_0(debug, void)
DEF_HELPER_1(sleep, void, i32)
DEF_HELPER_1(trapa, void, i32)
+DEF_HELPER_2(movcal, void, i32, i32)
+DEF_HELPER_0(discard_movcal_backup, void)
+DEF_HELPER_1(ocbi, void, i32)
+
DEF_HELPER_2(addv, i32, i32, i32)
DEF_HELPER_2(addc, i32, i32, i32)
DEF_HELPER_2(subv, i32, i32, i32)
diff --git a/target-sh4/op_helper.c b/target-sh4/op_helper.c
index 84e1ad3..e6b71a0 100644
--- a/target-sh4/op_helper.c
+++ b/target-sh4/op_helper.c
@@ -18,6 +18,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA
*/
#include
+#include
#include "exec.h"
#include "helper.h"
@@ -122,6 +123,55 @@ void helper_trapa(uint32_t tra)
cpu_loop_exit();
}
+void helper_movcal(uint32_t address, uint32_t value)
+{
+ memory_content_t *r = (memory_content_t *)malloc (sizeof(memory_content_t));
+ r->address = address;
+ r->value = value;
+ r->next = NULL;
+
+ *(env->movcal_backup_tail) = r;
+ env->movcal_backup_tail = &(r->next);
+}
+
+void helper_discard_movcal_backup(void)
+{
+ memory_content_t *current = env->movcal_backup;
+
+ while(current)
+ {
+ memory_content_t *next = current->next;
+ free (current);
+ env->movcal_backup = current = next;
+ if (current == 0)
+ env->movcal_backup_tail = &(env->movcal_backup);
+ }
+}
+
+void helper_ocbi(uint32_t address)
+{
+ memory_content_t **current = &(env->movcal_backup);
+ while (*current)
+ {
+ uint32_t a = (*current)->address;
+ if ((a & ~0x1F) == (address & ~0x1F))
+ {
+ memory_content_t *next = (*current)->next;
+
+ stl_data(a, (*current)->value);
+
+ if (next == 0)
+ {
+ env->movcal_backup_tail = current;
+ }
+
+ free (*current);
+ *current = next;
+ break;
+ }
+ }
+}
+
uint32_t helper_addc(uint32_t arg0, uint32_t arg1)
{
uint32_t tmp0, tmp1;
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index cc9f886..4ced176 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -50,6 +50,7 @@ typedef struct DisasContext {
uint32_t delayed_pc;
int singlestep_enabled;
uint32_t features;
+ int has_movcal;
} DisasContext;
#if defined(CONFIG_USER_ONLY)
@@ -283,6 +284,7 @@ CPUSH4State *cpu_sh4_init(const char *cpu_model)
env = qemu_mallocz(sizeof(CPUSH4State));
env->features = def->features;
cpu_exec_init(env);
+ env->movcal_backup_tail = &(env->movcal_backup);
sh4_translate_init();
env->cpu_model_str = cpu_model;
cpu_sh4_reset(env);
@@ -495,6 +497,37 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg)
static void _decode_opc(DisasContext * ctx)
{
+ /* This code tries to make movcal emulation sufficiently
+ accurate for Linux purposes. This instruction writes
+ memory, and prior to that, always allocates a cache line.
+ It is used in two contexts:
+ - in memcpy, where data is copied in blocks, the first write
+ of to a block uses movca.l for performance.
+ - in arch/sh/mm/cache-sh4.c, movcal.l + ocbi combination is used
+ to flush the cache. Here, the data written by movcal.l is never
+ written to memory, and the data written is just bogus.
+
+ To simulate this, we simulate movcal.l, we store the value to memory,
+ but we also remember the previous content. If we see ocbi, we check
+ if movcal.l for that address was done previously. If so, the write should
+ not have hit the memory, so we restore the previous content.
+ When we see an instruction that is neither movca.l
+ nor ocbi, the previous content is discarded.
+
+ To optimize, we only try to flush stores when we're at the start of
+ TB, or if we already saw movca.l in this TB and did not flush stores
+ yet. */
+ if (ctx->has_movcal)
+ {
+ int opcode = ctx->opcode & 0xf0ff;
+ if (opcode != 0x0093 /* ocbi */
+ && opcode != 0x00c3 /* movca.l */)
+ {
+ gen_helper_discard_movcal_backup ();
+ ctx->has_movcal = 0;
+ }
+ }
+
#if 0
fprintf(stderr, "Translating opcode 0x%04x\n", ctx->opcode);
#endif
@@ -1545,7 +1578,13 @@ static void _decode_opc(DisasContext * ctx)
}
return;
case 0x00c3: /* movca.l R0,@Rm */
- tcg_gen_qemu_st32(REG(0), REG(B11_8), ctx->memidx);
+ {
+ TCGv val = tcg_temp_new();
+ tcg_gen_qemu_ld32u(val, REG(B11_8), ctx->memidx);
+ gen_helper_movcal (REG(B11_8), val);
+ tcg_gen_qemu_st32(REG(0), REG(B11_8), ctx->memidx);
+ }
+ ctx->has_movcal = 1;
return;
case 0x40a9:
/* MOVUA.L @Rm,R0 (Rm) -> R0
@@ -1594,9 +1633,7 @@ static void _decode_opc(DisasContext * ctx)
break;
case 0x0093: /* ocbi @Rn */
{
- TCGv dummy = tcg_temp_new();
- tcg_gen_qemu_ld32s(dummy, REG(B11_8), ctx->memidx);
- tcg_temp_free(dummy);
+ gen_helper_ocbi (REG(B11_8));
}
return;
case 0x00a3: /* ocbp @Rn */
@@ -1876,6 +1913,7 @@ gen_intermediate_code_internal(CPUState * env, TranslationBlock * tb,
ctx.tb = tb;
ctx.singlestep_enabled = env->singlestep_enabled;
ctx.features = env->features;
+ ctx.has_movcal = (tb->flags & TB_FLAG_PENDING_MOVCA);
#ifdef DEBUG_DISAS
qemu_log_mask(CPU_LOG_TB_CPU,