[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH v5 1/2] target/mips: Optimize ILVOD.<B|H|W|D> MSA in
From: |
Mateja Marjanovic |
Subject: |
[Qemu-devel] [PATCH v5 1/2] target/mips: Optimize ILVOD.<B|H|W|D> MSA instructions |
Date: |
Tue, 2 Apr 2019 17:15:20 +0200 |
From: Mateja Marjanovic <address@hidden>
Optimize set of MSA instructions ILVOD, using directly
tcg registers and performing logic on them instead of
using helpers.
instr || before || after
======================================
ilvod.b || 117.50 ms || 24.99 ms
ilvod.h || 93.16 ms || 24.27 ms
ilvod.w || 119.90 ms || 24.81 ms
ilvod.d || 43.01 ms || 20.69 ms
Signed-off-by: Mateja Marjanovic <address@hidden>
---
target/mips/helper.h | 1 -
target/mips/msa_helper.c | 7 ----
target/mips/translate.c | 107 ++++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 106 insertions(+), 9 deletions(-)
diff --git a/target/mips/helper.h b/target/mips/helper.h
index 2863f60..02e16c7 100644
--- a/target/mips/helper.h
+++ b/target/mips/helper.h
@@ -865,7 +865,6 @@ DEF_HELPER_5(msa_pckod_df, void, env, i32, i32, i32, i32)
DEF_HELPER_5(msa_ilvl_df, void, env, i32, i32, i32, i32)
DEF_HELPER_5(msa_ilvr_df, void, env, i32, i32, i32, i32)
DEF_HELPER_5(msa_ilvev_df, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(msa_ilvod_df, void, env, i32, i32, i32, i32)
DEF_HELPER_5(msa_vshf_df, void, env, i32, i32, i32, i32)
DEF_HELPER_5(msa_srar_df, void, env, i32, i32, i32, i32)
DEF_HELPER_5(msa_srlr_df, void, env, i32, i32, i32, i32)
diff --git a/target/mips/msa_helper.c b/target/mips/msa_helper.c
index 6c57281..a7ea6aa 100644
--- a/target/mips/msa_helper.c
+++ b/target/mips/msa_helper.c
@@ -1206,13 +1206,6 @@ MSA_FN_DF(ilvr_df)
MSA_FN_DF(ilvev_df)
#undef MSA_DO
-#define MSA_DO(DF) \
- do { \
- pwx->DF[2*i] = pwt->DF[2*i+1]; \
- pwx->DF[2*i+1] = pws->DF[2*i+1]; \
- } while (0)
-MSA_FN_DF(ilvod_df)
-#undef MSA_DO
#undef MSA_LOOP_COND
#define MSA_LOOP_COND(DF) \
diff --git a/target/mips/translate.c b/target/mips/translate.c
index bba8b6c..04406d6 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -28884,6 +28884,96 @@ static void gen_msa_bit(CPUMIPSState *env,
DisasContext *ctx)
tcg_temp_free_i32(tws);
}
+/*
+ * [MSA] ILVOD.B wd, ws, wt
+ *
+ * Vector Interleave Odd (byte data elements)
+ *
+ */
+static inline void gen_ilvod_b(CPUMIPSState *env, uint32_t wd,
+ uint32_t ws, uint32_t wt)
+{
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+ const uint64_t mask = 0xff00ff00ff00ff00ULL;
+
+ tcg_gen_andi_i64(t1, msa_wr_d[wt * 2], mask);
+ tcg_gen_shri_i64(t1, t1, 8);
+ tcg_gen_andi_i64(t2, msa_wr_d[ws * 2], mask);
+ tcg_gen_or_i64(msa_wr_d[wd * 2], t1, t2);
+
+ tcg_gen_andi_i64(t1, msa_wr_d[wt * 2 + 1], mask);
+ tcg_gen_shri_i64(t1, t1, 8);
+ tcg_gen_andi_i64(t2, msa_wr_d[ws * 2 + 1], mask);
+ tcg_gen_or_i64(msa_wr_d[wd * 2 + 1], t1, t2);
+
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t2);
+}
+
+/*
+ * [MSA] ILVOD.H wd, ws, wt
+ *
+ * Vector Interleave Odd (halfword data elements)
+ *
+ */
+static inline void gen_ilvod_h(CPUMIPSState *env, uint32_t wd,
+ uint32_t ws, uint32_t wt)
+{
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+ const uint64_t mask = 0xffff0000ffff0000ULL;
+
+ tcg_gen_andi_i64(t1, msa_wr_d[wt * 2], mask);
+ tcg_gen_shri_i64(t1, t1, 16);
+ tcg_gen_andi_i64(t2, msa_wr_d[ws * 2], mask);
+ tcg_gen_or_i64(msa_wr_d[wd * 2], t1, t2);
+
+ tcg_gen_andi_i64(t1, msa_wr_d[wt * 2 + 1], mask);
+ tcg_gen_shri_i64(t1, t1, 16);
+ tcg_gen_andi_i64(t2, msa_wr_d[ws * 2 + 1], mask);
+ tcg_gen_or_i64(msa_wr_d[wd * 2 + 1], t1, t2);
+
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t2);
+}
+
+/*
+ * [MSA] ILVOD.W wd, ws, wt
+ *
+ * Vector Interleave Odd (word data elements)
+ *
+ */
+static inline void gen_ilvod_w(CPUMIPSState *env, uint32_t wd,
+ uint32_t ws, uint32_t wt)
+{
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ const uint64_t mask = 0xffffffff00000000ULL;
+
+ tcg_gen_andi_i64(t1, msa_wr_d[wt * 2], mask);
+ tcg_gen_shri_i64(t1, t1, 32);
+ tcg_gen_deposit_i64(msa_wr_d[wd * 2], msa_wr_d[ws * 2], t1, 0, 32);
+
+ tcg_gen_andi_i64(t1, msa_wr_d[wt * 2 + 1], mask);
+ tcg_gen_shri_i64(t1, t1, 32);
+ tcg_gen_deposit_i64(msa_wr_d[wd * 2 + 1], msa_wr_d[ws * 2 + 1], t1, 0, 32);
+
+ tcg_temp_free_i64(t1);
+}
+
+/*
+ * [MSA] ILVOD.D wd, ws, wt
+ *
+ * Vector Interleave Odd (double data elements)
+ *
+ */
+static inline void gen_ilvod_d(CPUMIPSState *env, uint32_t wd,
+ uint32_t ws, uint32_t wt)
+{
+ tcg_gen_mov_i64(msa_wr_d[wd * 2], msa_wr_d[wt * 2 + 1]);
+ tcg_gen_mov_i64(msa_wr_d[wd * 2 + 1], msa_wr_d[ws * 2 + 1]);
+}
+
static void gen_msa_3r(CPUMIPSState *env, DisasContext *ctx)
{
#define MASK_MSA_3R(op) (MASK_MSA_MINOR(op) | (op & (0x7 << 23)))
@@ -29055,7 +29145,22 @@ static void gen_msa_3r(CPUMIPSState *env, DisasContext
*ctx)
gen_helper_msa_mod_u_df(cpu_env, tdf, twd, tws, twt);
break;
case OPC_ILVOD_df:
- gen_helper_msa_ilvod_df(cpu_env, tdf, twd, tws, twt);
+ switch (df) {
+ case DF_BYTE:
+ gen_ilvod_b(env, wd, ws, wt);
+ break;
+ case DF_HALF:
+ gen_ilvod_h(env, wd, ws, wt);
+ break;
+ case DF_WORD:
+ gen_ilvod_w(env, wd, ws, wt);
+ break;
+ case DF_DOUBLE:
+ gen_ilvod_d(env, wd, ws, wt);
+ break;
+ default:
+ assert(0);
+ }
break;
case OPC_DOTP_S_df:
--
2.7.4