qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH 13/20] target-mips: add MSA 3R format instructions


From: Yongbok Kim
Subject: [Qemu-devel] [PATCH 13/20] target-mips: add MSA 3R format instructions
Date: Mon, 14 Jul 2014 10:55:56 +0100

add MSA 3R format instructions

Signed-off-by: Yongbok Kim <address@hidden>
---
 target-mips/helper.h     |   63 ++
 target-mips/msa_helper.c | 2193 +++++++++++++++++++++++++++++++++++++++++-----
 target-mips/translate.c  |  300 +++++++
 3 files changed, 2329 insertions(+), 227 deletions(-)

diff --git a/target-mips/helper.h b/target-mips/helper.h
index f9406d6..00705c4 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -692,35 +692,98 @@ DEF_HELPER_FLAGS_2(rddsp, 0, tl, tl, env)
 
 /* MIPS SIMD Architecture */
 
+DEF_HELPER_5(msa_add_a_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_adds_a_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_adds_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_adds_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_addv_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_addvi_df, void, env, i32, i32, i32, s64)
 DEF_HELPER_4(msa_andi_b, void, env, i32, i32, i32)
+DEF_HELPER_5(msa_asub_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_asub_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_ave_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_ave_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_aver_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_aver_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_bclr_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_bclri_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_binsl_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_binsli_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_binsr_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_binsri_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_4(msa_bmnzi_b, void, env, i32, i32, i32)
 DEF_HELPER_4(msa_bmzi_b, void, env, i32, i32, i32)
+DEF_HELPER_5(msa_bneg_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_bnegi_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_4(msa_bseli_b, void, env, i32, i32, i32)
+DEF_HELPER_5(msa_bset_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_bseti_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_ceq_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_ceqi_df, void, env, i32, i32, i32, s64)
+DEF_HELPER_5(msa_cle_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_cle_u_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_clei_s_df, void, env, i32, i32, i32, s64)
 DEF_HELPER_5(msa_clei_u_df, void, env, i32, i32, i32, s64)
+DEF_HELPER_5(msa_clt_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_clt_u_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_clti_s_df, void, env, i32, i32, i32, s64)
 DEF_HELPER_5(msa_clti_u_df, void, env, i32, i32, i32, s64)
+DEF_HELPER_5(msa_div_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_div_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_dotp_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_dotp_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_dpadd_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_dpadd_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_dpsub_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_dpsub_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_hadd_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_hadd_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_hsub_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_hsub_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_ilvev_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_ilvl_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_ilvod_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_ilvr_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_4(msa_ldi_df, void, env, i32, i32, i32)
+DEF_HELPER_5(msa_maddv_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_max_a_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_max_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_max_u_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_maxi_s_df, void, env, i32, i32, i32, s64)
 DEF_HELPER_5(msa_maxi_u_df, void, env, i32, i32, i32, s64)
+DEF_HELPER_5(msa_min_a_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_min_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_min_u_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_mini_s_df, void, env, i32, i32, i32, s64)
 DEF_HELPER_5(msa_mini_u_df, void, env, i32, i32, i32, s64)
+DEF_HELPER_5(msa_mod_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_mod_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_msubv_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_mulv_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_4(msa_nori_b, void, env, i32, i32, i32)
 DEF_HELPER_4(msa_ori_b, void, env, i32, i32, i32)
+DEF_HELPER_5(msa_pckev_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_pckod_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_sat_s_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_sat_u_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_shf_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_sld_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_sll_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_slli_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_splat_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_sra_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_srai_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_srar_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_srari_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_srl_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_srli_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_srlr_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_srlri_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_subs_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_subs_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_subsus_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_subsuu_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_subv_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_subvi_df, void, env, i32, i32, i32, s64)
+DEF_HELPER_5(msa_vshf_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_4(msa_xori_b, void, env, i32, i32, i32)
diff --git a/target-mips/msa_helper.c b/target-mips/msa_helper.c
index 39377d6..bb4ea65 100644
--- a/target-mips/msa_helper.c
+++ b/target-mips/msa_helper.c
@@ -195,6 +195,48 @@ static inline void msa_store_wr_elem(CPUMIPSState *env, uint64_t val,
     }
 }
 
+static inline int64_t msa_add_a_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    uint64_t abs_arg1 = arg1 >= 0 ? arg1 : -arg1;
+    uint64_t abs_arg2 = arg2 >= 0 ? arg2 : -arg2;
+    return abs_arg1 + abs_arg2;
+}
+
+void helper_msa_add_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    uint64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_add_a_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+void helper_msa_addv_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = (int64_t) ts + tt;
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
 void helper_msa_addvi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
         uint32_t ws, int64_t u5)
 {
@@ -211,6 +253,23 @@ void helper_msa_addvi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
+void helper_msa_subv_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = (int64_t) ts - tt;
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
 void helper_msa_subvi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
         uint32_t ws, int64_t u5)
 {
@@ -227,75 +286,84 @@ void helper_msa_subvi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
-void helper_msa_andi_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
-        uint32_t i8)
+static inline int64_t msa_adds_a_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
 {
-    void *pwd = &(env->active_fpu.fpr[wd]);
-    void *pws = &(env->active_fpu.fpr[ws]);
-    ALL_B_ELEMENTS(i, MSA_WRLEN) {
-        B(pwd, i) = B(pws, i) & i8;
-    } DONE_ALL_ELEMENTS;
-    if (env->active_msa.msair & MSAIR_WRP_BIT) {
-        env->active_msa.msamodify |= (1 << wd);
+    uint64_t max_int = (uint64_t)DF_MAX_INT(df);
+    uint64_t abs_arg1 = arg1 >= 0 ? arg1 : -arg1;
+    uint64_t abs_arg2 = arg2 >= 0 ? arg2 : -arg2;
+    if (abs_arg1 > max_int || abs_arg2 > max_int) {
+        return (int64_t)max_int;
+    } else {
+        return (abs_arg1 < max_int - abs_arg2) ? abs_arg1 + abs_arg2 : max_int;
     }
 }
 
-void helper_msa_ori_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
-        uint32_t i8)
+void helper_msa_adds_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    void *pwd = &(env->active_fpu.fpr[wd]);
-    void *pws = &(env->active_fpu.fpr[ws]);
-    ALL_B_ELEMENTS(i, MSA_WRLEN) {
-        B(pwd, i) = B(pws, i) | i8;
-    } DONE_ALL_ELEMENTS;
+    uint64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_adds_a_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
         env->active_msa.msamodify |= (1 << wd);
     }
 }
 
-void helper_msa_nori_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
-        uint32_t i8)
+static inline int64_t msa_adds_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
 {
-    void *pwd = &(env->active_fpu.fpr[wd]);
-    void *pws = &(env->active_fpu.fpr[ws]);
-    ALL_B_ELEMENTS(i, MSA_WRLEN) {
-        B(pwd, i) = ~(B(pws, i) | i8);
-    } DONE_ALL_ELEMENTS;
-    if (env->active_msa.msair & MSAIR_WRP_BIT) {
-        env->active_msa.msamodify |= (1 << wd);
+    int64_t max_int = DF_MAX_INT(df);
+    int64_t min_int = DF_MIN_INT(df);
+    if (arg1 < 0) {
+        return (min_int - arg1 < arg2) ? arg1 + arg2 : min_int;
+    } else {
+        return (arg2 < max_int - arg1) ? arg1 + arg2 : max_int;
     }
 }
 
-void helper_msa_xori_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
-        uint32_t i8)
+void helper_msa_adds_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    void *pwd = &(env->active_fpu.fpr[wd]);
-    void *pws = &(env->active_fpu.fpr[ws]);
-    ALL_B_ELEMENTS(i, MSA_WRLEN) {
-        B(pwd, i) = B(pws, i) ^ i8;
-    } DONE_ALL_ELEMENTS;
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_adds_s_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
         env->active_msa.msamodify |= (1 << wd);
     }
 }
 
-static inline int64_t msa_bclr_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
-        int64_t arg2)
+static inline uint64_t msa_adds_u_df(CPUMIPSState *env, uint32_t df,
+        uint64_t arg1, uint64_t arg2)
 {
-    int32_t b_arg2 = BIT_POSITION(arg2, df);
-
-    return UNSIGNED(arg1 & (~(1LL << b_arg2)), df);
+    uint64_t max_uint = DF_MAX_UINT(df);
+    uint64_t u_arg1 = UNSIGNED(arg1, df);
+    uint64_t u_arg2 = UNSIGNED(arg2, df);
+    return (u_arg1 < max_uint - u_arg2) ? u_arg1 + u_arg2 : max_uint;
 }
 
-void helper_msa_bclri_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, uint32_t m)
+void helper_msa_adds_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    int64_t td, ts;
+    uint64_t td, ts, tt;
     int i;
     int df_bits = 8 * (1 << df);
     for (i = 0; i < MSA_WRLEN / df_bits; i++) {
-        ts = msa_load_wr_elem_s64(env, ws, df, i);
-        td = msa_bclr_df(env, df, ts, m);
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        tt = msa_load_wr_elem_i64(env, wt, df, i);
+        td = msa_adds_u_df(env, df, ts, tt);
         msa_store_wr_elem(env, td, wd, df, i);
     }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
@@ -303,22 +371,28 @@ void helper_msa_bclri_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
-static inline int64_t msa_bneg_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
-        int64_t arg2)
+static inline int64_t msa_subs_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
 {
-    int32_t b_arg2 = BIT_POSITION(arg2, df);
-    return UNSIGNED(arg1 ^ (1LL << b_arg2), df);
+    int64_t max_int = DF_MAX_INT(df);
+    int64_t min_int = DF_MIN_INT(df);
+    if (arg2 > 0) {
+        return (min_int + arg2 < arg1) ? arg1 - arg2 : min_int;
+    } else {
+        return (arg1 < max_int + arg2) ? arg1 - arg2 : max_int;
+    }
 }
 
-void helper_msa_bnegi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, uint32_t m)
+void helper_msa_subs_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    int64_t td, ts;
+    int64_t td, ts, tt;
     int i;
     int df_bits = 8 * (1 << df);
     for (i = 0; i < MSA_WRLEN / df_bits; i++) {
         ts = msa_load_wr_elem_s64(env, ws, df, i);
-        td = msa_bneg_df(env, df, ts, m);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_subs_s_df(env, df, ts, tt);
         msa_store_wr_elem(env, td, wd, df, i);
     }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
@@ -326,22 +400,24 @@ void helper_msa_bnegi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
-static inline int64_t msa_bset_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
-        int64_t arg2)
+static inline int64_t msa_subs_u_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
 {
-    int32_t b_arg2 = BIT_POSITION(arg2, df);
-    return UNSIGNED(arg1 | (1LL << b_arg2), df);
+    uint64_t u_arg1 = UNSIGNED(arg1, df);
+    uint64_t u_arg2 = UNSIGNED(arg2, df);
+    return (u_arg1 > u_arg2) ? u_arg1 - u_arg2 : 0;
 }
 
-void helper_msa_bseti_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, uint32_t m)
+void helper_msa_subs_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    int64_t td, ts;
+    uint64_t td, ts, tt;
     int i;
     int df_bits = 8 * (1 << df);
     for (i = 0; i < MSA_WRLEN / df_bits; i++) {
-        ts = msa_load_wr_elem_s64(env, ws, df, i);
-        td = msa_bset_df(env, df, ts, m);
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        tt = msa_load_wr_elem_i64(env, wt, df, i);
+        td = msa_subs_u_df(env, df, ts, tt);
         msa_store_wr_elem(env, td, wd, df, i);
     }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
@@ -349,31 +425,34 @@ void helper_msa_bseti_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
-static inline int64_t msa_binsl_df(CPUMIPSState *env, uint32_t df,
-        int64_t dest, int64_t arg1, int64_t arg2)
+static inline int64_t msa_subsuu_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
 {
     uint64_t u_arg1 = UNSIGNED(arg1, df);
-    uint64_t u_dest = UNSIGNED(dest, df);
-    int32_t sh_d = BIT_POSITION(arg2, df) + 1;
-    int32_t sh_a = DF_BITS(df) - sh_d;
-    if (sh_d == DF_BITS(df)) {
-        return u_arg1;
+    uint64_t u_arg2 = UNSIGNED(arg2, df);
+    int64_t max_int = DF_MAX_INT(df);
+    int64_t min_int = DF_MIN_INT(df);
+    if (u_arg1 > u_arg2) {
+        return u_arg1 - u_arg2 < (uint64_t)max_int ?
+            (int64_t)(u_arg1 - u_arg2) :
+            max_int;
     } else {
-        return UNSIGNED(UNSIGNED(u_dest << sh_d, df) >> sh_d, df) |
-               UNSIGNED(UNSIGNED(u_arg1 >> sh_a, df) << sh_a, df);
+        return u_arg2 - u_arg1 < (uint64_t)(-min_int) ?
+            (int64_t)(u_arg1 - u_arg2) :
+            min_int;
     }
 }
 
-void helper_msa_binsli_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, uint32_t m)
+void helper_msa_subsuu_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    int64_t td, ts;
+    int64_t td, ts, tt;
     int i;
     int df_bits = 8 * (1 << df);
     for (i = 0; i < MSA_WRLEN / df_bits; i++) {
         ts = msa_load_wr_elem_s64(env, ws, df, i);
-        td = msa_load_wr_elem_s64(env, wd, df, i);
-        td = msa_binsl_df(env, df, td, ts, m);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_subsuu_s_df(env, df, ts, tt);
         msa_store_wr_elem(env, td, wd, df, i);
     }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
@@ -381,31 +460,34 @@ void helper_msa_binsli_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
-static inline int64_t msa_binsr_df(CPUMIPSState *env, uint32_t df,
-        int64_t dest, int64_t arg1, int64_t arg2)
+static inline int64_t msa_subsus_u_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
 {
     uint64_t u_arg1 = UNSIGNED(arg1, df);
-    uint64_t u_dest = UNSIGNED(dest, df);
-    int32_t sh_d = BIT_POSITION(arg2, df) + 1;
-    int32_t sh_a = DF_BITS(df) - sh_d;
-    if (sh_d == DF_BITS(df)) {
-        return u_arg1;
+    uint64_t max_uint = DF_MAX_UINT(df);
+    if (arg2 >= 0) {
+        uint64_t u_arg2 = (uint64_t)arg2;
+        return (u_arg1 > u_arg2) ?
+            (int64_t)(u_arg1 - u_arg2) :
+            0;
     } else {
-        return UNSIGNED(UNSIGNED(u_dest >> sh_d, df) << sh_d, df) |
-               UNSIGNED(UNSIGNED(u_arg1 << sh_a, df) >> sh_a, df);
+        uint64_t u_arg2 = (uint64_t)(-arg2);
+        return (u_arg1 < max_uint - u_arg2) ?
+            (int64_t)(u_arg1 + u_arg2) :
+            (int64_t)max_uint;
     }
 }
 
-void helper_msa_binsri_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, uint32_t m)
+void helper_msa_subsus_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    int64_t td, ts;
+    uint64_t td, ts, tt;
     int i;
     int df_bits = 8 * (1 << df);
     for (i = 0; i < MSA_WRLEN / df_bits; i++) {
         ts = msa_load_wr_elem_s64(env, ws, df, i);
-        td = msa_load_wr_elem_s64(env, wd, df, i);
-        td = msa_binsr_df(env, df, td, ts, m);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_subsus_u_df(env, df, ts, tt);
         msa_store_wr_elem(env, td, wd, df, i);
     }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
@@ -413,69 +495,76 @@ void helper_msa_binsri_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
-#define BIT_MOVE_IF_NOT_ZERO(dest, arg1, arg2, df) \
-            dest = UNSIGNED(((dest & (~arg2)) | (arg1 & arg2)), df)
-
-void helper_msa_bmnzi_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
+void helper_msa_andi_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
         uint32_t i8)
 {
     void *pwd = &(env->active_fpu.fpr[wd]);
     void *pws = &(env->active_fpu.fpr[ws]);
     ALL_B_ELEMENTS(i, MSA_WRLEN) {
-        BIT_MOVE_IF_NOT_ZERO(B(pwd, i), B(pws, i), i8, DF_BYTE);
+        B(pwd, i) = B(pws, i) & i8;
     } DONE_ALL_ELEMENTS;
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
         env->active_msa.msamodify |= (1 << wd);
     }
 }
 
-#define BIT_MOVE_IF_ZERO(dest, arg1, arg2, df) \
-            dest = UNSIGNED((dest & arg2) | (arg1 & (~arg2)), df)
-
-void helper_msa_bmzi_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
+void helper_msa_ori_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
         uint32_t i8)
 {
     void *pwd = &(env->active_fpu.fpr[wd]);
     void *pws = &(env->active_fpu.fpr[ws]);
     ALL_B_ELEMENTS(i, MSA_WRLEN) {
-        BIT_MOVE_IF_ZERO(B(pwd, i), B(pws, i), i8, DF_BYTE);
+        B(pwd, i) = B(pws, i) | i8;
     } DONE_ALL_ELEMENTS;
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
         env->active_msa.msamodify |= (1 << wd);
     }
 }
 
-#define BIT_SELECT(dest, arg1, arg2, df) \
-            dest = UNSIGNED((arg1 & (~dest)) | (arg2 & dest), df)
+void helper_msa_nori_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
+        uint32_t i8)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    ALL_B_ELEMENTS(i, MSA_WRLEN) {
+        B(pwd, i) = ~(B(pws, i) | i8);
+    } DONE_ALL_ELEMENTS;
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
 
-void helper_msa_bseli_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
+void helper_msa_xori_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
         uint32_t i8)
 {
     void *pwd = &(env->active_fpu.fpr[wd]);
     void *pws = &(env->active_fpu.fpr[ws]);
     ALL_B_ELEMENTS(i, MSA_WRLEN) {
-        BIT_SELECT(B(pwd, i), B(pws, i), i8, DF_BYTE);
+        B(pwd, i) = B(pws, i) ^ i8;
     } DONE_ALL_ELEMENTS;
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
         env->active_msa.msamodify |= (1 << wd);
     }
 }
 
-static inline int64_t msa_ceq_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
-        int64_t arg2)
+static inline int64_t msa_asub_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
 {
-    return arg1 == arg2 ? -1 : 0;
+    /* signed compare */
+    return (arg1 < arg2) ?
+        (uint64_t)(arg2 - arg1) : (uint64_t)(arg1 - arg2);
 }
 
-void helper_msa_ceqi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, int64_t i5)
+void helper_msa_asub_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    int64_t td, ts;
+    int64_t td, ts, tt;
     int i;
     int df_bits = 8 * (1 << df);
     for (i = 0; i < MSA_WRLEN / df_bits; i++) {
         ts = msa_load_wr_elem_s64(env, ws, df, i);
-        td = msa_ceq_df(env, df, ts, i5);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_asub_s_df(env, df, ts, tt);
         msa_store_wr_elem(env, td, wd, df, i);
     }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
@@ -483,21 +572,50 @@ void helper_msa_ceqi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
-static inline int64_t msa_cle_s_df(CPUMIPSState *env, uint32_t df,
+static inline uint64_t msa_asub_u_df(CPUMIPSState *env, uint32_t df,
+        uint64_t arg1, uint64_t arg2)
+{
+    uint64_t u_arg1 = UNSIGNED(arg1, df);
+    uint64_t u_arg2 = UNSIGNED(arg2, df);
+    /* unsigned compare */
+    return (u_arg1 < u_arg2) ?
+        (uint64_t)(u_arg2 - u_arg1) : (uint64_t)(u_arg1 - u_arg2);
+}
+
+void helper_msa_asub_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    uint64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        tt = msa_load_wr_elem_i64(env, wt, df, i);
+        td = msa_asub_u_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+static inline int64_t msa_ave_s_df(CPUMIPSState *env, uint32_t df,
         int64_t arg1, int64_t arg2)
 {
-    return arg1 <= arg2 ? -1 : 0;
+    /* signed shift */
+    return (arg1 >> 1) + (arg2 >> 1) + (arg1 & arg2 & 1);
 }
 
-void helper_msa_clei_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, int64_t s5)
+void helper_msa_ave_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    int64_t td, ts;
+    int64_t td, ts, tt;
     int i;
     int df_bits = 8 * (1 << df);
     for (i = 0; i < MSA_WRLEN / df_bits; i++) {
         ts = msa_load_wr_elem_s64(env, ws, df, i);
-        td = msa_cle_s_df(env, df, ts, s5);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_ave_s_df(env, df, ts, tt);
         msa_store_wr_elem(env, td, wd, df, i);
     }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
@@ -505,23 +623,25 @@ void helper_msa_clei_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
-static inline int64_t msa_cle_u_df(CPUMIPSState *env, uint32_t df,
-        int64_t arg1, int64_t arg2)
+static inline uint64_t msa_ave_u_df(CPUMIPSState *env, uint32_t df,
+        uint64_t arg1, uint64_t arg2)
 {
     uint64_t u_arg1 = UNSIGNED(arg1, df);
     uint64_t u_arg2 = UNSIGNED(arg2, df);
-    return u_arg1 <= u_arg2 ? -1 : 0;
+    /* unsigned shift */
+    return (u_arg1 >> 1) + (u_arg2 >> 1) + (u_arg1 & u_arg2 & 1);
 }
 
-void helper_msa_clei_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, int64_t u5)
+void helper_msa_ave_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    uint64_t td, ts;
+    uint64_t td, ts, tt;
     int i;
     int df_bits = 8 * (1 << df);
     for (i = 0; i < MSA_WRLEN / df_bits; i++) {
         ts = msa_load_wr_elem_i64(env, ws, df, i);
-        td = msa_cle_u_df(env, df, ts, u5);
+        tt = msa_load_wr_elem_i64(env, wt, df, i);
+        td = msa_ave_u_df(env, df, ts, tt);
         msa_store_wr_elem(env, td, wd, df, i);
     }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
@@ -529,21 +649,49 @@ void helper_msa_clei_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
-static inline int64_t msa_clt_s_df(CPUMIPSState *env, uint32_t df,
+static inline int64_t msa_aver_s_df(CPUMIPSState *env, uint32_t df,
         int64_t arg1, int64_t arg2)
 {
-    return arg1 < arg2 ? -1 : 0;
+    /* signed shift */
+    return (arg1 >> 1) + (arg2 >> 1) + ((arg1 | arg2) & 1);
 }
 
-void helper_msa_clti_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, int64_t s5)
+void helper_msa_aver_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    int64_t td, ts;
+    int64_t td, ts, tt;
     int i;
     int df_bits = 8 * (1 << df);
     for (i = 0; i < MSA_WRLEN / df_bits; i++) {
         ts = msa_load_wr_elem_s64(env, ws, df, i);
-        td = msa_clt_s_df(env, df, ts, s5);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_aver_s_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+static inline uint64_t msa_aver_u_df(CPUMIPSState *env, uint32_t df,
+        uint64_t arg1, uint64_t arg2)
+{
+    uint64_t u_arg1 = UNSIGNED(arg1, df);
+    uint64_t u_arg2 = UNSIGNED(arg2, df);
+    /* unsigned shift */
+    return (u_arg1 >> 1) + (u_arg2 >> 1) + ((u_arg1 | u_arg2) & 1);
+}
+
+void helper_msa_aver_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    uint64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        tt = msa_load_wr_elem_i64(env, wt, df, i);
+        td = msa_aver_u_df(env, df, ts, tt);
         msa_store_wr_elem(env, td, wd, df, i);
     }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
@@ -551,85 +699,1520 @@ void helper_msa_clti_s_df(CPUMIPSState *env, uint32_t 
df, uint32_t wd,
     }
 }
 
-static inline int64_t msa_clt_u_df(CPUMIPSState *env, uint32_t df,
-        int64_t arg1, int64_t arg2)
-{
-    uint64_t u_arg1 = UNSIGNED(arg1, df);
-    uint64_t u_arg2 = UNSIGNED(arg2, df);
-    return u_arg1 < u_arg2 ? -1 : 0;
-}
-
-void helper_msa_clti_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, int64_t u5)
+/*
+ * Clear one bit of arg1.  The bit index is BIT_POSITION(arg2, df) and the
+ * result is passed through UNSIGNED() for the data format.
+ *
+ * The mask is built from an unsigned constant: for 64-bit elements the
+ * index can presumably reach 63 (TODO confirm against BIT_POSITION), and
+ * 1LL << 63 overflows a signed long long, which is undefined behaviour.
+ */
+static inline int64_t msa_bclr_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
+        int64_t arg2)
+{
+    int32_t b_arg2 = BIT_POSITION(arg2, df);
+    return UNSIGNED(arg1 & (~(1ULL << b_arg2)), df);
+}
+
+/*
+ * Element-wise msa_bclr_df(): operands are loaded from ws and wt with
+ * msa_load_wr_elem_s64() and each result is stored to wd.  When
+ * MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_bclr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        int64_t lhs = msa_load_wr_elem_s64(env, ws, df, n);
+        int64_t rhs = msa_load_wr_elem_s64(env, wt, df, n);
+        int64_t res = msa_bclr_df(env, df, lhs, rhs);
+
+        msa_store_wr_elem(env, res, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Immediate form of the bit-clear helper: every element of ws is combined
+ * with the same value m through msa_bclr_df() and stored to wd.  When
+ * MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_bclri_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t m)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        int64_t src = msa_load_wr_elem_s64(env, ws, df, n);
+        int64_t res = msa_bclr_df(env, df, src, m);
+
+        msa_store_wr_elem(env, res, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Toggle one bit of arg1.  The bit index is BIT_POSITION(arg2, df) and the
+ * result is passed through UNSIGNED() for the data format.
+ *
+ * The mask uses an unsigned constant: for 64-bit elements the index can
+ * presumably reach 63 (TODO confirm against BIT_POSITION), and 1LL << 63
+ * overflows a signed long long, which is undefined behaviour.
+ */
+static inline int64_t msa_bneg_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
+        int64_t arg2)
+{
+    int32_t b_arg2 = BIT_POSITION(arg2, df);
+    return UNSIGNED(arg1 ^ (1ULL << b_arg2), df);
+}
+
+/*
+ * Element-wise msa_bneg_df(): operands are loaded from ws and wt with
+ * msa_load_wr_elem_s64() and each result is stored to wd.  When
+ * MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_bneg_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        int64_t lhs = msa_load_wr_elem_s64(env, ws, df, n);
+        int64_t rhs = msa_load_wr_elem_s64(env, wt, df, n);
+        int64_t res = msa_bneg_df(env, df, lhs, rhs);
+
+        msa_store_wr_elem(env, res, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Immediate form of the bit-toggle helper: every element of ws is combined
+ * with the same value m through msa_bneg_df() and stored to wd.  When
+ * MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_bnegi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t m)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        int64_t src = msa_load_wr_elem_s64(env, ws, df, n);
+        int64_t res = msa_bneg_df(env, df, src, m);
+
+        msa_store_wr_elem(env, res, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Set one bit of arg1.  The bit index is BIT_POSITION(arg2, df) and the
+ * result is passed through UNSIGNED() for the data format.
+ *
+ * The mask uses an unsigned constant: for 64-bit elements the index can
+ * presumably reach 63 (TODO confirm against BIT_POSITION), and 1LL << 63
+ * overflows a signed long long, which is undefined behaviour.
+ */
+static inline int64_t msa_bset_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
+        int64_t arg2)
+{
+    int32_t b_arg2 = BIT_POSITION(arg2, df);
+    return UNSIGNED(arg1 | (1ULL << b_arg2), df);
+}
+
+/*
+ * Element-wise msa_bset_df(): operands are loaded from ws and wt with
+ * msa_load_wr_elem_s64() and each result is stored to wd.  When
+ * MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_bset_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        int64_t lhs = msa_load_wr_elem_s64(env, ws, df, n);
+        int64_t rhs = msa_load_wr_elem_s64(env, wt, df, n);
+        int64_t res = msa_bset_df(env, df, lhs, rhs);
+
+        msa_store_wr_elem(env, res, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Immediate form of the bit-set helper: every element of ws is combined
+ * with the same value m through msa_bset_df() and stored to wd.  When
+ * MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_bseti_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t m)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        int64_t src = msa_load_wr_elem_s64(env, ws, df, n);
+        int64_t res = msa_bset_df(env, df, src, m);
+
+        msa_store_wr_elem(env, res, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Insert the top nbits bits of arg1 into dest, keeping the remaining low
+ * bits of dest, where nbits = BIT_POSITION(arg2, df) + 1.  When nbits
+ * covers the whole element, arg1 (after UNSIGNED()) is returned as is,
+ * which also avoids shifting by the full element width.
+ */
+static inline int64_t msa_binsl_df(CPUMIPSState *env, uint32_t df,
+        int64_t dest, int64_t arg1, int64_t arg2)
+{
+    const uint64_t src = UNSIGNED(arg1, df);
+    const uint64_t old = UNSIGNED(dest, df);
+    const int32_t nbits = BIT_POSITION(arg2, df) + 1;
+    const int32_t rest = DF_BITS(df) - nbits;
+    uint64_t kept_low, new_high;
+
+    if (nbits == DF_BITS(df)) {
+        return src;
+    }
+
+    /* drop the top nbits of dest, keep the top nbits of arg1 */
+    kept_low = UNSIGNED(UNSIGNED(old << nbits, df) >> nbits, df);
+    new_high = UNSIGNED(UNSIGNED(src >> rest, df) << rest, df);
+    return kept_low | new_high;
+}
+
+/*
+ * Element-wise msa_binsl_df(): the current wd element is the destination
+ * operand, ws supplies the bits and wt the bit count selector.  Results
+ * are written back to wd.  When MSAIR_WRP_BIT is set in msair, bit wd of
+ * msamodify is set as well.
+ */
+void helper_msa_binsl_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        int64_t src = msa_load_wr_elem_s64(env, ws, df, n);
+        int64_t sel = msa_load_wr_elem_s64(env, wt, df, n);
+        int64_t dst = msa_load_wr_elem_s64(env, wd, df, n);
+
+        dst = msa_binsl_df(env, df, dst, src, sel);
+        msa_store_wr_elem(env, dst, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Immediate form of the insert-left helper: msa_binsl_df() is applied to
+ * each wd/ws element pair with the same selector m.  Results are written
+ * back to wd.  When MSAIR_WRP_BIT is set in msair, bit wd of msamodify is
+ * set as well.
+ */
+void helper_msa_binsli_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t m)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        int64_t src = msa_load_wr_elem_s64(env, ws, df, n);
+        int64_t dst = msa_load_wr_elem_s64(env, wd, df, n);
+
+        dst = msa_binsl_df(env, df, dst, src, m);
+        msa_store_wr_elem(env, dst, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Insert the low nbits bits of arg1 into dest, keeping the remaining high
+ * bits of dest, where nbits = BIT_POSITION(arg2, df) + 1.  When nbits
+ * covers the whole element, arg1 (after UNSIGNED()) is returned as is,
+ * which also avoids shifting by the full element width.
+ */
+static inline int64_t msa_binsr_df(CPUMIPSState *env, uint32_t df,
+        int64_t dest, int64_t arg1, int64_t arg2)
+{
+    const uint64_t src = UNSIGNED(arg1, df);
+    const uint64_t old = UNSIGNED(dest, df);
+    const int32_t nbits = BIT_POSITION(arg2, df) + 1;
+    const int32_t rest = DF_BITS(df) - nbits;
+    uint64_t kept_high, new_low;
+
+    if (nbits == DF_BITS(df)) {
+        return src;
+    }
+
+    /* drop the low nbits of dest, keep the low nbits of arg1 */
+    kept_high = UNSIGNED(UNSIGNED(old >> nbits, df) << nbits, df);
+    new_low = UNSIGNED(UNSIGNED(src << rest, df) >> rest, df);
+    return kept_high | new_low;
+}
+
+/*
+ * Element-wise msa_binsr_df(): the current wd element is the destination
+ * operand, ws supplies the bits and wt the bit count selector.  Results
+ * are written back to wd.  When MSAIR_WRP_BIT is set in msair, bit wd of
+ * msamodify is set as well.
+ */
+void helper_msa_binsr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        int64_t src = msa_load_wr_elem_s64(env, ws, df, n);
+        int64_t sel = msa_load_wr_elem_s64(env, wt, df, n);
+        int64_t dst = msa_load_wr_elem_s64(env, wd, df, n);
+
+        dst = msa_binsr_df(env, df, dst, src, sel);
+        msa_store_wr_elem(env, dst, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Immediate form of the insert-right helper: msa_binsr_df() is applied to
+ * each wd/ws element pair with the same selector m.  Results are written
+ * back to wd.  When MSAIR_WRP_BIT is set in msair, bit wd of msamodify is
+ * set as well.
+ */
+void helper_msa_binsri_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t m)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        int64_t src = msa_load_wr_elem_s64(env, ws, df, n);
+        int64_t dst = msa_load_wr_elem_s64(env, wd, df, n);
+
+        dst = msa_binsr_df(env, df, dst, src, m);
+        msa_store_wr_elem(env, dst, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * dest = (dest & ~arg2) | (arg1 & arg2), passed through UNSIGNED() for df:
+ * bits of arg1 are taken where arg2 has a 1, bits of dest are kept
+ * elsewhere.  Every argument is parenthesised so that expression arguments
+ * expand safely.  dest must be an lvalue and is evaluated more than once.
+ */
+#define BIT_MOVE_IF_NOT_ZERO(dest, arg1, arg2, df) \
+            (dest) = UNSIGNED((((dest) & (~(arg2))) | ((arg1) & (arg2))), df)
+
+/*
+ * Apply BIT_MOVE_IF_NOT_ZERO to every byte element of wd, with the
+ * matching byte of ws as source and the immediate i8 as the bit mask.
+ * When MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_bmnzi_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
+        uint32_t i8)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    ALL_B_ELEMENTS(i, MSA_WRLEN) {
+        BIT_MOVE_IF_NOT_ZERO(B(pwd, i), B(pws, i), i8, DF_BYTE);
+    } DONE_ALL_ELEMENTS;
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * dest = (dest & arg2) | (arg1 & ~arg2), passed through UNSIGNED() for df:
+ * bits of arg1 are taken where arg2 has a 0, bits of dest are kept
+ * elsewhere.  Every argument is parenthesised so that expression arguments
+ * expand safely.  dest must be an lvalue and is evaluated more than once.
+ */
+#define BIT_MOVE_IF_ZERO(dest, arg1, arg2, df) \
+            (dest) = UNSIGNED(((dest) & (arg2)) | ((arg1) & (~(arg2))), df)
+
+/*
+ * Apply BIT_MOVE_IF_ZERO to every byte element of wd, with the matching
+ * byte of ws as source and the immediate i8 as the bit mask.
+ * When MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_bmzi_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
+        uint32_t i8)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    ALL_B_ELEMENTS(i, MSA_WRLEN) {
+        BIT_MOVE_IF_ZERO(B(pwd, i), B(pws, i), i8, DF_BYTE);
+    } DONE_ALL_ELEMENTS;
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * dest = (arg1 & ~dest) | (arg2 & dest), passed through UNSIGNED() for df:
+ * the old value of dest acts as the selector, picking arg2 where it has a
+ * 1 and arg1 where it has a 0.  Every argument is parenthesised so that
+ * expression arguments expand safely.  dest must be an lvalue and is
+ * evaluated more than once.
+ */
+#define BIT_SELECT(dest, arg1, arg2, df) \
+            (dest) = UNSIGNED(((arg1) & (~(dest))) | ((arg2) & (dest)), df)
+
+/*
+ * Apply BIT_SELECT to every byte element of wd, choosing between the
+ * matching byte of ws and the immediate i8.
+ * When MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_bseli_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
+        uint32_t i8)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    ALL_B_ELEMENTS(i, MSA_WRLEN) {
+        BIT_SELECT(B(pwd, i), B(pws, i), i8, DF_BYTE);
+    } DONE_ALL_ELEMENTS;
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/* Equality compare: -1 (all bits set) when arg1 == arg2, otherwise 0. */
+static inline int64_t msa_ceq_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
+        int64_t arg2)
+{
+    if (arg1 == arg2) {
+        return -1;
+    }
+    return 0;
+}
+
+/*
+ * Element-wise msa_ceq_df() on ws and wt (loaded with
+ * msa_load_wr_elem_s64()), results stored to wd.  When MSAIR_WRP_BIT is
+ * set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_ceq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        int64_t lhs = msa_load_wr_elem_s64(env, ws, df, n);
+        int64_t rhs = msa_load_wr_elem_s64(env, wt, df, n);
+        int64_t res = msa_ceq_df(env, df, lhs, rhs);
+
+        msa_store_wr_elem(env, res, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Immediate form of the equality helper: each element of ws is compared
+ * with i5 through msa_ceq_df() and the result stored to wd.  When
+ * MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_ceqi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t i5)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        int64_t src = msa_load_wr_elem_s64(env, ws, df, n);
+        int64_t res = msa_ceq_df(env, df, src, i5);
+
+        msa_store_wr_elem(env, res, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/* Signed compare: -1 (all bits set) when arg1 <= arg2, otherwise 0. */
+static inline int64_t msa_cle_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    if (arg1 <= arg2) {
+        return -1;
+    }
+    return 0;
+}
+
+/*
+ * Element-wise msa_cle_s_df() on ws and wt (loaded with
+ * msa_load_wr_elem_s64()), results stored to wd.  When MSAIR_WRP_BIT is
+ * set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_cle_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        int64_t lhs = msa_load_wr_elem_s64(env, ws, df, n);
+        int64_t rhs = msa_load_wr_elem_s64(env, wt, df, n);
+        int64_t res = msa_cle_s_df(env, df, lhs, rhs);
+
+        msa_store_wr_elem(env, res, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Immediate form of the signed less-or-equal helper: each element of ws
+ * is compared with s5 through msa_cle_s_df() and the result stored to wd.
+ * When MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_clei_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t s5)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        int64_t src = msa_load_wr_elem_s64(env, ws, df, n);
+        int64_t res = msa_cle_s_df(env, df, src, s5);
+
+        msa_store_wr_elem(env, res, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Unsigned compare: both operands go through UNSIGNED() for the data
+ * format; returns -1 (all bits set) when arg1 <= arg2, otherwise 0.
+ */
+static inline int64_t msa_cle_u_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    const uint64_t lhs = UNSIGNED(arg1, df);
+    const uint64_t rhs = UNSIGNED(arg2, df);
+
+    if (lhs > rhs) {
+        return 0;
+    }
+    return -1;
+}
+
+/*
+ * Element-wise msa_cle_u_df() on ws and wt (loaded with
+ * msa_load_wr_elem_i64()), results stored to wd.  When MSAIR_WRP_BIT is
+ * set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_cle_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        uint64_t lhs = msa_load_wr_elem_i64(env, ws, df, n);
+        uint64_t rhs = msa_load_wr_elem_i64(env, wt, df, n);
+        uint64_t res = msa_cle_u_df(env, df, lhs, rhs);
+
+        msa_store_wr_elem(env, res, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Immediate form of the unsigned less-or-equal helper: each element of ws
+ * is compared with u5 through msa_cle_u_df() and the result stored to wd.
+ * When MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_clei_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t u5)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        uint64_t src = msa_load_wr_elem_i64(env, ws, df, n);
+        uint64_t res = msa_cle_u_df(env, df, src, u5);
+
+        msa_store_wr_elem(env, res, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/* Signed compare: -1 (all bits set) when arg1 < arg2, otherwise 0. */
+static inline int64_t msa_clt_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    if (arg1 < arg2) {
+        return -1;
+    }
+    return 0;
+}
+
+/*
+ * Element-wise msa_clt_s_df() on ws and wt (loaded with
+ * msa_load_wr_elem_s64()), results stored to wd.  When MSAIR_WRP_BIT is
+ * set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_clt_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        int64_t lhs = msa_load_wr_elem_s64(env, ws, df, n);
+        int64_t rhs = msa_load_wr_elem_s64(env, wt, df, n);
+        int64_t res = msa_clt_s_df(env, df, lhs, rhs);
+
+        msa_store_wr_elem(env, res, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Immediate form of the signed less-than helper: each element of ws is
+ * compared with s5 through msa_clt_s_df() and the result stored to wd.
+ * When MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_clti_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t s5)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        int64_t src = msa_load_wr_elem_s64(env, ws, df, n);
+        int64_t res = msa_clt_s_df(env, df, src, s5);
+
+        msa_store_wr_elem(env, res, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Unsigned compare: both operands go through UNSIGNED() for the data
+ * format; returns -1 (all bits set) when arg1 < arg2, otherwise 0.
+ */
+static inline int64_t msa_clt_u_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    const uint64_t lhs = UNSIGNED(arg1, df);
+    const uint64_t rhs = UNSIGNED(arg2, df);
+
+    if (lhs >= rhs) {
+        return 0;
+    }
+    return -1;
+}
+
+/*
+ * Element-wise msa_clt_u_df() on ws and wt (loaded with
+ * msa_load_wr_elem_i64()), results stored to wd.  When MSAIR_WRP_BIT is
+ * set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_clt_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        uint64_t lhs = msa_load_wr_elem_i64(env, ws, df, n);
+        uint64_t rhs = msa_load_wr_elem_i64(env, wt, df, n);
+        uint64_t res = msa_clt_u_df(env, df, lhs, rhs);
+
+        msa_store_wr_elem(env, res, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Immediate form of the unsigned less-than helper: each element of ws is
+ * compared with u5 through msa_clt_u_df() and the result stored to wd.
+ * When MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set as well.
+ *
+ * Uses uint64_t temporaries and msa_load_wr_elem_i64(), like the other
+ * unsigned compare helpers (helper_msa_clei_u_df, helper_msa_clt_u_df).
+ * msa_clt_u_df() passes its operands through UNSIGNED() itself, so the
+ * result should not depend on which loader is used (assuming both loaders
+ * agree on the low df bits).
+ */
+void helper_msa_clti_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t u5)
+{
+    uint64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        td = msa_clt_u_df(env, df, ts, u5);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Split a df-wide element into its two half-width parts.  "EVEN" is the
+ * low half and "ODD" the high half; the shift pairs widen each half to
+ * 64 bits with sign extension (SIGNED_*) or zero extension (UNSIGNED_*).
+ */
+#define SIGNED_EVEN(a, df) \
+        ((((int64_t)(a)) << (64 - DF_BITS(df)/2)) >> (64 - DF_BITS(df)/2))
+#define UNSIGNED_EVEN(a, df) \
+        ((((uint64_t)(a)) << (64 - DF_BITS(df)/2)) >> (64 - DF_BITS(df)/2))
+
+#define SIGNED_ODD(a, df) \
+        ((((int64_t)(a)) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df)/2))
+#define UNSIGNED_ODD(a, df) \
+        ((((uint64_t)(a)) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df)/2))
+
+/*
+ * Declare e and o and initialise them with the even/odd halves of a.
+ * Each expansion ends in ';', so these must be used where declarations
+ * are allowed.
+ */
+#define SIGNED_EXTRACT(e, o, a, df)             \
+    int64_t e = SIGNED_EVEN(a, df);             \
+    int64_t o = SIGNED_ODD(a, df);
+
+/*
+ * The unsigned variant declares uint64_t.  With 64-bit elements (assuming
+ * DF_BITS(DF_DOUBLE) is 64) the halves are full 32-bit values and their
+ * product can exceed INT64_MAX; that would be signed-overflow undefined
+ * behaviour in int64_t, whereas uint64_t arithmetic wraps modulo 2^64.
+ */
+#define UNSIGNED_EXTRACT(e, o, a, df)           \
+    uint64_t e = UNSIGNED_EVEN(a, df);          \
+    uint64_t o = UNSIGNED_ODD(a, df);
+
+/* Sum of the sign-extended odd half of arg1 and even half of arg2. */
+static inline int64_t msa_hadd_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    const int64_t odd_half = SIGNED_ODD(arg1, df);
+    const int64_t even_half = SIGNED_EVEN(arg2, df);
+
+    return odd_half + even_half;
+}
+
+/*
+ * Element-wise msa_hadd_s_df() on ws and wt (loaded with
+ * msa_load_wr_elem_s64()), results stored to wd.  When MSAIR_WRP_BIT is
+ * set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_hadd_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        int64_t lhs = msa_load_wr_elem_s64(env, ws, df, n);
+        int64_t rhs = msa_load_wr_elem_s64(env, wt, df, n);
+        int64_t res = msa_hadd_s_df(env, df, lhs, rhs);
+
+        msa_store_wr_elem(env, res, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/* Sum of the zero-extended odd half of arg1 and even half of arg2. */
+static inline int64_t msa_hadd_u_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    const uint64_t odd_half = UNSIGNED_ODD(arg1, df);
+    const uint64_t even_half = UNSIGNED_EVEN(arg2, df);
+
+    return odd_half + even_half;
+}
+
+/*
+ * Element-wise msa_hadd_u_df() on ws and wt (loaded with
+ * msa_load_wr_elem_i64()), results stored to wd.  When MSAIR_WRP_BIT is
+ * set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_hadd_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        uint64_t lhs = msa_load_wr_elem_i64(env, ws, df, n);
+        uint64_t rhs = msa_load_wr_elem_i64(env, wt, df, n);
+        uint64_t res = msa_hadd_u_df(env, df, lhs, rhs);
+
+        msa_store_wr_elem(env, res, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/* Sign-extended odd half of arg1 minus sign-extended even half of arg2. */
+static inline int64_t msa_hsub_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    const int64_t odd_half = SIGNED_ODD(arg1, df);
+    const int64_t even_half = SIGNED_EVEN(arg2, df);
+
+    return odd_half - even_half;
+}
+
+/*
+ * Element-wise msa_hsub_s_df() on ws and wt (loaded with
+ * msa_load_wr_elem_s64()), results stored to wd.  When MSAIR_WRP_BIT is
+ * set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_hsub_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        int64_t lhs = msa_load_wr_elem_s64(env, ws, df, n);
+        int64_t rhs = msa_load_wr_elem_s64(env, wt, df, n);
+        int64_t res = msa_hsub_s_df(env, df, lhs, rhs);
+
+        msa_store_wr_elem(env, res, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/* Zero-extended odd half of arg1 minus zero-extended even half of arg2. */
+static inline int64_t msa_hsub_u_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    const uint64_t odd_half = UNSIGNED_ODD(arg1, df);
+    const uint64_t even_half = UNSIGNED_EVEN(arg2, df);
+
+    return odd_half - even_half;
+}
+
+/*
+ * Element-wise msa_hsub_u_df() on ws and wt (loaded with
+ * msa_load_wr_elem_i64()), results stored to wd.  When MSAIR_WRP_BIT is
+ * set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_hsub_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        uint64_t lhs = msa_load_wr_elem_i64(env, ws, df, n);
+        uint64_t rhs = msa_load_wr_elem_i64(env, wt, df, n);
+        uint64_t res = msa_hsub_u_df(env, df, lhs, rhs);
+
+        msa_store_wr_elem(env, res, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Signed dot product of the two halves of each operand:
+ * even(arg1) * even(arg2) + odd(arg1) * odd(arg2).
+ */
+static inline int64_t msa_dotp_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    SIGNED_EXTRACT(lo1, hi1, arg1, df);
+    SIGNED_EXTRACT(lo2, hi2, arg2, df);
+    int64_t even_prod = lo1 * lo2;
+    int64_t odd_prod = hi1 * hi2;
+
+    return even_prod + odd_prod;
+}
+
+/*
+ * Element-wise msa_dotp_s_df() on ws and wt (loaded with
+ * msa_load_wr_elem_s64()), results stored to wd.  When MSAIR_WRP_BIT is
+ * set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_dotp_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        int64_t lhs = msa_load_wr_elem_s64(env, ws, df, n);
+        int64_t rhs = msa_load_wr_elem_s64(env, wt, df, n);
+        int64_t res = msa_dotp_s_df(env, df, lhs, rhs);
+
+        msa_store_wr_elem(env, res, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Unsigned dot product of the two halves of each operand:
+ * even(arg1) * even(arg2) + odd(arg1) * odd(arg2), with the halves
+ * produced by UNSIGNED_EXTRACT().
+ */
+static inline int64_t msa_dotp_u_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    UNSIGNED_EXTRACT(lo1, hi1, arg1, df);
+    UNSIGNED_EXTRACT(lo2, hi2, arg2, df);
+
+    return (lo1 * lo2) + (hi1 * hi2);
+}
+
+/*
+ * Element-wise msa_dotp_u_df() on ws and wt (loaded with
+ * msa_load_wr_elem_i64()), results stored to wd.  When MSAIR_WRP_BIT is
+ * set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_dotp_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        uint64_t lhs = msa_load_wr_elem_i64(env, ws, df, n);
+        uint64_t rhs = msa_load_wr_elem_i64(env, wt, df, n);
+        uint64_t res = msa_dotp_u_df(env, df, lhs, rhs);
+
+        msa_store_wr_elem(env, res, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Signed dot product accumulated onto dest:
+ * dest + even(arg1) * even(arg2) + odd(arg1) * odd(arg2).
+ */
+static inline int64_t msa_dpadd_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t dest, int64_t arg1, int64_t arg2)
+{
+    SIGNED_EXTRACT(lo1, hi1, arg1, df);
+    SIGNED_EXTRACT(lo2, hi2, arg2, df);
+    int64_t even_prod = lo1 * lo2;
+    int64_t odd_prod = hi1 * hi2;
+
+    return dest + even_prod + odd_prod;
+}
+
+/*
+ * Element-wise msa_dpadd_s_df(): the current wd element is the
+ * accumulator, ws and wt supply the factors.  Results are written back to
+ * wd.  When MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set as
+ * well.
+ */
+void helper_msa_dpadd_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        int64_t lhs = msa_load_wr_elem_s64(env, ws, df, n);
+        int64_t rhs = msa_load_wr_elem_s64(env, wt, df, n);
+        int64_t acc = msa_load_wr_elem_s64(env, wd, df, n);
+
+        acc = msa_dpadd_s_df(env, df, acc, lhs, rhs);
+        msa_store_wr_elem(env, acc, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Unsigned dot product accumulated onto dest:
+ * dest + even(arg1) * even(arg2) + odd(arg1) * odd(arg2), with the halves
+ * produced by UNSIGNED_EXTRACT().
+ */
+static inline int64_t msa_dpadd_u_df(CPUMIPSState *env, uint32_t df,
+        int64_t dest, int64_t arg1, int64_t arg2)
+{
+    UNSIGNED_EXTRACT(lo1, hi1, arg1, df);
+    UNSIGNED_EXTRACT(lo2, hi2, arg2, df);
+
+    return dest + (lo1 * lo2) + (hi1 * hi2);
+}
+
+/*
+ * Element-wise msa_dpadd_u_df(): the current wd element (loaded with
+ * msa_load_wr_elem_s64()) is the accumulator, ws and wt (loaded with
+ * msa_load_wr_elem_i64()) supply the factors.  Results are written back
+ * to wd.  When MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set
+ * as well.
+ */
+void helper_msa_dpadd_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        uint64_t lhs = msa_load_wr_elem_i64(env, ws, df, n);
+        uint64_t rhs = msa_load_wr_elem_i64(env, wt, df, n);
+        uint64_t acc = msa_load_wr_elem_s64(env, wd, df, n);
+
+        acc = msa_dpadd_u_df(env, df, acc, lhs, rhs);
+        msa_store_wr_elem(env, acc, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Signed dot product subtracted from dest:
+ * dest - (even(arg1) * even(arg2) + odd(arg1) * odd(arg2)).
+ */
+static inline int64_t msa_dpsub_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t dest, int64_t arg1, int64_t arg2)
+{
+    SIGNED_EXTRACT(lo1, hi1, arg1, df);
+    SIGNED_EXTRACT(lo2, hi2, arg2, df);
+    int64_t dot = (lo1 * lo2) + (hi1 * hi2);
+
+    return dest - dot;
+}
+
+/*
+ * Element-wise msa_dpsub_s_df(): the current wd element is the
+ * accumulator, ws and wt supply the factors.  Results are written back to
+ * wd.  When MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set as
+ * well.
+ */
+void helper_msa_dpsub_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        int64_t lhs = msa_load_wr_elem_s64(env, ws, df, n);
+        int64_t rhs = msa_load_wr_elem_s64(env, wt, df, n);
+        int64_t acc = msa_load_wr_elem_s64(env, wd, df, n);
+
+        acc = msa_dpsub_s_df(env, df, acc, lhs, rhs);
+        msa_store_wr_elem(env, acc, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Unsigned dot product subtracted from dest:
+ * dest - (even(arg1) * even(arg2) + odd(arg1) * odd(arg2)), with the
+ * halves produced by UNSIGNED_EXTRACT().
+ */
+static inline int64_t msa_dpsub_u_df(CPUMIPSState *env, uint32_t df,
+        int64_t dest, int64_t arg1, int64_t arg2)
+{
+    UNSIGNED_EXTRACT(lo1, hi1, arg1, df);
+    UNSIGNED_EXTRACT(lo2, hi2, arg2, df);
+
+    return dest - ((lo1 * lo2) + (hi1 * hi2));
+}
+
+/*
+ * Element-wise msa_dpsub_u_df(): the current wd element (loaded with
+ * msa_load_wr_elem_s64()) is the accumulator, ws and wt (loaded with
+ * msa_load_wr_elem_i64()) supply the factors.  Results are written back
+ * to wd.  When MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set
+ * as well.
+ */
+void helper_msa_dpsub_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    const int elem_bits = 8 * (1 << df);
+    int n;
+
+    for (n = 0; n < MSA_WRLEN / elem_bits; n++) {
+        uint64_t lhs = msa_load_wr_elem_i64(env, ws, df, n);
+        uint64_t rhs = msa_load_wr_elem_i64(env, wt, df, n);
+        uint64_t acc = msa_load_wr_elem_s64(env, wd, df, n);
+
+        acc = msa_dpsub_u_df(env, df, acc, lhs, rhs);
+        msa_store_wr_elem(env, acc, wd, df, n);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Interleave the even-indexed elements of pwt and pws.  For each pair
+ * index i, result element 2*i is element 2*i of pwt and result element
+ * 2*i+1 is element 2*i of pws.  The result is assembled in a scratch
+ * register and copied to pwd with msa_move_v() at the end (presumably so
+ * that pwd may alias a source register).  An unknown df trips assert(0).
+ */
+static inline void msa_ilvev_df(CPUMIPSState *env, uint32_t df, void *pwd,
+        void *pws, void *pwt)
+{
+    wr_t scratch, *ptmp = &scratch;
+
+    switch (df) {
+    case DF_BYTE:
+        /* bytes: one iteration per byte pair */
+        ALL_H_ELEMENTS(i, MSA_WRLEN) {
+            B(ptmp, 2*i)   = B(pwt, 2*i);
+            B(ptmp, 2*i+1) = B(pws, 2*i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_HALF:
+        /* halfwords: one iteration per halfword pair */
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            H(ptmp, 2*i)   = H(pwt, 2*i);
+            H(ptmp, 2*i+1) = H(pws, 2*i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_WORD:
+        /* words: one iteration per word pair */
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            W(ptmp, 2*i)   = W(pwt, 2*i);
+            W(ptmp, 2*i+1) = W(pws, 2*i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_DOUBLE:
+        /* doublewords: one iteration per doubleword pair */
+        ALL_Q_ELEMENTS(i, MSA_WRLEN) {
+            D(ptmp, 2*i)   = D(pwt, 2*i);
+            D(ptmp, 2*i+1) = D(pws, 2*i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+
+    msa_move_v(pwd, ptmp);
+}
+
+/*
+ * Run msa_ilvev_df() on the fpr slots of wd, ws and wt.  When
+ * MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_ilvev_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    void *dst = &(env->active_fpu.fpr[wd]);
+    void *src_s = &(env->active_fpu.fpr[ws]);
+    void *src_t = &(env->active_fpu.fpr[wt]);
+
+    msa_ilvev_df(env, df, dst, src_s, src_t);
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Interleave the odd-indexed elements of pwt and pws.  For each pair
+ * index i, result element 2*i is element 2*i+1 of pwt and result element
+ * 2*i+1 is element 2*i+1 of pws.  The result is assembled in a scratch
+ * register and copied to pwd with msa_move_v() at the end (presumably so
+ * that pwd may alias a source register).  An unknown df trips assert(0).
+ */
+static inline void msa_ilvod_df(CPUMIPSState *env, uint32_t df, void *pwd,
+        void *pws, void *pwt)
+{
+    wr_t scratch, *ptmp = &scratch;
+
+    switch (df) {
+    case DF_BYTE:
+        /* bytes: one iteration per byte pair */
+        ALL_H_ELEMENTS(i, MSA_WRLEN) {
+            B(ptmp, 2*i)   = B(pwt, 2*i+1);
+            B(ptmp, 2*i+1) = B(pws, 2*i+1);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_HALF:
+        /* halfwords: one iteration per halfword pair */
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            H(ptmp, 2*i)   = H(pwt, 2*i+1);
+            H(ptmp, 2*i+1) = H(pws, 2*i+1);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_WORD:
+        /* words: one iteration per word pair */
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            W(ptmp, 2*i)   = W(pwt, 2*i+1);
+            W(ptmp, 2*i+1) = W(pws, 2*i+1);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_DOUBLE:
+        /* doublewords: one iteration per doubleword pair */
+        ALL_Q_ELEMENTS(i, MSA_WRLEN) {
+            D(ptmp, 2*i)   = D(pwt, 2*i+1);
+            D(ptmp, 2*i+1) = D(pws, 2*i+1);
+        } DONE_ALL_ELEMENTS;
+        break;
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+
+    msa_move_v(pwd, ptmp);
+}
+
+/*
+ * Run msa_ilvod_df() on the fpr slots of wd, ws and wt.  When
+ * MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_ilvod_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    void *dst = &(env->active_fpu.fpr[wd]);
+    void *src_s = &(env->active_fpu.fpr[ws]);
+    void *src_t = &(env->active_fpu.fpr[wt]);
+
+    msa_ilvod_df(env, df, dst, src_s, src_t);
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Interleave elements taken through the BL/HL/WL/DL accessors (presumably
+ * the left half of the register; confirm against their definitions).
+ * For each index i, result element 2*i comes from pwt and result element
+ * 2*i+1 from pws.  The result is assembled in a scratch register and
+ * copied to pwd with msa_move_v() at the end (presumably so that pwd may
+ * alias a source register).  An unknown df trips assert(0).
+ */
+static inline void msa_ilvl_df(CPUMIPSState *env, uint32_t df, void *pwd,
+        void *pws, void *pwt)
+{
+    wr_t scratch, *ptmp = &scratch;
+
+    switch (df) {
+    case DF_BYTE:
+        /* bytes: one iteration per byte pair */
+        ALL_H_ELEMENTS(i, MSA_WRLEN) {
+            B(ptmp, 2*i)   = BL(pwt, i);
+            B(ptmp, 2*i+1) = BL(pws, i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_HALF:
+        /* halfwords: one iteration per halfword pair */
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            H(ptmp, 2*i)   = HL(pwt, i);
+            H(ptmp, 2*i+1) = HL(pws, i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_WORD:
+        /* words: one iteration per word pair */
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            W(ptmp, 2*i)   = WL(pwt, i);
+            W(ptmp, 2*i+1) = WL(pws, i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_DOUBLE:
+        /* doublewords: one iteration per doubleword pair */
+        ALL_Q_ELEMENTS(i, MSA_WRLEN) {
+            D(ptmp, 2*i)   = DL(pwt, i);
+            D(ptmp, 2*i+1) = DL(pws, i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+
+    msa_move_v(pwd, ptmp);
+}
+
+/*
+ * Run msa_ilvl_df() on the fpr slots of wd, ws and wt.  When
+ * MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_ilvl_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    void *dst = &(env->active_fpu.fpr[wd]);
+    void *src_s = &(env->active_fpu.fpr[ws]);
+    void *src_t = &(env->active_fpu.fpr[wt]);
+
+    msa_ilvl_df(env, df, dst, src_s, src_t);
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Interleave elements taken through the BR/HR/WR/DR accessors (presumably
+ * the right half of the register; confirm against their definitions).
+ * For each index i, result element 2*i comes from pwt and result element
+ * 2*i+1 from pws.  The result is assembled in a scratch register and
+ * copied to pwd with msa_move_v() at the end (presumably so that pwd may
+ * alias a source register).  An unknown df trips assert(0).
+ */
+static inline void msa_ilvr_df(CPUMIPSState *env, uint32_t df, void *pwd,
+        void *pws, void *pwt)
+{
+    wr_t scratch, *ptmp = &scratch;
+
+    switch (df) {
+    case DF_BYTE:
+        /* bytes: one iteration per byte pair */
+        ALL_H_ELEMENTS(i, MSA_WRLEN) {
+            B(ptmp, 2*i)   = BR(pwt, i);
+            B(ptmp, 2*i+1) = BR(pws, i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_HALF:
+        /* halfwords: one iteration per halfword pair */
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            H(ptmp, 2*i)   = HR(pwt, i);
+            H(ptmp, 2*i+1) = HR(pws, i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_WORD:
+        /* words: one iteration per word pair */
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            W(ptmp, 2*i)   = WR(pwt, i);
+            W(ptmp, 2*i+1) = WR(pws, i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_DOUBLE:
+        /* doublewords: one iteration per doubleword pair */
+        ALL_Q_ELEMENTS(i, MSA_WRLEN) {
+            D(ptmp, 2*i)   = DR(pwt, i);
+            D(ptmp, 2*i+1) = DR(pws, i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+
+    msa_move_v(pwd, ptmp);
+}
+
+/*
+ * Run msa_ilvr_df() on the fpr slots of wd, ws and wt.  When
+ * MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_ilvr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    void *dst = &(env->active_fpu.fpr[wd]);
+    void *src_s = &(env->active_fpu.fpr[ws]);
+    void *src_t = &(env->active_fpu.fpr[wt]);
+
+    msa_ilvr_df(env, df, dst, src_s, src_t);
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Pack the even-indexed elements of both sources.  For each index i,
+ * element 2*i of pwt is written through the BR/HR/WR/DR accessor and
+ * element 2*i of pws through the BL/HL/WL/DL accessor (presumably the
+ * right and left halves of the result; confirm against their
+ * definitions).  The result is assembled in a scratch register and copied
+ * to pwd with msa_move_v() at the end.  An unknown df trips assert(0).
+ */
+static inline void msa_pckev_df(CPUMIPSState *env, uint32_t df, void *pwd,
+        void *pws, void *pwt)
+{
+    wr_t scratch, *ptmp = &scratch;
+
+    switch (df) {
+    case DF_BYTE:
+        /* bytes: one iteration per byte pair */
+        ALL_H_ELEMENTS(i, MSA_WRLEN) {
+            BR(ptmp, i) = B(pwt, 2*i);
+            BL(ptmp, i) = B(pws, 2*i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_HALF:
+        /* halfwords: one iteration per halfword pair */
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            HR(ptmp, i) = H(pwt, 2*i);
+            HL(ptmp, i) = H(pws, 2*i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_WORD:
+        /* words: one iteration per word pair */
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            WR(ptmp, i) = W(pwt, 2*i);
+            WL(ptmp, i) = W(pws, 2*i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_DOUBLE:
+        /* doublewords: one iteration per doubleword pair */
+        ALL_Q_ELEMENTS(i, MSA_WRLEN) {
+            DR(ptmp, i) = D(pwt, 2*i);
+            DL(ptmp, i) = D(pws, 2*i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+
+    msa_move_v(pwd, ptmp);
+}
+
+/*
+ * Run msa_pckev_df() on the fpr slots of wd, ws and wt.  When
+ * MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set as well.
+ */
+void helper_msa_pckev_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    void *dst = &(env->active_fpu.fpr[wd]);
+    void *src_s = &(env->active_fpu.fpr[ws]);
+    void *src_t = &(env->active_fpu.fpr[wt]);
+
+    msa_pckev_df(env, df, dst, src_s, src_t);
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Pack the odd-indexed elements of both sources.  For each index i,
+ * element 2*i+1 of pwt is written through the BR/HR/WR/DR accessor and
+ * element 2*i+1 of pws through the BL/HL/WL/DL accessor (presumably the
+ * right and left halves of the result; confirm against their
+ * definitions).  The result is assembled in a scratch register and copied
+ * to pwd with msa_move_v() at the end.  An unknown df trips assert(0).
+ */
+static inline void msa_pckod_df(CPUMIPSState *env, uint32_t df, void *pwd,
+        void *pws, void *pwt)
+{
+    wr_t scratch, *ptmp = &scratch;
+
+    switch (df) {
+    case DF_BYTE:
+        /* bytes: one iteration per byte pair */
+        ALL_H_ELEMENTS(i, MSA_WRLEN) {
+            BR(ptmp, i) = B(pwt, 2*i+1);
+            BL(ptmp, i) = B(pws, 2*i+1);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_HALF:
+        /* halfwords: one iteration per halfword pair */
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            HR(ptmp, i) = H(pwt, 2*i+1);
+            HL(ptmp, i) = H(pws, 2*i+1);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_WORD:
+        /* words: one iteration per word pair */
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            WR(ptmp, i) = W(pwt, 2*i+1);
+            WL(ptmp, i) = W(pws, 2*i+1);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_DOUBLE:
+        /* doublewords: one iteration per doubleword pair */
+        ALL_Q_ELEMENTS(i, MSA_WRLEN) {
+            DR(ptmp, i) = D(pwt, 2*i+1);
+            DL(ptmp, i) = D(pws, 2*i+1);
+        } DONE_ALL_ELEMENTS;
+        break;
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+
+    msa_move_v(pwd, ptmp);
+}
+
+/*
+ * Pack-odd helper: resolves the wd/ws/wt register numbers to their storage
+ * in env->active_fpu.fpr[], runs msa_pckod_df() and, when MSAIR_WRP_BIT is
+ * set in msair, sets bit wd of msamodify.
+ */
+void helper_msa_pckod_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    void *pwt = &(env->active_fpu.fpr[wt]);
+    msa_pckod_df(env, df, pwd, pws, pwt);
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* wd may be 31: shift an unsigned 1 so bit 31 is not int overflow. */
+        env->active_msa.msamodify |= (1u << wd);
+    }
+}
+
+/*
+ * Vector shuffle core: selects elements of pws/pwt under control of the
+ * values currently held in the destination register pwd.
+ *
+ * n is the number of elements of width df in the vector.  For element i the
+ * control value is pwd[i]:
+ *   - if any of the bits in mask 0xc0 is set, the result element is 0;
+ *   - otherwise k = (control & 0x3f) % (2 * n) picks element k of pwt when
+ *     k < n, or element k - n of pws when k >= n.
+ *
+ * Results are collected in the scratch register wx and moved to pwd with
+ * msa_move_v() after the loop, so every control value is read before pwd is
+ * modified.  env is not used by this function.
+ */
+static inline void msa_vshf_df(CPUMIPSState *env, uint32_t df, void *pwd,
+        void *pws, void *pwt)
+{
+    uint32_t n = MSA_WRLEN / DF_BITS(df);
+    uint32_t k;
+    wr_t wx, *pwx = &wx;
+    switch (df) {
+    case DF_BYTE:
+        /* byte data format */
+        ALL_B_ELEMENTS(i, MSA_WRLEN) {
+            k = (B(pwd, i) & 0x3f) % (2 * n);
+            B(pwx, i) =
+                (B(pwd, i) & 0xc0) ? 0 : k < n ? B(pwt, k) : B(pws, k - n);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_HALF:
+        /* half data format */
+        ALL_H_ELEMENTS(i, MSA_WRLEN) {
+            k = (H(pwd, i) & 0x3f) % (2 * n);
+            H(pwx, i) =
+                (H(pwd, i) & 0xc0) ? 0 : k < n ? H(pwt, k) : H(pws, k - n);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_WORD:
+        /* word data format */
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            k = (W(pwd, i) & 0x3f) % (2 * n);
+            W(pwx, i) =
+                (W(pwd, i) & 0xc0) ? 0 : k < n ? W(pwt, k) : W(pws, k - n);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_DOUBLE:
+        /* double data format */
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            k = (D(pwd, i) & 0x3f) % (2 * n);
+            D(pwx, i) =
+                (D(pwd, i) & 0xc0) ? 0 : k < n ? D(pwt, k) : D(pws, k - n);
+        } DONE_ALL_ELEMENTS;
+       break;
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+    /* Replace the control vector in pwd with the shuffled result. */
+    msa_move_v(pwd, pwx);
+}
+
+/*
+ * Vector shuffle helper: resolves the wd/ws/wt register numbers to their
+ * storage in env->active_fpu.fpr[], runs msa_vshf_df() (which reads its
+ * control values from wd) and, when MSAIR_WRP_BIT is set in msair, sets bit
+ * wd of msamodify.
+ */
+void helper_msa_vshf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    void *pwt = &(env->active_fpu.fpr[wt]);
+    msa_vshf_df(env, df, pwd, pws, pwt);
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* wd may be 31: shift an unsigned 1 so bit 31 is not int overflow. */
+        env->active_msa.msamodify |= (1u << wd);
+    }
+}
+
+
+/*
+ * Source index for element i of a shuffle: keep the base of i's group of
+ * four elements (i & 0xfc) and add the 2-bit field of imm selected by i's
+ * position inside the group (i & 0x03).  Both arguments are parenthesised in
+ * the expansion so that expressions can be passed; i is evaluated twice.
+ */
+#define SHF_POS(i, imm) \
+    (((i) & 0xfc) + (((imm) >> (2 * ((i) & 0x03))) & 0x03))
+
+/*
+ * Immediate shuffle core: result element i is the element of pws at index
+ * SHF_POS(i, imm).
+ *
+ * Only the byte, half and word formats are handled; any other df value ends
+ * up in the default branch and asserts.  The result is built in the scratch
+ * register wx and moved to pwd by msa_move_v() once all of pws has been
+ * read.  env is not used by this function.
+ */
+static inline void msa_shf_df(CPUMIPSState *env, uint32_t df, void *pwd,
+        void *pws, uint32_t imm)
+{
+    wr_t wx, *pwx = &wx;
+    switch (df) {
+    case DF_BYTE:
+      ALL_B_ELEMENTS(i, MSA_WRLEN) {
+        B(pwx, i) = B(pws, SHF_POS(i, imm));
+      } DONE_ALL_ELEMENTS;
+      break;
+    case DF_HALF:
+      ALL_H_ELEMENTS(i, MSA_WRLEN) {
+        H(pwx, i) = H(pws, SHF_POS(i, imm));
+      } DONE_ALL_ELEMENTS;
+      break;
+    case DF_WORD:
+      ALL_W_ELEMENTS(i, MSA_WRLEN) {
+        W(pwx, i) = W(pws, SHF_POS(i, imm));
+      } DONE_ALL_ELEMENTS;
+      break;
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+    /* Commit the shuffled elements to the destination register. */
+    msa_move_v(pwd, pwx);
+}
+
+/*
+ * Immediate shuffle helper: resolves the wd/ws register numbers to their
+ * storage in env->active_fpu.fpr[], runs msa_shf_df() with the immediate
+ * and, when MSAIR_WRP_BIT is set in msair, sets bit wd of msamodify.
+ */
+void helper_msa_shf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t imm)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    msa_shf_df(env, df, pwd, pws, imm);
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* wd may be 31: shift an unsigned 1 so bit 31 is not int overflow. */
+        env->active_msa.msamodify |= (1u << wd);
+    }
+}
+
+/*
+ * Multiply-add element operation: returns dest + arg1 * arg2.
+ *
+ * The arithmetic is done on uint64_t so that results which do not fit in
+ * int64_t wrap modulo 2^64 instead of triggering signed-overflow undefined
+ * behaviour; the low-order bits are the same as for a signed computation.
+ * env and df are not used.
+ */
+static inline int64_t msa_maddv_df(CPUMIPSState *env, uint32_t df,
+        int64_t dest, int64_t arg1, int64_t arg2)
+{
+    return (int64_t)((uint64_t)dest + (uint64_t)arg1 * (uint64_t)arg2);
+}
+
+/*
+ * Multiply-add helper: for every element i of width df_bits (8 << df) it
+ * loads ws[i], wt[i] and wd[i] with msa_load_wr_elem_s64(), combines them
+ * with msa_maddv_df() and stores the result back to wd[i].  When
+ * MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set afterwards.
+ */
+void helper_msa_maddv_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_load_wr_elem_s64(env, wd, df, i);
+        td = msa_maddv_df(env, df, td, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* wd may be 31: shift an unsigned 1 so bit 31 is not int overflow. */
+        env->active_msa.msamodify |= (1u << wd);
+    }
+}
+
+/*
+ * Multiply-subtract element operation: returns dest - arg1 * arg2.
+ *
+ * The arithmetic is done on uint64_t so that results which do not fit in
+ * int64_t wrap modulo 2^64 instead of triggering signed-overflow undefined
+ * behaviour; the low-order bits are the same as for a signed computation.
+ * env and df are not used.
+ */
+static inline int64_t msa_msubv_df(CPUMIPSState *env, uint32_t df,
+        int64_t dest, int64_t arg1, int64_t arg2)
+{
+    return (int64_t)((uint64_t)dest - (uint64_t)arg1 * (uint64_t)arg2);
+}
+
+/*
+ * Multiply-subtract helper: for every element i of width df_bits (8 << df)
+ * it loads ws[i], wt[i] and wd[i] with msa_load_wr_elem_s64(), combines them
+ * with msa_msubv_df() and stores the result back to wd[i].  When
+ * MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set afterwards.
+ */
+void helper_msa_msubv_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_load_wr_elem_s64(env, wd, df, i);
+        td = msa_msubv_df(env, df, td, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* wd may be 31: shift an unsigned 1 so bit 31 is not int overflow. */
+        env->active_msa.msamodify |= (1u << wd);
+    }
+}
+
+/*
+ * Maximum-by-absolute-value element operation: returns whichever argument
+ * has the larger magnitude, with its original sign; arg2 is returned when
+ * the magnitudes are equal.
+ *
+ * Negative values are negated as uint64_t so that INT64_MIN yields 2^63
+ * without signed-overflow undefined behaviour.  env and df are not used.
+ */
+static inline int64_t msa_max_a_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    uint64_t abs_arg1 = arg1 >= 0 ? (uint64_t)arg1 : -(uint64_t)arg1;
+    uint64_t abs_arg2 = arg2 >= 0 ? (uint64_t)arg2 : -(uint64_t)arg2;
+    return abs_arg1 > abs_arg2 ? arg1 : arg2;
+}
+
+/*
+ * Maximum-by-absolute-value helper: for every element i of width df_bits
+ * (8 << df) it loads ws[i] and wt[i] with msa_load_wr_elem_s64(), picks one
+ * with msa_max_a_df() and stores it to wd[i].  When MSAIR_WRP_BIT is set in
+ * msair, bit wd of msamodify is set afterwards.
+ */
+void helper_msa_max_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    uint64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_max_a_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* wd may be 31: shift an unsigned 1 so bit 31 is not int overflow. */
+        env->active_msa.msamodify |= (1u << wd);
+    }
+}
+
+/*
+ * Signed maximum element operation: returns arg1 when it is strictly greater
+ * than arg2, otherwise arg2.  env and df are not used.
+ */
+static inline int64_t msa_max_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    if (arg1 > arg2) {
+        return arg1;
+    }
+    return arg2;
+}
+
+/*
+ * Signed maximum helper: for every element i of width df_bits (8 << df) it
+ * loads ws[i] and wt[i] with msa_load_wr_elem_s64(), takes msa_max_s_df() of
+ * the pair and stores it to wd[i].  When MSAIR_WRP_BIT is set in msair, bit
+ * wd of msamodify is set afterwards.
+ */
+void helper_msa_max_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_max_s_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* wd may be 31: shift an unsigned 1 so bit 31 is not int overflow. */
+        env->active_msa.msamodify |= (1u << wd);
+    }
+}
+
+/*
+ * Signed maximum-with-immediate helper: for every element i of width df_bits
+ * (8 << df) it loads ws[i] with msa_load_wr_elem_s64(), takes msa_max_s_df()
+ * of it and the immediate s5, and stores the result to wd[i].  When
+ * MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set afterwards.
+ */
+void helper_msa_maxi_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t s5)
+{
+    int64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_max_s_df(env, df, ts, s5);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* wd may be 31: shift an unsigned 1 so bit 31 is not int overflow. */
+        env->active_msa.msamodify |= (1u << wd);
+    }
+}
+
+/*
+ * Unsigned maximum element operation: compares the operands after converting
+ * each with UNSIGNED(value, df) and returns the original argument whose
+ * converted value is strictly greater; arg2 is returned otherwise.  env is
+ * not used.
+ */
+static inline int64_t msa_max_u_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    const uint64_t lhs = UNSIGNED(arg1, df);
+    const uint64_t rhs = UNSIGNED(arg2, df);
+    if (lhs > rhs) {
+        return arg1;
+    }
+    return arg2;
+}
+
+/*
+ * Unsigned maximum helper: for every element i of width df_bits (8 << df) it
+ * loads ws[i] and wt[i] with msa_load_wr_elem_i64(), takes msa_max_u_df() of
+ * the pair and stores it to wd[i].  When MSAIR_WRP_BIT is set in msair, bit
+ * wd of msamodify is set afterwards.
+ */
+void helper_msa_max_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    uint64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        tt = msa_load_wr_elem_i64(env, wt, df, i);
+        td = msa_max_u_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* wd may be 31: shift an unsigned 1 so bit 31 is not int overflow. */
+        env->active_msa.msamodify |= (1u << wd);
+    }
+}
+
+/*
+ * Unsigned maximum-with-immediate helper: for every element i of width
+ * df_bits (8 << df) it loads ws[i] with msa_load_wr_elem_i64(), takes
+ * msa_max_u_df() of it and the immediate u5, and stores the result to wd[i].
+ * When MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set afterwards.
+ */
+void helper_msa_maxi_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t u5)
+{
+    uint64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        td = msa_max_u_df(env, df, ts, u5);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* wd may be 31: shift an unsigned 1 so bit 31 is not int overflow. */
+        env->active_msa.msamodify |= (1u << wd);
+    }
+}
+
+/*
+ * Minimum-by-absolute-value element operation: returns whichever argument
+ * has the smaller magnitude, with its original sign; arg2 is returned when
+ * the magnitudes are equal.
+ *
+ * Negative values are negated as uint64_t so that INT64_MIN yields 2^63
+ * without signed-overflow undefined behaviour.  env and df are not used.
+ */
+static inline int64_t msa_min_a_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    uint64_t abs_arg1 = arg1 >= 0 ? (uint64_t)arg1 : -(uint64_t)arg1;
+    uint64_t abs_arg2 = arg2 >= 0 ? (uint64_t)arg2 : -(uint64_t)arg2;
+    return abs_arg1 < abs_arg2 ? arg1 : arg2;
+}
+
+/*
+ * Minimum-by-absolute-value helper: for every element i of width df_bits
+ * (8 << df) it loads ws[i] and wt[i] with msa_load_wr_elem_s64(), picks one
+ * with msa_min_a_df() and stores it to wd[i].  When MSAIR_WRP_BIT is set in
+ * msair, bit wd of msamodify is set afterwards.
+ */
+void helper_msa_min_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    uint64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_min_a_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* wd may be 31: shift an unsigned 1 so bit 31 is not int overflow. */
+        env->active_msa.msamodify |= (1u << wd);
+    }
+}
+
+/*
+ * Signed minimum element operation: returns arg1 when it is strictly less
+ * than arg2, otherwise arg2.  env and df are not used.
+ */
+static inline int64_t msa_min_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    if (arg1 < arg2) {
+        return arg1;
+    }
+    return arg2;
+}
+
+/*
+ * Signed minimum helper: for every element i of width df_bits (8 << df) it
+ * loads ws[i] and wt[i] with msa_load_wr_elem_s64(), takes msa_min_s_df() of
+ * the pair and stores it to wd[i].  When MSAIR_WRP_BIT is set in msair, bit
+ * wd of msamodify is set afterwards.
+ */
+void helper_msa_min_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_min_s_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* wd may be 31: shift an unsigned 1 so bit 31 is not int overflow. */
+        env->active_msa.msamodify |= (1u << wd);
+    }
+}
+
+/*
+ * Signed minimum-with-immediate helper: for every element i of width df_bits
+ * (8 << df) it loads ws[i] with msa_load_wr_elem_s64(), takes msa_min_s_df()
+ * of it and the immediate s5, and stores the result to wd[i].  When
+ * MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set afterwards.
+ */
+void helper_msa_mini_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t s5)
+{
+    int64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_min_s_df(env, df, ts, s5);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* wd may be 31: shift an unsigned 1 so bit 31 is not int overflow. */
+        env->active_msa.msamodify |= (1u << wd);
+    }
+}
+
+/*
+ * Unsigned minimum element operation: compares the operands after converting
+ * each with UNSIGNED(value, df) and returns the original argument whose
+ * converted value is strictly smaller; arg2 is returned otherwise.  env is
+ * not used.
+ */
+static inline int64_t msa_min_u_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    const uint64_t lhs = UNSIGNED(arg1, df);
+    const uint64_t rhs = UNSIGNED(arg2, df);
+    if (lhs < rhs) {
+        return arg1;
+    }
+    return arg2;
+}
+
+/*
+ * Unsigned minimum helper: for every element i of width df_bits (8 << df) it
+ * loads ws[i] and wt[i] with msa_load_wr_elem_i64(), takes msa_min_u_df() of
+ * the pair and stores it to wd[i].  When MSAIR_WRP_BIT is set in msair, bit
+ * wd of msamodify is set afterwards.
+ */
+void helper_msa_min_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    uint64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        tt = msa_load_wr_elem_i64(env, wt, df, i);
+        td = msa_min_u_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* wd may be 31: shift an unsigned 1 so bit 31 is not int overflow. */
+        env->active_msa.msamodify |= (1u << wd);
+    }
+}
+
+/*
+ * Unsigned minimum-with-immediate helper: for every element i of width
+ * df_bits (8 << df) it loads ws[i] with msa_load_wr_elem_i64(), takes
+ * msa_min_u_df() of it and the immediate u5, and stores the result to wd[i].
+ * When MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set afterwards.
+ */
+void helper_msa_mini_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t u5)
+{
+    uint64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        td = msa_min_u_df(env, df, ts, u5);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* wd may be 31: shift an unsigned 1 so bit 31 is not int overflow. */
+        env->active_msa.msamodify |= (1u << wd);
+    }
+}
+
+/*
+ * Splat core: copies element n of pws into every element of pwd, where
+ * n = rt % DF_ELEMENTS(df, MSA_WRLEN).
+ *
+ * n is passed to msa_check_index() before use.  Unlike the pack/shuffle
+ * helpers this function writes straight into pwd without a scratch copy.
+ */
+static inline void msa_splat_df(CPUMIPSState *env, uint32_t df, void *pwd,
+        void *pws, target_ulong rt)
+{
+    /* Reduce the index modulo the element count of the format. */
+    uint32_t n = rt % DF_ELEMENTS(df, MSA_WRLEN);
+    msa_check_index(env, df, n);
+    switch (df) {
+    case DF_BYTE:
+        ALL_B_ELEMENTS(i, MSA_WRLEN) {
+            B(pwd, i)   = B(pws, n);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_HALF:
+        ALL_H_ELEMENTS(i, MSA_WRLEN) {
+            H(pwd, i)   = H(pws, n);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_WORD:
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            W(pwd, i)   = W(pws, n);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_DOUBLE:
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            D(pwd, i)   = D(pws, n);
+        } DONE_ALL_ELEMENTS;
+       break;
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+}
+
+void helper_msa_splat_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t rt)
 {
-    int64_t td, ts;
-    int i;
-    int df_bits = 8 * (1 << df);
-    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
-        ts = msa_load_wr_elem_s64(env, ws, df, i);
-        td = msa_clt_u_df(env, df, ts, u5);
-        msa_store_wr_elem(env, td, wd, df, i);
-    }
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    msa_splat_df(env, df, pwd, pws, env->active_tc.gpr[rt]);
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
         env->active_msa.msamodify |= (1 << wd);
     }
 }
 
-#define SHF_POS(i, imm) ((i & 0xfc) + ((imm >> (2 * (i & 0x03))) & 0x03))
-
-static inline void msa_shf_df(CPUMIPSState *env, uint32_t df, void *pwd,
-        void *pws, uint32_t imm)
+void helper_msa_ldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t s10)
 {
-    wr_t wx, *pwx = &wx;
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    int64_t s64 = ((int64_t)s10 << 54) >> 54;
     switch (df) {
     case DF_BYTE:
-      ALL_B_ELEMENTS(i, MSA_WRLEN) {
-        B(pwx, i) = B(pws, SHF_POS(i, imm));
-      } DONE_ALL_ELEMENTS;
-      break;
+        ALL_B_ELEMENTS(i, MSA_WRLEN) {
+            B(pwd, i)   = (int8_t)s10;
+        } DONE_ALL_ELEMENTS;
+        break;
     case DF_HALF:
-      ALL_H_ELEMENTS(i, MSA_WRLEN) {
-        H(pwx, i) = H(pws, SHF_POS(i, imm));
-      } DONE_ALL_ELEMENTS;
-      break;
+        ALL_H_ELEMENTS(i, MSA_WRLEN) {
+            H(pwd, i)   = (int16_t)s64;
+        } DONE_ALL_ELEMENTS;
+        break;
     case DF_WORD:
-      ALL_W_ELEMENTS(i, MSA_WRLEN) {
-        W(pwx, i) = W(pws, SHF_POS(i, imm));
-      } DONE_ALL_ELEMENTS;
-      break;
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            W(pwd, i)   = (int32_t)s64;
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_DOUBLE:
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            D(pwd, i)   = s64;
+        } DONE_ALL_ELEMENTS;
+       break;
     default:
         /* shouldn't get here */
         assert(0);
     }
-    msa_move_v(pwd, pwx);
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
 }
 
-void helper_msa_shf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, uint32_t imm)
+void helper_msa_mulv_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    void *pwd = &(env->active_fpu.fpr[wd]);
-    void *pws = &(env->active_fpu.fpr[ws]);
-    msa_shf_df(env, df, pwd, pws, imm);
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = ts * tt;
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
         env->active_msa.msamodify |= (1 << wd);
     }
 }
 
-static inline int64_t msa_max_s_df(CPUMIPSState *env, uint32_t df,
+static inline int64_t msa_div_s_df(CPUMIPSState *env, uint32_t df,
         int64_t arg1, int64_t arg2)
 {
-    return arg1 > arg2 ? arg1 : arg2;
+    if (arg1 == DF_MIN_INT(df) && arg2 == -1) {
+        return DF_MIN_INT(df);
+    }
+    return arg2 ? arg1 / arg2 : 0;
 }
 
-void helper_msa_maxi_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, int64_t s5)
+void helper_msa_div_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    int64_t td, ts;
+    int64_t td, ts, tt;
     int i;
     int df_bits = 8 * (1 << df);
     for (i = 0; i < MSA_WRLEN / df_bits; i++) {
         ts = msa_load_wr_elem_s64(env, ws, df, i);
-        td = msa_max_s_df(env, df, ts, s5);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_div_s_df(env, df, ts, tt);
         msa_store_wr_elem(env, td, wd, df, i);
     }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
@@ -637,23 +2220,24 @@ void helper_msa_maxi_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
-static inline int64_t msa_max_u_df(CPUMIPSState *env, uint32_t df,
+static inline int64_t msa_div_u_df(CPUMIPSState *env, uint32_t df,
         int64_t arg1, int64_t arg2)
 {
     uint64_t u_arg1 = UNSIGNED(arg1, df);
     uint64_t u_arg2 = UNSIGNED(arg2, df);
-    return u_arg1 > u_arg2 ? arg1 : arg2;
+    return u_arg2 ? u_arg1 / u_arg2 : 0;
 }
 
-void helper_msa_maxi_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, int64_t u5)
+void helper_msa_div_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    uint64_t td, ts;
+    uint64_t td, ts, tt;
     int i;
     int df_bits = 8 * (1 << df);
     for (i = 0; i < MSA_WRLEN / df_bits; i++) {
         ts = msa_load_wr_elem_i64(env, ws, df, i);
-        td = msa_max_u_df(env, df, ts, u5);
+        tt = msa_load_wr_elem_i64(env, wt, df, i);
+        td = msa_div_u_df(env, df, ts, tt);
         msa_store_wr_elem(env, td, wd, df, i);
     }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
@@ -661,21 +2245,25 @@ void helper_msa_maxi_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
-static inline int64_t msa_min_s_df(CPUMIPSState *env, uint32_t df,
+static inline int64_t msa_mod_s_df(CPUMIPSState *env, uint32_t df,
         int64_t arg1, int64_t arg2)
 {
-    return arg1 < arg2 ? arg1 : arg2;
+    if (arg1 == DF_MIN_INT(df) && arg2 == -1) {
+        return 0;
+    }
+    return arg2 ? arg1 % arg2 : 0;
 }
 
-void helper_msa_mini_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, int64_t s5)
+void helper_msa_mod_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    int64_t td, ts;
+    int64_t td, ts, tt;
     int i;
     int df_bits = 8 * (1 << df);
     for (i = 0; i < MSA_WRLEN / df_bits; i++) {
         ts = msa_load_wr_elem_s64(env, ws, df, i);
-        td = msa_min_s_df(env, df, ts, s5);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_mod_s_df(env, df, ts, tt);
         msa_store_wr_elem(env, td, wd, df, i);
     }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
@@ -683,23 +2271,24 @@ void helper_msa_mini_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
-static inline int64_t msa_min_u_df(CPUMIPSState *env, uint32_t df,
+static inline int64_t msa_mod_u_df(CPUMIPSState *env, uint32_t df,
         int64_t arg1, int64_t arg2)
 {
     uint64_t u_arg1 = UNSIGNED(arg1, df);
     uint64_t u_arg2 = UNSIGNED(arg2, df);
-    return u_arg1 < u_arg2 ? arg1 : arg2;
+    return u_arg2 ? u_arg1 % u_arg2 : 0;
 }
 
-void helper_msa_mini_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, int64_t u5)
+void helper_msa_mod_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    uint64_t td, ts;
+    uint64_t td, ts, tt;
     int i;
     int df_bits = 8 * (1 << df);
     for (i = 0; i < MSA_WRLEN / df_bits; i++) {
         ts = msa_load_wr_elem_i64(env, ws, df, i);
-        td = msa_min_u_df(env, df, ts, u5);
+        tt = msa_load_wr_elem_i64(env, wt, df, i);
+        td = msa_mod_u_df(env, df, ts, tt);
         msa_store_wr_elem(env, td, wd, df, i);
     }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
@@ -707,41 +2296,6 @@ void helper_msa_mini_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
-void helper_msa_ldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t s10)
-{
-    void *pwd = &(env->active_fpu.fpr[wd]);
-    int64_t s64 = ((int64_t)s10 << 54) >> 54;
-    switch (df) {
-    case DF_BYTE:
-        ALL_B_ELEMENTS(i, MSA_WRLEN) {
-            B(pwd, i)   = (int8_t)s10;
-        } DONE_ALL_ELEMENTS;
-        break;
-    case DF_HALF:
-        ALL_H_ELEMENTS(i, MSA_WRLEN) {
-            H(pwd, i)   = (int16_t)s64;
-        } DONE_ALL_ELEMENTS;
-        break;
-    case DF_WORD:
-        ALL_W_ELEMENTS(i, MSA_WRLEN) {
-            W(pwd, i)   = (int32_t)s64;
-        } DONE_ALL_ELEMENTS;
-        break;
-    case DF_DOUBLE:
-        ALL_D_ELEMENTS(i, MSA_WRLEN) {
-            D(pwd, i)   = s64;
-        } DONE_ALL_ELEMENTS;
-       break;
-    default:
-        /* shouldn't get here */
-        assert(0);
-    }
-    if (env->active_msa.msair & MSAIR_WRP_BIT) {
-        env->active_msa.msamodify |= (1 << wd);
-    }
-}
-
 static inline int64_t msa_sat_u_df(CPUMIPSState *env, uint32_t df, int64_t arg,
         uint32_t m)
 {
@@ -790,6 +2344,30 @@ void helper_msa_sat_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
+/*
+ * Shift-left element operation: shifts arg1 left by BIT_POSITION(arg2, df)
+ * bits.
+ *
+ * The shift is performed on uint64_t because left-shifting a negative signed
+ * value is undefined behaviour in C; the resulting bit pattern is the same.
+ * env is not used.
+ */
+static inline int64_t msa_sll_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
+        int64_t arg2)
+{
+    int32_t b_arg2 = BIT_POSITION(arg2, df);
+    return (int64_t)((uint64_t)arg1 << b_arg2);
+}
+
+/*
+ * Shift-left helper: for every element i of width df_bits (8 << df) it loads
+ * ws[i] and wt[i] with msa_load_wr_elem_s64(), applies msa_sll_df() with
+ * wt[i] supplying the shift amount, and stores the result to wd[i].  When
+ * MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set afterwards.
+ */
+void helper_msa_sll_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_sll_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* wd may be 31: shift an unsigned 1 so bit 31 is not int overflow. */
+        env->active_msa.msamodify |= (1u << wd);
+    }
+}
+
 void helper_msa_slli_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
         uint32_t ws, uint32_t m)
 {
@@ -806,6 +2384,30 @@ void helper_msa_slli_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
+/*
+ * Shift-right-arithmetic element operation: shifts the signed value arg1
+ * right by BIT_POSITION(arg2, df) bits.  env is not used.
+ */
+static inline int64_t msa_sra_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
+        int64_t arg2)
+{
+    const int32_t shift = BIT_POSITION(arg2, df);
+
+    return arg1 >> shift;
+}
+
+/*
+ * Shift-right-arithmetic helper: for every element i of width df_bits
+ * (8 << df) it loads ws[i] and wt[i] with msa_load_wr_elem_s64(), applies
+ * msa_sra_df() with wt[i] supplying the shift amount, and stores the result
+ * to wd[i].  When MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set
+ * afterwards.
+ */
+void helper_msa_sra_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_sra_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* wd may be 31: shift an unsigned 1 so bit 31 is not int overflow. */
+        env->active_msa.msamodify |= (1u << wd);
+    }
+}
+
 void helper_msa_srai_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
         uint32_t ws, uint32_t m)
 {
@@ -822,6 +2424,31 @@ void helper_msa_srai_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
+/*
+ * Shift-right-logical element operation: converts arg1 with
+ * UNSIGNED(arg1, df) and shifts the unsigned value right by
+ * BIT_POSITION(arg2, df) bits.  env is not used.
+ */
+static inline int64_t msa_srl_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
+        int64_t arg2)
+{
+    const uint64_t value = UNSIGNED(arg1, df);
+    const int32_t shift = BIT_POSITION(arg2, df);
+
+    return value >> shift;
+}
+
+/*
+ * Shift-right-logical helper: for every element i of width df_bits (8 << df)
+ * it loads ws[i] and wt[i] with msa_load_wr_elem_s64(), applies msa_srl_df()
+ * with wt[i] supplying the shift amount, and stores the result to wd[i].
+ * When MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set afterwards.
+ */
+void helper_msa_srl_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_srl_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* wd may be 31: shift an unsigned 1 so bit 31 is not int overflow. */
+        env->active_msa.msamodify |= (1u << wd);
+    }
+}
+
 static inline int64_t msa_srli_df(CPUMIPSState *env, uint32_t df, int64_t arg,
         uint32_t m)
 {
@@ -845,6 +2472,35 @@ void helper_msa_srli_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
+/*
+ * Rounding shift-right-arithmetic element operation.
+ *
+ * With a shift amount of BIT_POSITION(arg2, df): a zero shift returns arg1
+ * unchanged; otherwise the last bit shifted out (bit shift - 1 of arg1) is
+ * added to the arithmetically shifted value.  env is not used.
+ */
+static inline int64_t msa_srar_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
+        int64_t arg2)
+{
+    int32_t shift = BIT_POSITION(arg2, df);
+    int64_t round;
+
+    if (shift == 0) {
+        return arg1;
+    }
+    round = (arg1 >> (shift - 1)) & 1;
+    return (arg1 >> shift) + round;
+}
+
+/*
+ * Rounding shift-right-arithmetic helper: for every element i of width
+ * df_bits (8 << df) it loads ws[i] and wt[i] with msa_load_wr_elem_s64(),
+ * applies msa_srar_df() with wt[i] supplying the shift amount, and stores
+ * the result to wd[i].  When MSAIR_WRP_BIT is set in msair, bit wd of
+ * msamodify is set afterwards.
+ */
+void helper_msa_srar_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_srar_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* wd may be 31: shift an unsigned 1 so bit 31 is not int overflow. */
+        env->active_msa.msamodify |= (1u << wd);
+    }
+}
+
 static inline int64_t msa_srari_df(CPUMIPSState *env, uint32_t df, int64_t arg,
         uint32_t m)
 {
@@ -872,6 +2528,36 @@ void helper_msa_srari_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
+/*
+ * Rounding shift-right-logical element operation.
+ *
+ * arg1 is first converted with UNSIGNED(arg1, df).  With a shift amount of
+ * BIT_POSITION(arg2, df): a zero shift returns the converted value;
+ * otherwise the last bit shifted out (bit shift - 1) is added to the
+ * logically shifted value.  env is not used.
+ */
+static inline int64_t msa_srlr_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
+        int64_t arg2)
+{
+    uint64_t value = UNSIGNED(arg1, df);
+    int32_t shift = BIT_POSITION(arg2, df);
+    uint64_t round;
+
+    if (shift == 0) {
+        return value;
+    }
+    round = (value >> (shift - 1)) & 1;
+    return (value >> shift) + round;
+}
+
+/*
+ * Rounding shift-right-logical helper: for every element i of width df_bits
+ * (8 << df) it loads ws[i] and wt[i] with msa_load_wr_elem_s64(), applies
+ * msa_srlr_df() with wt[i] supplying the shift amount, and stores the result
+ * to wd[i].  When MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set
+ * afterwards.
+ */
+void helper_msa_srlr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_srlr_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* wd may be 31: shift an unsigned 1 so bit 31 is not int overflow. */
+        env->active_msa.msamodify |= (1u << wd);
+    }
+}
+
 static inline int64_t msa_srlri_df(CPUMIPSState *env, uint32_t df, int64_t arg,
         uint32_t m)
 {
@@ -899,3 +2585,56 @@ void helper_msa_srlri_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
         env->active_msa.msamodify |= (1 << wd);
     }
 }
+/*
+ * Slide core: slides the bytes of pws/pwd by n positions, where
+ * n = rt % DF_ELEMENTS(df, MSA_WRLEN), and writes the result into pwd.
+ *
+ * The register is processed in byte chunks whose size and count depend on
+ * df: one chunk for the byte format and 2, 4 or 8 chunks for half, word and
+ * double.  pwd is both an input and the output.  env is only passed on to
+ * msa_check_index().
+ */
+static inline void msa_sld_df(CPUMIPSState *env, uint32_t df, void *pwd,
+        void *pws, target_ulong rt)
+{
+    uint32_t n = rt % DF_ELEMENTS(df, MSA_WRLEN);
+    /* Scratch buffer; each macro use below fills its first 2 * s bytes. */
+    uint8_t v[64];
+    uint32_t i, k;
+/*
+ * CONCATENATE_AND_SLIDE(s, k) handles chunk k of s bytes: it copies the
+ * chunk of pws into v[0..s-1] and the chunk of pwd into v[s..2s-1], then
+ * writes v[n..n+s-1] back to the chunk of pwd.  It relies on i, n, v, pws
+ * and pwd from the enclosing function, and its arguments are not
+ * parenthesised in the expansion.
+ * NOTE(review): the macro is not #undef'd after the function.
+ */
+#define CONCATENATE_AND_SLIDE(s, k)             \
+    do {                                        \
+        for (i = 0; i < s; i++) {               \
+            v[i]     = B(pws, s * k + i);       \
+            v[i + s] = B(pwd, s * k + i);       \
+        }                                       \
+        for (i = 0; i < s; i++) {               \
+            B(pwd, s * k + i) = v[i + n];       \
+        }                                       \
+    } while (0)
+
+    msa_check_index(env, df, n);
+    switch (df) {
+    case DF_BYTE:
+        CONCATENATE_AND_SLIDE(MSA_WRLEN/8, 0);
+        break;
+    case DF_HALF:
+        for (k = 0; k < 2; k++) {
+            CONCATENATE_AND_SLIDE(MSA_WRLEN/16, k);
+        }
+        break;
+    case DF_WORD:
+        for (k = 0; k < 4; k++) {
+            CONCATENATE_AND_SLIDE(MSA_WRLEN/32, k);
+        }
+        break;
+    case DF_DOUBLE:
+        for (k = 0; k < 8; k++) {
+            CONCATENATE_AND_SLIDE(MSA_WRLEN/64, k);
+        }
+        break;
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+}
+
+/*
+ * Slide helper: resolves the wd/ws register numbers to their storage in
+ * env->active_fpu.fpr[], reads the slide amount from general-purpose
+ * register rt (env->active_tc.gpr[rt]) and runs msa_sld_df().  When
+ * MSAIR_WRP_BIT is set in msair, bit wd of msamodify is set afterwards.
+ */
+void helper_msa_sld_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t rt)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    msa_sld_df(env, df, pwd, pws, env->active_tc.gpr[rt]);
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* wd may be 31: shift an unsigned 1 so bit 31 is not int overflow. */
+        env->active_msa.msamodify |= (1u << wd);
+    }
+}
diff --git a/target-mips/translate.c b/target-mips/translate.c
index f97d3a9..e063531 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -15030,6 +15030,295 @@ static void gen_msa_bit(CPUMIPSState *env, DisasContext *ctx)
     tcg_temp_free_i32(tws);
 }
 
+static void gen_msa_3r(CPUMIPSState *env, DisasContext *ctx)
+{   /*
+     * Translate one MSA 3R-format instruction.
+     *
+     * Extracts the df, wt, ws and wd fields from ctx->opcode, wraps each in
+     * a constant i32 TCG temporary, and dispatches on MASK_MSA_3R(opcode),
+     * i.e. the MASK_MSA_MINOR() bits combined with opcode bits 25:23.
+     * Every recognised case calls check_msa_access() and then emits the
+     * matching gen_helper_msa_*_df(cpu_env, df, wd, ws, wt) call; the value
+     * returned by that check_msa_access() call is not consulted.
+     * Unrecognised encodings generate EXCP_RI.  All four temporaries are
+     * freed at the end on every path.
+     */
+#define MASK_MSA_3R(op)    (MASK_MSA_MINOR(op) | (op & (0x7 << 23)))
+    uint32_t opcode = ctx->opcode;
+
+    uint8_t df = (ctx->opcode >> 21) & 0x3 /* df [22:21] */;
+    uint8_t wt = (ctx->opcode >> 16) & 0x1f /* wt [20:16] */;
+    uint8_t ws = (ctx->opcode >> 11) & 0x1f /* ws [15:11] */;
+    uint8_t wd = (ctx->opcode >> 6) & 0x1f /* wd [10:6] */;
+
+    TCGv_i32 tdf = tcg_const_i32(df);
+    TCGv_i32 twd = tcg_const_i32(wd);
+    TCGv_i32 tws = tcg_const_i32(ws);
+    TCGv_i32 twt = tcg_const_i32(wt);
+
+    switch (MASK_MSA_3R(opcode)) { /* one case per 3R operation */
+    case OPC_MSA_SLL_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_sll_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_ADDV_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_addv_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_CEQ_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_ceq_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_ADD_A_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_add_a_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_SUBS_S_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_subs_s_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_MULV_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_mulv_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_SLD_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_sld_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_VSHF_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_vshf_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_SRA_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_sra_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_SUBV_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_subv_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_ADDS_A_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_adds_a_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_SUBS_U_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_subs_u_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_MADDV_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_maddv_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_SPLAT_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_splat_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_SRAR_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_srar_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_SRL_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_srl_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_MAX_S_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_max_s_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_CLT_S_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_clt_s_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_ADDS_S_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_adds_s_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_SUBSUS_U_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_subsus_u_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_MSUBV_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_msubv_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_PCKEV_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_pckev_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_SRLR_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_srlr_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_BCLR_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_bclr_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_MAX_U_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_max_u_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_CLT_U_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_clt_u_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_ADDS_U_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_adds_u_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_SUBSUU_S_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_subsuu_s_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_PCKOD_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_pckod_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_BSET_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_bset_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_MIN_S_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_min_s_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_CLE_S_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_cle_s_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_AVE_S_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_ave_s_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_ASUB_S_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_asub_s_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_DIV_S_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_div_s_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_ILVL_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_ilvl_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_BNEG_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_bneg_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_MIN_U_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_min_u_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_CLE_U_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_cle_u_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_AVE_U_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_ave_u_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_ASUB_U_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_asub_u_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_DIV_U_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_div_u_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_ILVR_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_ilvr_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_BINSL_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_binsl_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_MAX_A_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_max_a_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_AVER_S_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_aver_s_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_MOD_S_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_mod_s_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_ILVEV_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_ilvev_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_BINSR_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_binsr_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_MIN_A_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_min_a_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_AVER_U_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_aver_u_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_MOD_U_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_mod_u_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_ILVOD_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_ilvod_df(cpu_env, tdf, twd, tws, twt);
+        break;
+
+    case OPC_MSA_DOTP_S_df: /* this group shares the df == 0 check below */
+    case OPC_MSA_DOTP_U_df:
+    case OPC_MSA_DPADD_S_df:
+    case OPC_MSA_DPADD_U_df:
+    case OPC_MSA_DPSUB_S_df:
+    case OPC_MSA_HADD_S_df:
+    case OPC_MSA_DPSUB_U_df:
+    case OPC_MSA_HADD_U_df:
+    case OPC_MSA_HSUB_S_df:
+    case OPC_MSA_HSUB_U_df:
+        if (df == 0) { /* df field 0: RI when check_msa_access() is non-zero */
+            if (check_msa_access(env, ctx, -1, -1, -1)) {
+                generate_exception(ctx, EXCP_RI);
+            }
+        }
+        check_msa_access(env, ctx, wt, ws, wd); /*
+         * NOTE(review): control reaches this point even when the RI above
+         * was generated, so the helper call below is still emitted --
+         * confirm this is intended.
+         */
+        switch (MASK_MSA_3R(opcode)) { /* same key as the outer switch */
+        case OPC_MSA_DOTP_S_df:
+            gen_helper_msa_dotp_s_df(cpu_env, tdf, twd, tws, twt);
+            break;
+        case OPC_MSA_DOTP_U_df:
+            gen_helper_msa_dotp_u_df(cpu_env, tdf, twd, tws, twt);
+            break;
+        case OPC_MSA_DPADD_S_df:
+            gen_helper_msa_dpadd_s_df(cpu_env, tdf, twd, tws, twt);
+            break;
+        case OPC_MSA_DPADD_U_df:
+            gen_helper_msa_dpadd_u_df(cpu_env, tdf, twd, tws, twt);
+            break;
+        case OPC_MSA_DPSUB_S_df:
+            gen_helper_msa_dpsub_s_df(cpu_env, tdf, twd, tws, twt);
+            break;
+        case OPC_MSA_HADD_S_df:
+            gen_helper_msa_hadd_s_df(cpu_env, tdf, twd, tws, twt);
+            break;
+        case OPC_MSA_DPSUB_U_df:
+            gen_helper_msa_dpsub_u_df(cpu_env, tdf, twd, tws, twt);
+            break;
+        case OPC_MSA_HADD_U_df:
+            gen_helper_msa_hadd_u_df(cpu_env, tdf, twd, tws, twt);
+            break;
+        case OPC_MSA_HSUB_S_df:
+            gen_helper_msa_hsub_s_df(cpu_env, tdf, twd, tws, twt);
+            break;
+        case OPC_MSA_HSUB_U_df:
+            gen_helper_msa_hsub_u_df(cpu_env, tdf, twd, tws, twt);
+            break;
+        } /* no default: the inner labels mirror the outer group exactly */
+        break;
+    default: /* not a 3R encoding handled here */
+        MIPS_INVAL("MSA instruction");
+        generate_exception(ctx, EXCP_RI);
+        break;
+    }
+    tcg_temp_free_i32(twd); /* release the constants created above */
+    tcg_temp_free_i32(tws);
+    tcg_temp_free_i32(twt);
+    tcg_temp_free_i32(tdf);
+}
+
 static void gen_msa(CPUMIPSState *env, DisasContext *ctx)
 {
     uint32_t opcode = ctx->opcode;
@@ -15049,6 +15338,17 @@ static void gen_msa(CPUMIPSState *env, DisasContext *ctx)
     case OPC_MSA_BIT_0A:
         gen_msa_bit(env, ctx);
         break;
+    case OPC_MSA_3R_0D:
+    case OPC_MSA_3R_0E:
+    case OPC_MSA_3R_0F:
+    case OPC_MSA_3R_10:
+    case OPC_MSA_3R_11:
+    case OPC_MSA_3R_12:
+    case OPC_MSA_3R_13:
+    case OPC_MSA_3R_14:
+    case OPC_MSA_3R_15:
+        gen_msa_3r(env, ctx);
+        break;
     default:
         MIPS_INVAL("MSA instruction");
         generate_exception(ctx, EXCP_RI);
-- 
1.7.4




reply via email to

[Prev in Thread] Current Thread [Next in Thread]