[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 1/2] target/mips: Optimize ILVOD.<B|H|W|D> MSA i
From: |
Richard Henderson |
Subject: |
Re: [Qemu-devel] [PATCH 1/2] target/mips: Optimize ILVOD.<B|H|W|D> MSA instructions |
Date: |
Fri, 15 Mar 2019 09:43:32 -0700 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Thunderbird/60.4.0 |
On 3/15/19 5:02 AM, Mateja Marjanovic wrote:
> From: Mateja Marjanovic <address@hidden>
>
> Optimize set of MSA instructions ILVOD, using directly
> tcg registers and performing logic on them insted of
> using helpers.
> Performance measurement is done by executing the
> instructions large number of times on a computer
> with Intel Core i7-3770 CPU @ 3.40GHz×8.
>
> instruction || before || after ||
> ==============================================
> ilvod.b: || 66.97 ms || 26.34 ms ||
> ilvod.h: || 44.75 ms || 25.17 ms ||
> ilvod.w: || 41.27 ms || 24.37 ms ||
> ilvod.d: || 41.75 ms || 20.50 ms ||
>
> Signed-off-by: Mateja Marjanovic <address@hidden>
> ---
> target/mips/helper.h | 1 -
> target/mips/msa_helper.c | 51 --------------------
> target/mips/translate.c | 119
> ++++++++++++++++++++++++++++++++++++++++++++++-
> 3 files changed, 118 insertions(+), 53 deletions(-)
>
> diff --git a/target/mips/helper.h b/target/mips/helper.h
> index a6d687e..d162836 100644
> --- a/target/mips/helper.h
> +++ b/target/mips/helper.h
> @@ -865,7 +865,6 @@ DEF_HELPER_5(msa_pckod_df, void, env, i32, i32, i32, i32)
> DEF_HELPER_5(msa_ilvl_df, void, env, i32, i32, i32, i32)
> DEF_HELPER_5(msa_ilvr_df, void, env, i32, i32, i32, i32)
> DEF_HELPER_5(msa_ilvev_df, void, env, i32, i32, i32, i32)
> -DEF_HELPER_5(msa_ilvod_df, void, env, i32, i32, i32, i32)
> DEF_HELPER_5(msa_vshf_df, void, env, i32, i32, i32, i32)
> DEF_HELPER_5(msa_srar_df, void, env, i32, i32, i32, i32)
> DEF_HELPER_5(msa_srlr_df, void, env, i32, i32, i32, i32)
> diff --git a/target/mips/msa_helper.c b/target/mips/msa_helper.c
> index 9d9dafe..cbcfd57 100644
> --- a/target/mips/msa_helper.c
> +++ b/target/mips/msa_helper.c
> @@ -1363,57 +1363,6 @@ void helper_msa_ilvev_df(CPUMIPSState *env, uint32_t
> df, uint32_t wd,
> }
> }
>
> -void helper_msa_ilvod_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
> - uint32_t ws, uint32_t wt)
> -{
> - wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
> - wr_t *pws = &(env->active_fpu.fpr[ws].wr);
> - wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
> -
> - switch (df) {
> - case DF_BYTE:
> - pwd->b[0] = pwt->b[1];
> - pwd->b[1] = pws->b[1];
> - pwd->b[2] = pwt->b[3];
> - pwd->b[3] = pws->b[3];
> - pwd->b[4] = pwt->b[5];
> - pwd->b[5] = pws->b[5];
> - pwd->b[6] = pwt->b[7];
> - pwd->b[7] = pws->b[7];
> - pwd->b[8] = pwt->b[9];
> - pwd->b[9] = pws->b[9];
> - pwd->b[10] = pwt->b[11];
> - pwd->b[11] = pws->b[11];
> - pwd->b[12] = pwt->b[13];
> - pwd->b[13] = pws->b[13];
> - pwd->b[14] = pwt->b[15];
> - pwd->b[15] = pws->b[15];
> - break;
> - case DF_HALF:
> - pwd->h[0] = pwt->h[1];
> - pwd->h[1] = pws->h[1];
> - pwd->h[2] = pwt->h[3];
> - pwd->h[3] = pws->h[3];
> - pwd->h[4] = pwt->h[5];
> - pwd->h[5] = pws->h[5];
> - pwd->h[6] = pwt->h[7];
> - pwd->h[7] = pws->h[7];
> - break;
> - case DF_WORD:
> - pwd->w[0] = pwt->w[1];
> - pwd->w[1] = pws->w[1];
> - pwd->w[2] = pwt->w[3];
> - pwd->w[3] = pws->w[3];
> - break;
> - case DF_DOUBLE:
> - pwd->d[0] = pwt->d[1];
> - pwd->d[1] = pws->d[1];
> - break;
> - default:
> - assert(0);
> - }
> -}
> -
> void helper_msa_ilvl_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
> uint32_t ws, uint32_t wt)
> {
> diff --git a/target/mips/translate.c b/target/mips/translate.c
> index b4a1103..101d2de 100644
> --- a/target/mips/translate.c
> +++ b/target/mips/translate.c
> @@ -28889,6 +28889,108 @@ static void gen_msa_bit(CPUMIPSState *env,
> DisasContext *ctx)
> tcg_temp_free_i32(tws);
> }
>
> +static inline void gen_ilvod_b(CPUMIPSState *env, uint32_t wd,
> + uint32_t ws, uint32_t wt) {
{ on next line.
> + TCGv_i64 t0 = tcg_temp_new_i64();
> + TCGv_i64 t1 = tcg_temp_new_i64();
> +
> + uint64_t mask = (1ULL << 8) - 1;
> + mask |= mask << 16;
> + mask |= mask << 32;
> + mask <<= 8;
This is a constant. Clearer to just write 0xff00ff00ff00ff00ull;
> +static inline void gen_ilvod_h(CPUMIPSState *env, uint32_t wd,
> + uint32_t ws, uint32_t wt) {
Likewise.
> + uint64_t mask = (1ULL << 16) - 1;
> + mask |= mask << 32;
> + mask <<= 16;
0xffff0000ffff0000ull
> +static inline void gen_ilvod_w(CPUMIPSState *env, uint32_t wd,
> + uint32_t ws, uint32_t wt) {
Likewise.
> + tcg_gen_andi_i64(t0, msa_wr_d[wt * 2], mask);
> + tcg_gen_shri_i64(t0, t0, 32);
> + tcg_gen_or_i64(t1, t1, t0);
> + tcg_gen_andi_i64(t0, msa_wr_d[ws * 2], mask);
> + tcg_gen_or_i64(t1, t1, t0);
This can fold down to deposit.
tcg_gen_shri_i64(t0, msa_wr_d[wt * 2], 32);
tcg_gen_deposit_i64(msa_wr_d[wd * 2], msa_wr_d[ws * 2], t0, 0, 32);
> +static inline void gen_ilvod_d(CPUMIPSState *env, uint32_t wd,
> + uint32_t ws, uint32_t wt) {
Likewise.
r~