qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v2 37/55] target/mips: Use 8-byte memory ops for msa load/store


From: Philippe Mathieu-Daudé
Subject: Re: [PATCH v2 37/55] target/mips: Use 8-byte memory ops for msa load/store
Date: Wed, 18 Aug 2021 11:21:28 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Thunderbird/78.11.0

On 8/3/21 6:14 AM, Richard Henderson wrote:
> Rather than use 4-16 separate operations, use 2 operations
> plus some byte reordering as necessary.
> 
> Cc: Philippe Mathieu-Daudé <f4bug@amsat.org>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/mips/tcg/msa_helper.c | 201 +++++++++++++----------------------
>  1 file changed, 71 insertions(+), 130 deletions(-)
> 
> diff --git a/target/mips/tcg/msa_helper.c b/target/mips/tcg/msa_helper.c
> index a8880ce81c..e40c1b7057 100644
> --- a/target/mips/tcg/msa_helper.c
> +++ b/target/mips/tcg/msa_helper.c
> @@ -8218,47 +8218,31 @@ void helper_msa_ffint_u_df(CPUMIPSState *env, 
> uint32_t df, uint32_t wd,
>  #define MEMOP_IDX(DF)
>  #endif
>  
> +#ifdef TARGET_WORDS_BIGENDIAN
> +static inline uint64_t bswap16x4(uint64_t x)
> +{
> +    uint64_t m = 0x00ff00ff00ff00ffull;
> +    return ((x & m) << 8) | ((x >> 8) & m);
> +}
> +
> +static inline uint64_t bswap32x2(uint64_t x)
> +{
> +    return ror64(bswap64(x), 32);
> +}
> +#endif

I'm trying to remove TARGET_WORDS_BIGENDIAN uses,
so this would become:

/*
 * Report the guest CPU's endianness at run time (instead of relying on
 * the compile-time TARGET_WORDS_BIGENDIAN define).
 *
 * Returns the single bit at position CP0C0_BE of env->CP0_Config0,
 * i.e. true when that bit is set.
 */
static inline bool is_cpu_bigendian(CPUMIPSState *env)
{
    return extract32(env->CP0_Config0, CP0C0_BE, 1);
}

/*
 * Treat x as four 16-bit lanes and, when the CPU is big-endian, exchange
 * the two bytes inside every lane.  On a little-endian CPU x is returned
 * unchanged.
 */
static inline uint64_t bswap16x4(CPUMIPSState *env, uint64_t x)
{
    /* Selects the low byte of each 16-bit lane. */
    const uint64_t low_bytes = 0x00ff00ff00ff00ffull;

    if (!is_cpu_bigendian(env)) {
        return x;
    }

    /* High bytes move down, low bytes move up, lane by lane. */
    return ((x >> 8) & low_bytes) | ((x & low_bytes) << 8);
}

/*
 * When the CPU is big-endian, return ror64(bswap64(x), 32); otherwise
 * return x unchanged.
 *
 * NOTE(review): assuming bswap64() reverses all eight bytes and ror64()
 * rotates right, the combination byte-swaps each 32-bit half of x while
 * leaving the halves in place -- confirm against the helpers' definitions.
 */
static inline uint64_t bswap32x2(CPUMIPSState *env, uint64_t x)
{
    return is_cpu_bigendian(env) ? ror64(bswap64(x), 32) : x;
}

And we can remove the other TARGET_WORDS_BIGENDIAN uses:

>  void helper_msa_ld_w(CPUMIPSState *env, uint32_t wd,
> @@ -8293,18 +8271,20 @@ void helper_msa_ld_w(CPUMIPSState *env, uint32_t wd,
>  {
>      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
>      uintptr_t ra = GETPC();
> +    uint64_t d0, d1;
>  
> -#if !defined(HOST_WORDS_BIGENDIAN)
> -    pwd->w[0] = cpu_ldl_data_ra(env, addr + (0 << DF_WORD), ra);
> -    pwd->w[1] = cpu_ldl_data_ra(env, addr + (1 << DF_WORD), ra);
> -    pwd->w[2] = cpu_ldl_data_ra(env, addr + (2 << DF_WORD), ra);
> -    pwd->w[3] = cpu_ldl_data_ra(env, addr + (3 << DF_WORD), ra);
> -#else
> -    pwd->w[0] = cpu_ldl_data_ra(env, addr + (1 << DF_WORD), ra);
> -    pwd->w[1] = cpu_ldl_data_ra(env, addr + (0 << DF_WORD), ra);
> -    pwd->w[2] = cpu_ldl_data_ra(env, addr + (3 << DF_WORD), ra);
> -    pwd->w[3] = cpu_ldl_data_ra(env, addr + (2 << DF_WORD), ra);
> +    /*
> +     * Load 8 bytes at a time.  Use little-endian load, then for
> +     * big-endian target, we must then bswap the two words.
> +     */
> +    d0 = cpu_ldq_le_data_ra(env, addr + 0, ra);
> +    d1 = cpu_ldq_le_data_ra(env, addr + 8, ra);
> +#ifdef TARGET_WORDS_BIGENDIAN
> +    d0 = bswap32x2(d0);
> +    d1 = bswap32x2(d1);
>  #endif
> +    pwd->d[0] = d0;
> +    pwd->d[1] = d1;
>  }

But that can be done later, so:

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>

(nice simplification BTW!).



reply via email to

[Prev in Thread] Current Thread [Next in Thread]