qemu-ppc
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-ppc] [PATCH 13/13] target-ppc: Add xxperm and xxpermr instruct


From: David Gibson
Subject: Re: [Qemu-ppc] [PATCH 13/13] target-ppc: Add xxperm and xxpermr instructions
Date: Tue, 6 Dec 2016 15:11:22 +1100
User-agent: Mutt/1.7.1 (2016-10-04)

On Mon, Dec 05, 2016 at 04:55:30PM +0530, Nikunj A Dadhania wrote:
> From: Bharata B Rao <address@hidden>
> 
> xxperm:  VSX Vector Permute
> xxpermr: VSX Vector Permute Right-indexed
> 
> Signed-off-by: Bharata B Rao <address@hidden>
> Signed-off-by: Nikunj A Dadhania <address@hidden>
> ---
>  target-ppc/fpu_helper.c             | 50 
> +++++++++++++++++++++++++++++++++++++
>  target-ppc/helper.h                 |  2 ++
>  target-ppc/translate/vsx-impl.inc.c |  2 ++
>  target-ppc/translate/vsx-ops.inc.c  |  2 ++
>  4 files changed, 56 insertions(+)
> 
> diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
> index 3b867cf..be552c7 100644
> --- a/target-ppc/fpu_helper.c
> +++ b/target-ppc/fpu_helper.c
> @@ -2869,3 +2869,53 @@ uint64_t helper_xsrsp(CPUPPCState *env, uint64_t xb)
>      float_check_status(env);
>      return xt;
>  }
> +
> +static void vsr_copy_256(ppc_vsr_t *xa, ppc_vsr_t *xt, int8_t *src)
> +{
> +#if defined(HOST_WORDS_BIGENDIAN)
> +    memcpy(src, xa, sizeof(*xa));
> +    memcpy(src + 16, xt, sizeof(*xt));
> +#else
> +    memcpy(src, xt, sizeof(*xt));
> +    memcpy(src + 16, xa, sizeof(*xa));

Is this right?  I thought the order of the bytes within each word
varied with the host endianness as well.

> +#endif
> +}
> +
> +static int8_t vsr_get_byte(int8_t *src, int bound, int idx)
> +{
> +    if (idx >= bound) {
> +        return 0xFF;
> +    }

AFAICT you don't need this check.  For both xxperm and xxpermr you're
already masking the index to 5 bits, so it can't exceed 31.

> +#if defined(HOST_WORDS_BIGENDIAN)
> +    return src[idx];
> +#else
> +    return src[bound - 1 - idx];
> +#endif
> +}
> +
> +#define VSX_XXPERM(op, indexed)                                    \
> +void helper_##op(CPUPPCState *env, uint32_t opcode)                \
> +{                                                                  \
> +    ppc_vsr_t xt, xa, pcv;                                         \
> +    int i, idx;                                                    \
> +    int8_t src[32];                                                \
> +                                                                   \
> +    getVSR(xA(opcode), &xa, env);                                  \
> +    getVSR(xT(opcode), &xt, env);                                  \
> +    getVSR(xB(opcode), &pcv, env);                                 \
> +                                                                   \
> +    vsr_copy_256(&xa, &xt, src);                                   \

You have a double copy here AFAICT - first from the actual env
structure to xt and xa, then to the src array.  That seems like it
would be good to avoid.

It seems like it would nice in any case to avoid even the one copy.
You'd need a temporary for the output of course and to copy that, but
you should be able to combine indexed with host endianness to
translate each index to retrieve directly from the VSR values in env.

> +    for (i = 0; i < 16; i++) {                                     \
> +        idx = pcv.VsrB(i) & 0x1F;                                  \
> +        if (indexed) {                                             \
> +            xt.VsrB(i) = vsr_get_byte(src, 32, 31 - idx);          \
> +        } else {                                                   \
> +            xt.VsrB(i) = vsr_get_byte(src, 32, idx);               \
> +        }                                                          \
> +    }                                                              \
> +    putVSR(xT(opcode), &xt, env);                                  \
> +}
> +
> +VSX_XXPERM(xxperm, 0)
> +VSX_XXPERM(xxpermr, 1)
> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> index 9f812c8..399cf99 100644
> --- a/target-ppc/helper.h
> +++ b/target-ppc/helper.h
> @@ -538,6 +538,8 @@ DEF_HELPER_2(xvrspip, void, env, i32)
>  DEF_HELPER_2(xvrspiz, void, env, i32)
>  DEF_HELPER_4(xxextractuw, void, env, tl, tl, i32)
>  DEF_HELPER_4(xxinsertw, void, env, tl, tl, i32)
> +DEF_HELPER_2(xxperm, void, env, i32)
> +DEF_HELPER_2(xxpermr, void, env, i32)
>  
>  DEF_HELPER_2(efscfsi, i32, env, i32)
>  DEF_HELPER_2(efscfui, i32, env, i32)
> diff --git a/target-ppc/translate/vsx-impl.inc.c 
> b/target-ppc/translate/vsx-impl.inc.c
> index 77f098b..2ad152e 100644
> --- a/target-ppc/translate/vsx-impl.inc.c
> +++ b/target-ppc/translate/vsx-impl.inc.c
> @@ -914,6 +914,8 @@ GEN_VSX_HELPER_2(xvrspic, 0x16, 0x0A, 0, PPC2_VSX)
>  GEN_VSX_HELPER_2(xvrspim, 0x12, 0x0B, 0, PPC2_VSX)
>  GEN_VSX_HELPER_2(xvrspip, 0x12, 0x0A, 0, PPC2_VSX)
>  GEN_VSX_HELPER_2(xvrspiz, 0x12, 0x09, 0, PPC2_VSX)
> +GEN_VSX_HELPER_2(xxperm, 0x08, 0x03, 0, PPC2_ISA300)
> +GEN_VSX_HELPER_2(xxpermr, 0x08, 0x07, 0, PPC2_ISA300)
>  
>  static void gen_xxbrd(DisasContext *ctx)
>  {
> diff --git a/target-ppc/translate/vsx-ops.inc.c 
> b/target-ppc/translate/vsx-ops.inc.c
> index 42e83d2..93fb9b8 100644
> --- a/target-ppc/translate/vsx-ops.inc.c
> +++ b/target-ppc/translate/vsx-ops.inc.c
> @@ -275,6 +275,8 @@ VSX_LOGICAL(xxlnand, 0x8, 0x16, PPC2_VSX207),
>  VSX_LOGICAL(xxlorc, 0x8, 0x15, PPC2_VSX207),
>  GEN_XX3FORM(xxmrghw, 0x08, 0x02, PPC2_VSX),
>  GEN_XX3FORM(xxmrglw, 0x08, 0x06, PPC2_VSX),
> +GEN_XX3FORM(xxperm, 0x08, 0x03, PPC2_ISA300),
> +GEN_XX3FORM(xxpermr, 0x08, 0x07, PPC2_ISA300),
>  GEN_XX2FORM(xxspltw, 0x08, 0x0A, PPC2_VSX),
>  GEN_XX1FORM(xxspltib, 0x08, 0x0B, PPC2_ISA300),
>  GEN_XX3FORM_DM(xxsldwi, 0x08, 0x00),

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: signature.asc
Description: PGP signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]