[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-ppc] [PATCH v1 3/3] target-ppc: implement xxbr[qdwh] instructi
From: |
David Gibson |
Subject: |
Re: [Qemu-ppc] [PATCH v1 3/3] target-ppc: implement xxbr[qdwh] instruction |
Date: |
Thu, 13 Oct 2016 11:21:45 +1100 |
User-agent: |
Mutt/1.7.0 (2016-08-17) |
On Wed, Oct 12, 2016 at 10:38:53AM +0530, Nikunj A Dadhania wrote:
> Add required helpers (GEN_XX2FORM_EO) for supporting this instruction.
>
> xxbrh: VSX Vector Byte-Reverse Halfword
> xxbrw: VSX Vector Byte-Reverse Word
> xxbrd: VSX Vector Byte-Reverse Doubleword
> xxbrq: VSX Vector Byte-Reverse Quadword
>
> Signed-off-by: Nikunj A Dadhania <address@hidden>
> ---
> target-ppc/translate.c | 32 +++++++++++++++
> target-ppc/translate/vsx-impl.inc.c | 77
> +++++++++++++++++++++++++++++++++++++
> target-ppc/translate/vsx-ops.inc.c | 8 ++++
> 3 files changed, 117 insertions(+)
>
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index dab8f19..94989b2 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -376,6 +376,9 @@ GEN_OPCODE2(name, onam, opc1, opc2, opc3, inval, type,
> type2)
> #define GEN_HANDLER_E_2(name, opc1, opc2, opc3, opc4, inval, type, type2)
> \
> GEN_OPCODE3(name, opc1, opc2, opc3, opc4, inval, type, type2)
>
> +#define GEN_HANDLER2_E_2(name, onam, opc1, opc2, opc3, opc4, inval, typ,
> typ2) \
> +GEN_OPCODE4(name, onam, opc1, opc2, opc3, opc4, inval, typ, typ2)
> +
> typedef struct opcode_t {
> unsigned char opc1, opc2, opc3, opc4;
> #if HOST_LONG_BITS == 64 /* Explicitly align to 64 bits */
> @@ -662,6 +665,21 @@ EXTRACT_HELPER(IMM8, 11, 8);
> },
> \
> .oname = stringify(name),
> \
> }
> +#define GEN_OPCODE4(name, onam, op1, op2, op3, op4, invl, _typ, _typ2)
> \
> +{
> \
> + .opc1 = op1,
> \
> + .opc2 = op2,
> \
> + .opc3 = op3,
> \
> + .opc4 = op4,
> \
> + .handler = {
> \
> + .inval1 = invl,
> \
> + .type = _typ,
> \
> + .type2 = _typ2,
> \
> + .handler = &gen_##name,
> \
> + .oname = onam,
> \
> + },
> \
> + .oname = onam,
> \
> +}
> #else
> #define GEN_OPCODE(name, op1, op2, op3, invl, _typ, _typ2)
> \
> {
> \
> @@ -720,6 +738,20 @@ EXTRACT_HELPER(IMM8, 11, 8);
> },
> \
> .oname = stringify(name),
> \
> }
> +#define GEN_OPCODE4(name, onam, op1, op2, op3, op4, invl, _typ, _typ2)
> \
> +{
> \
> + .opc1 = op1,
> \
> + .opc2 = op2,
> \
> + .opc3 = op3,
> \
> + .opc4 = op4,
> \
> + .handler = {
> \
> + .inval1 = invl,
> \
> + .type = _typ,
> \
> + .type2 = _typ2,
> \
> + .handler = &gen_##name,
> \
> + },
> \
> + .oname = onam,
> \
> +}
> #endif
>
> /* SPR load/store helpers */
> diff --git a/target-ppc/translate/vsx-impl.inc.c
> b/target-ppc/translate/vsx-impl.inc.c
> index 23ec1e1..52af5c1 100644
> --- a/target-ppc/translate/vsx-impl.inc.c
> +++ b/target-ppc/translate/vsx-impl.inc.c
> @@ -132,6 +132,22 @@ static void gen_bswap16x8(TCGv_i64 outh, TCGv_i64 outl,
> tcg_temp_free_i64(mask);
> }
>
> +static void gen_bswap32x4(TCGv_i64 outh, TCGv_i64 outl,
> + TCGv_i64 inh, TCGv_i64 inl)
> +{
> + TCGv_i64 hi = tcg_temp_new_i64();
> + TCGv_i64 lo = tcg_temp_new_i64();
> +
> + tcg_gen_bswap64_i64(hi, inh);
> + tcg_gen_bswap64_i64(lo, inl);
> + tcg_gen_shri_i64(outh, hi, 32);
> + tcg_gen_deposit_i64(outh, outh, hi, 32, 32);
> + tcg_gen_shri_i64(outl, lo, 32);
> + tcg_gen_deposit_i64(outl, outl, lo, 32, 32);
> +
> + tcg_temp_free_i64(hi);
> + tcg_temp_free_i64(lo);
> +}
Is there actually any advantage to having this 128-bit operation
working on two 64-bit "register"s, as opposed to having a bswap32x2
that operates on a single 64-bit register amd calling it twice?
> static void gen_lxvh8x(DisasContext *ctx)
> {
> TCGv EA;
> @@ -717,6 +733,67 @@ GEN_VSX_HELPER_2(xvrspim, 0x12, 0x0B, 0, PPC2_VSX)
> GEN_VSX_HELPER_2(xvrspip, 0x12, 0x0A, 0, PPC2_VSX)
> GEN_VSX_HELPER_2(xvrspiz, 0x12, 0x09, 0, PPC2_VSX)
>
> +static void gen_xxbrd(DisasContext *ctx)
> +{
> + TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode));
> + TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode));
> + TCGv_i64 xbh = cpu_vsrh(xB(ctx->opcode));
> + TCGv_i64 xbl = cpu_vsrl(xB(ctx->opcode));
> +
> + if (unlikely(!ctx->vsx_enabled)) {
> + gen_exception(ctx, POWERPC_EXCP_VSXU);
> + return;
> + }
> + tcg_gen_bswap64_i64(xth, xbh);
> + tcg_gen_bswap64_i64(xtl, xbl);
> +}
> +
> +static void gen_xxbrh(DisasContext *ctx)
> +{
> + TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode));
> + TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode));
> + TCGv_i64 xbh = cpu_vsrh(xB(ctx->opcode));
> + TCGv_i64 xbl = cpu_vsrl(xB(ctx->opcode));
> +
> + if (unlikely(!ctx->vsx_enabled)) {
> + gen_exception(ctx, POWERPC_EXCP_VSXU);
> + return;
> + }
> + gen_bswap16x8(xth, xtl, xbh, xbl);
Likewise for the 16x8 version, I guess, although that would mean
changing the existing users.
> +}
> +
> +static void gen_xxbrq(DisasContext *ctx)
> +{
> + TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode));
> + TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode));
> + TCGv_i64 xbh = cpu_vsrh(xB(ctx->opcode));
> + TCGv_i64 xbl = cpu_vsrl(xB(ctx->opcode));
> + TCGv_i64 t0 = tcg_temp_new_i64();
> +
> + if (unlikely(!ctx->vsx_enabled)) {
> + gen_exception(ctx, POWERPC_EXCP_VSXU);
> + return;
> + }
> + tcg_gen_bswap64_i64(t0, xbl);
> + tcg_gen_bswap64_i64(xtl, xbh);
> + tcg_gen_bswap64_i64(xth, t0);
This looks wrong. You swap xbl as you move it to t0, then swap it
again as you put it back into xth. So it looks like you'll translate
0011223344556677 8899AABBCCDDEEFF
to
8899AABBCCDDEEFF 7766554433221100
whereas it should become
FFEEDDCCBBAA9977 7766554433221100
> + tcg_temp_free_i64(t0);
> +}
> +
> +static void gen_xxbrw(DisasContext *ctx)
> +{
> + TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode));
> + TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode));
> + TCGv_i64 xbh = cpu_vsrh(xB(ctx->opcode));
> + TCGv_i64 xbl = cpu_vsrl(xB(ctx->opcode));
> +
> + if (unlikely(!ctx->vsx_enabled)) {
> + gen_exception(ctx, POWERPC_EXCP_VSXU);
> + return;
> + }
> + gen_bswap32x4(xth, xtl, xbh, xbl);
> +}
> +
> #define VSX_LOGICAL(name, tcg_op) \
> static void glue(gen_, name)(DisasContext * ctx) \
> { \
> diff --git a/target-ppc/translate/vsx-ops.inc.c
> b/target-ppc/translate/vsx-ops.inc.c
> index 10eb4b9..af0d27e 100644
> --- a/target-ppc/translate/vsx-ops.inc.c
> +++ b/target-ppc/translate/vsx-ops.inc.c
> @@ -39,6 +39,10 @@ GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0,
> PPC_NONE, fl2)
> GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0, opc3, 0, PPC_NONE, fl2), \
> GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0, PPC_NONE, fl2)
>
> +#define GEN_XX2FORM_EO(name, opc2, opc3, opc4, fl2)
> \
> +GEN_HANDLER2_E_2(name, #name, 0x3C, opc2 | 0, opc3, opc4, 0, PPC_NONE, fl2),
> \
> +GEN_HANDLER2_E_2(name, #name, 0x3C, opc2 | 1, opc3, opc4, 0, PPC_NONE, fl2)
> +
> #define GEN_XX3FORM(name, opc2, opc3, fl2) \
> GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0, opc3, 0, PPC_NONE, fl2), \
> GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0, PPC_NONE, fl2), \
> @@ -222,6 +226,10 @@ GEN_XX2FORM(xvrspic, 0x16, 0x0A, PPC2_VSX),
> GEN_XX2FORM(xvrspim, 0x12, 0x0B, PPC2_VSX),
> GEN_XX2FORM(xvrspip, 0x12, 0x0A, PPC2_VSX),
> GEN_XX2FORM(xvrspiz, 0x12, 0x09, PPC2_VSX),
> +GEN_XX2FORM_EO(xxbrh, 0x16, 0x1D, 0x07, PPC2_ISA300),
> +GEN_XX2FORM_EO(xxbrw, 0x16, 0x1D, 0x0F, PPC2_ISA300),
> +GEN_XX2FORM_EO(xxbrd, 0x16, 0x1D, 0x17, PPC2_ISA300),
> +GEN_XX2FORM_EO(xxbrq, 0x16, 0x1D, 0x1F, PPC2_ISA300),
>
> #define VSX_LOGICAL(name, opc2, opc3, fl2) \
> GEN_XX3FORM(name, opc2, opc3, fl2)
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
signature.asc
Description: PGP signature