[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v6 15/16] tcg/ppc: Update vector support to v2.07
From: Aleksandar Markovic
Subject: Re: [Qemu-devel] [PATCH v6 15/16] tcg/ppc: Update vector support to v2.07
Date: Sun, 30 Jun 2019 13:50:57 +0200
On Jun 29, 2019 3:14 PM, "Richard Henderson" <address@hidden>
wrote:
>
> This includes single-word loads and stores, lots of double-word
> arithmetic, and a few extra logical operations.
>
This patch should be split into several units (i.e., shift, compare,
etc. should each be handled in a separate patch).
The same for other similar patches from this series.
> Signed-off-by: Richard Henderson <address@hidden>
> Signed-off-by: Aleksandar Markovic <address@hidden>
> ---
> tcg/ppc/tcg-target.h | 3 +-
> tcg/ppc/tcg-target.inc.c | 128 ++++++++++++++++++++++++++++++---------
> 2 files changed, 103 insertions(+), 28 deletions(-)
>
> diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
> index 40544f996d..b8355d0a56 100644
> --- a/tcg/ppc/tcg-target.h
> +++ b/tcg/ppc/tcg-target.h
> @@ -61,6 +61,7 @@ typedef enum {
> extern bool have_isa_altivec;
> extern bool have_isa_2_06;
> extern bool have_isa_2_06_vsx;
> +extern bool have_isa_2_07_vsx;
> extern bool have_isa_3_00;
>
> /* optional instructions automatically implemented */
> @@ -147,7 +148,7 @@ extern bool have_isa_3_00;
> #define TCG_TARGET_HAS_v256 0
>
> #define TCG_TARGET_HAS_andc_vec 1
> -#define TCG_TARGET_HAS_orc_vec 0
> +#define TCG_TARGET_HAS_orc_vec have_isa_2_07_vsx
> #define TCG_TARGET_HAS_not_vec 1
> #define TCG_TARGET_HAS_neg_vec 0
> #define TCG_TARGET_HAS_abs_vec 0
> diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
> index 50d1b5612c..af86ab07dd 100644
> --- a/tcg/ppc/tcg-target.inc.c
> +++ b/tcg/ppc/tcg-target.inc.c
> @@ -67,6 +67,7 @@ static tcg_insn_unit *tb_ret_addr;
> bool have_isa_altivec;
> bool have_isa_2_06;
> bool have_isa_2_06_vsx;
> +bool have_isa_2_07_vsx;
> bool have_isa_3_00;
>
> #define HAVE_ISA_2_06 have_isa_2_06
> @@ -473,10 +474,12 @@ static int tcg_target_const_match(tcg_target_long
val, TCGType type,
> #define LVEWX XO31(71)
> #define LXSDX XO31(588) /* v2.06 */
> #define LXVDSX XO31(332) /* v2.06 */
> +#define LXSIWZX XO31(12) /* v2.07 */
>
> #define STVX XO31(231)
> #define STVEWX XO31(199)
> #define STXSDX XO31(716) /* v2.06 */
> +#define STXSIWX XO31(140) /* v2.07 */
>
> #define VADDSBS VX4(768)
> #define VADDUBS VX4(512)
> @@ -487,6 +490,7 @@ static int tcg_target_const_match(tcg_target_long
val, TCGType type,
> #define VADDSWS VX4(896)
> #define VADDUWS VX4(640)
> #define VADDUWM VX4(128)
> +#define VADDUDM VX4(192) /* v2.07 */
>
> #define VSUBSBS VX4(1792)
> #define VSUBUBS VX4(1536)
> @@ -497,47 +501,62 @@ static int tcg_target_const_match(tcg_target_long
val, TCGType type,
> #define VSUBSWS VX4(1920)
> #define VSUBUWS VX4(1664)
> #define VSUBUWM VX4(1152)
> +#define VSUBUDM VX4(1216) /* v2.07 */
>
> #define VMAXSB VX4(258)
> #define VMAXSH VX4(322)
> #define VMAXSW VX4(386)
> +#define VMAXSD VX4(450) /* v2.07 */
> #define VMAXUB VX4(2)
> #define VMAXUH VX4(66)
> #define VMAXUW VX4(130)
> +#define VMAXUD VX4(194) /* v2.07 */
> #define VMINSB VX4(770)
> #define VMINSH VX4(834)
> #define VMINSW VX4(898)
> +#define VMINSD VX4(962) /* v2.07 */
> #define VMINUB VX4(514)
> #define VMINUH VX4(578)
> #define VMINUW VX4(642)
> +#define VMINUD VX4(706) /* v2.07 */
>
> #define VCMPEQUB VX4(6)
> #define VCMPEQUH VX4(70)
> #define VCMPEQUW VX4(134)
> +#define VCMPEQUD VX4(199) /* v2.07 */
> #define VCMPGTSB VX4(774)
> #define VCMPGTSH VX4(838)
> #define VCMPGTSW VX4(902)
> +#define VCMPGTSD VX4(967) /* v2.07 */
> #define VCMPGTUB VX4(518)
> #define VCMPGTUH VX4(582)
> #define VCMPGTUW VX4(646)
> +#define VCMPGTUD VX4(711) /* v2.07 */
>
> #define VSLB VX4(260)
> #define VSLH VX4(324)
> #define VSLW VX4(388)
> +#define VSLD VX4(1476) /* v2.07 */
> #define VSRB VX4(516)
> #define VSRH VX4(580)
> #define VSRW VX4(644)
> +#define VSRD VX4(1732) /* v2.07 */
> #define VSRAB VX4(772)
> #define VSRAH VX4(836)
> #define VSRAW VX4(900)
> +#define VSRAD VX4(964) /* v2.07 */
> #define VRLB VX4(4)
> #define VRLH VX4(68)
> #define VRLW VX4(132)
> +#define VRLD VX4(196) /* v2.07 */
>
> #define VMULEUB VX4(520)
> #define VMULEUH VX4(584)
> +#define VMULEUW VX4(648) /* v2.07 */
> #define VMULOUB VX4(8)
> #define VMULOUH VX4(72)
> +#define VMULOUW VX4(136) /* v2.07 */
> +#define VMULUWM VX4(137) /* v2.07 */
> #define VMSUMUHM VX4(38)
>
> #define VMRGHB VX4(12)
> @@ -555,6 +574,9 @@ static int tcg_target_const_match(tcg_target_long
val, TCGType type,
> #define VNOR VX4(1284)
> #define VOR VX4(1156)
> #define VXOR VX4(1220)
> +#define VEQV VX4(1668) /* v2.07 */
> +#define VNAND VX4(1412) /* v2.07 */
> +#define VORC VX4(1348) /* v2.07 */
>
> #define VSPLTB VX4(524)
> #define VSPLTH VX4(588)
> @@ -568,6 +590,11 @@ static int tcg_target_const_match(tcg_target_long
val, TCGType type,
> #define XXPERMDI (OPCD(60) | (10 << 3)) /* v2.06 */
> #define XXSEL (OPCD(60) | (3 << 4)) /* v2.06 */
>
> +#define MFVSRD XO31(51) /* v2.07 */
> +#define MFVSRWZ XO31(115) /* v2.07 */
> +#define MTVSRD XO31(179) /* v2.07 */
> +#define MTVSRWZ XO31(243) /* v2.07 */
> +
> #define RT(r) ((r)<<21)
> #define RS(r) ((r)<<21)
> #define RA(r) ((r)<<16)
> @@ -697,12 +724,27 @@ static bool tcg_out_mov(TCGContext *s, TCGType
type, TCGReg ret, TCGReg arg)
> tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
> /* fallthru */
> case TCG_TYPE_I32:
> - if (ret < TCG_REG_V0 && arg < TCG_REG_V0) {
> - tcg_out32(s, OR | SAB(arg, ret, arg));
> - break;
> - } else if (ret < TCG_REG_V0 || arg < TCG_REG_V0) {
> - /* Altivec does not support vector/integer moves. */
> - return false;
> + if (ret < TCG_REG_V0) {
> + if (arg < TCG_REG_V0) {
> + tcg_out32(s, OR | SAB(arg, ret, arg));
> + break;
> + } else if (have_isa_2_07_vsx) {
> + tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
> + | VRT(arg) | RA(ret) | 1);
> + break;
> + } else {
> + /* Altivec does not support vector->integer moves. */
> + return false;
> + }
> + } else if (arg < TCG_REG_V0) {
> + if (have_isa_2_07_vsx) {
> + tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
> + | VRT(ret) | RA(arg) | 1);
> + break;
> + } else {
> + /* Altivec does not support integer->vector moves. */
> + return false;
> + }
> }
> /* fallthru */
> case TCG_TYPE_V64:
> @@ -1140,6 +1182,10 @@ static void tcg_out_ld(TCGContext *s, TCGType
type, TCGReg ret,
> tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
> break;
> }
> + if (have_isa_2_07_vsx) {
> + tcg_out_mem_long(s, 0, LXSIWZX | 1, ret, base, offset);
> + break;
> + }
> assert((offset & 3) == 0);
> tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
> shift = (offset - 4) & 0xc;
> @@ -1187,6 +1233,10 @@ static void tcg_out_st(TCGContext *s, TCGType
type, TCGReg arg,
> tcg_out_mem_long(s, STW, STWX, arg, base, offset);
> break;
> }
> + if (have_isa_2_07_vsx) {
> + tcg_out_mem_long(s, 0, STXSIWX | 1, arg, base, offset);
> + break;
> + }
> assert((offset & 3) == 0);
> shift = (offset - 4) & 0xc;
> if (shift) {
> @@ -2907,26 +2957,37 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType
type, unsigned vece)
> case INDEX_op_andc_vec:
> case INDEX_op_not_vec:
> return 1;
> + case INDEX_op_orc_vec:
> + return have_isa_2_07_vsx;
> case INDEX_op_add_vec:
> case INDEX_op_sub_vec:
> case INDEX_op_smax_vec:
> case INDEX_op_smin_vec:
> case INDEX_op_umax_vec:
> case INDEX_op_umin_vec:
> + case INDEX_op_shlv_vec:
> + case INDEX_op_shrv_vec:
> + case INDEX_op_sarv_vec:
> + return vece <= MO_32 || have_isa_2_07_vsx;
> case INDEX_op_ssadd_vec:
> case INDEX_op_sssub_vec:
> case INDEX_op_usadd_vec:
> case INDEX_op_ussub_vec:
> - case INDEX_op_shlv_vec:
> - case INDEX_op_shrv_vec:
> - case INDEX_op_sarv_vec:
> return vece <= MO_32;
> case INDEX_op_cmp_vec:
> - case INDEX_op_mul_vec:
> case INDEX_op_shli_vec:
> case INDEX_op_shri_vec:
> case INDEX_op_sari_vec:
> - return vece <= MO_32 ? -1 : 0;
> + return vece <= MO_32 || have_isa_2_07_vsx ? -1 : 0;
> + case INDEX_op_mul_vec:
> + switch (vece) {
> + case MO_8:
> + case MO_16:
> + return -1;
> + case MO_32:
> + return have_isa_2_07_vsx ? 1 : -1;
> + }
> + return 0;
> case INDEX_op_bitsel_vec:
> return have_isa_2_06_vsx;
> default:
> @@ -3030,28 +3091,28 @@ static void tcg_out_vec_op(TCGContext *s,
TCGOpcode opc,
> const TCGArg *args, const int *const_args)
> {
> static const uint32_t
> - add_op[4] = { VADDUBM, VADDUHM, VADDUWM, 0 },
> - sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, 0 },
> - eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 },
> - gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 },
> - gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 },
> + add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
> + sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
> + eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
> + gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
> + gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
> ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
> usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
> sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
> ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
> - umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 },
> - smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 },
> - umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 },
> - smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 },
> - shlv_op[4] = { VSLB, VSLH, VSLW, 0 },
> - shrv_op[4] = { VSRB, VSRH, VSRW, 0 },
> - sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 },
> + umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
> + smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
> + umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
> + smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
> + shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
> + shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
> + sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
> mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
> mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
> - muleu_op[4] = { VMULEUB, VMULEUH, 0, 0 },
> - mulou_op[4] = { VMULOUB, VMULOUH, 0, 0 },
> + muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
> + mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
> pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
> - rotl_op[4] = { VRLB, VRLH, VRLW, 0 };
> + rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };
>
> TCGType type = vecl + TCG_TYPE_V64;
> TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
> @@ -3074,6 +3135,10 @@ static void tcg_out_vec_op(TCGContext *s,
TCGOpcode opc,
> case INDEX_op_sub_vec:
> insn = sub_op[vece];
> break;
> + case INDEX_op_mul_vec:
> + tcg_debug_assert(vece == MO_32 && have_isa_2_07_vsx);
> + insn = VMULUWM;
> + break;
> case INDEX_op_ssadd_vec:
> insn = ssadd_op[vece];
> break;
> @@ -3123,6 +3188,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode
opc,
> insn = VNOR;
> a2 = a1;
> break;
> + case INDEX_op_orc_vec:
> + insn = VORC;
> + break;
>
> case INDEX_op_cmp_vec:
> switch (args[3]) {
> @@ -3203,7 +3271,7 @@ static void expand_vec_cmp(TCGType type, unsigned
vece, TCGv_vec v0,
> {
> bool need_swap = false, need_inv = false;
>
> - tcg_debug_assert(vece <= MO_32);
> + tcg_debug_assert(vece <= MO_32 || have_isa_2_07_vsx);
>
> switch (cond) {
> case TCG_COND_EQ:
> @@ -3267,6 +3335,7 @@ static void expand_vec_mul(TCGType type, unsigned
vece, TCGv_vec v0,
> break;
>
> case MO_32:
> + tcg_debug_assert(!have_isa_2_07_vsx);
> t3 = tcg_temp_new_vec(type);
> t4 = tcg_temp_new_vec(type);
> tcg_gen_dupi_vec(MO_8, t4, -16);
> @@ -3562,6 +3631,11 @@ static void tcg_target_init(TCGContext *s)
> have_isa_2_06_vsx = true;
> }
> }
> + if (hwcap2 & PPC_FEATURE2_ARCH_2_07) {
> + if (hwcap & PPC_FEATURE_HAS_VSX) {
> + have_isa_2_07_vsx = true;
> + }
> + }
> #ifdef PPC_FEATURE2_ARCH_3_00
> if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
> have_isa_3_00 = true;
> --
> 2.17.1
>
>
- Re: [Qemu-devel] [PATCH v6 09/16] tcg/ppc: Prepare case for vector multiply, (continued)
[Qemu-devel] [PATCH v6 06/16] tcg/ppc: Add support for vector maximum/minimum, Richard Henderson, 2019/06/29
[Qemu-devel] [PATCH v6 16/16] tcg/ppc: Update vector support to v3.00, Richard Henderson, 2019/06/29
[Qemu-devel] [PATCH v6 04/16] tcg/ppc: Enable tcg backend vector compilation, Richard Henderson, 2019/06/29
[Qemu-devel] [PATCH v6 15/16] tcg/ppc: Update vector support to v2.07, Richard Henderson, 2019/06/29
[Qemu-devel] [PATCH v6 12/16] tcg/ppc: Support vector dup2, Richard Henderson, 2019/06/29
[Qemu-devel] [PATCH v6 03/16] tcg/ppc: Introduce macros VRT(), VRA(), VRB(), VRC(), Richard Henderson, 2019/06/29
[Qemu-devel] [PATCH v6 10/16] tcg/ppc: Support vector shift by immediate, Richard Henderson, 2019/06/29
[Qemu-devel] [PATCH v6 08/16] tcg/ppc: Add support for vector saturated add/subtract, Richard Henderson, 2019/06/29
[Qemu-devel] [PATCH v6 01/16] tcg/ppc: Introduce Altivec registers, Richard Henderson, 2019/06/29
[Qemu-devel] [PATCH v6 05/16] tcg/ppc: Add support for load/store/logic/comparison, Richard Henderson, 2019/06/29
Re: [Qemu-devel] [PATCH v6 00/16] tcg/ppc: Add vector opcodes, no-reply, 2019/06/29