From: Richard Henderson <richard.henderson@linaro.org>
Subject: [PATCH v3 05/81] target/arm: Implement SVE2 Integer Multiply - Unpredicated
Date: Fri, 18 Sep 2020 11:36:35 -0700
For MUL, we can rely on generic support. For SMULH and UMULH,
create some trivial helpers. For PMUL, back in a21bb78e5817,
we organized helper_gvec_pmul_b in preparation for this use.
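As a scalar illustration (not part of the patch) of what each new
helper vectorizes per element: the highpart product widens the
operands to twice the element size and keeps the upper half. A
hypothetical smulh32 for the 32-bit signed case would be

    /* Illustration only: highpart of a signed 32x32 multiply,
       matching what gvec_smulh_s computes per element.  */
    static inline int32_t smulh32(int32_t n, int32_t m)
    {
        return ((int64_t)n * m) >> 32;
    }

For the 64-bit elements there is no wider type to promote to, so the
helpers below use the existing muls64/mulu64 utilities and discard
the low half of the product.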
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/helper.h | 10 ++++
target/arm/sve.decode | 10 ++++
target/arm/translate-sve.c | 50 ++++++++++++++++++++
target/arm/vec_helper.c | 96 ++++++++++++++++++++++++++++++++++++++
4 files changed, 166 insertions(+)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 8defd7c801..94658169b2 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -815,6 +815,16 @@ DEF_HELPER_FLAGS_3(gvec_cgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_cge0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_cge0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_smulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_smulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_smulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_smulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_umulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_umulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_umulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_umulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_4(gvec_sshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_sshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_ushl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index 6425396ac1..2c71d9e446 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -1090,3 +1090,13 @@ ST1_zprz 1110010 .. 00 ..... 100 ... ..... ..... \
@rprr_scatter_store xs=0 esz=3 scale=0
ST1_zprz 1110010 .. 00 ..... 110 ... ..... ..... \
@rprr_scatter_store xs=1 esz=3 scale=0
+
+#### SVE2 Support
+
+### SVE2 Integer Multiply - Unpredicated
+
+# SVE2 integer multiply vectors (unpredicated)
+MUL_zzz 00000100 .. 1 ..... 0110 00 ..... ..... @rd_rn_rm
+SMULH_zzz 00000100 .. 1 ..... 0110 10 ..... ..... @rd_rn_rm
+UMULH_zzz 00000100 .. 1 ..... 0110 11 ..... ..... @rd_rn_rm
+PMUL_zzz 00000100 00 1 ..... 0110 01 ..... ..... @rd_rn_rm_e0
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 9095586fc9..04c5a2c8bd 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -5805,3 +5805,53 @@ static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
{
return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);
}
+
+/*
+ * SVE2 Integer Multiply - Unpredicated
+ */
+
+static bool trans_MUL_zzz(DisasContext *s, arg_rrr_esz *a)
+{
+ if (!dc_isar_feature(aa64_sve2, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ gen_gvec_fn_zzz(s, tcg_gen_gvec_mul, a->esz, a->rd, a->rn, a->rm);
+ }
+ return true;
+}
+
+static bool do_sve2_zzz_ool(DisasContext *s, arg_rrr_esz *a,
+ gen_helper_gvec_3 *fn)
+{
+ if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
+ }
+ return true;
+}
+
+static bool trans_SMULH_zzz(DisasContext *s, arg_rrr_esz *a)
+{
+ static gen_helper_gvec_3 * const fns[4] = {
+ gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
+ gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
+ };
+ return do_sve2_zzz_ool(s, a, fns[a->esz]);
+}
+
+static bool trans_UMULH_zzz(DisasContext *s, arg_rrr_esz *a)
+{
+ static gen_helper_gvec_3 * const fns[4] = {
+ gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
+ gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
+ };
+ return do_sve2_zzz_ool(s, a, fns[a->esz]);
+}
+
+static bool trans_PMUL_zzz(DisasContext *s, arg_rrr_esz *a)
+{
+ return do_sve2_zzz_ool(s, a, gen_helper_gvec_pmul_b);
+}
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index a973454e4f..84e1ae3933 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -1937,3 +1937,99 @@ DO_VRINT_RMODE(gvec_vrint_rm_h, helper_rinth, uint16_t)
DO_VRINT_RMODE(gvec_vrint_rm_s, helper_rints, uint32_t)
#undef DO_VRINT_RMODE
+
+/*
+ * NxN -> N highpart multiply
+ *
+ * TODO: expose this as a generic vector operation.
+ */
+
+void HELPER(gvec_smulh_b)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int8_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz; ++i) {
+ d[i] = ((int32_t)n[i] * m[i]) >> 8;
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_smulh_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int16_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 2; ++i) {
+ d[i] = ((int32_t)n[i] * m[i]) >> 16;
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_smulh_s)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int32_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 4; ++i) {
+ d[i] = ((int64_t)n[i] * m[i]) >> 32;
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_smulh_d)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+ uint64_t discard;
+
+ for (i = 0; i < opr_sz / 8; ++i) {
+ muls64(&discard, &d[i], n[i], m[i]);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_umulh_b)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ uint8_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz; ++i) {
+ d[i] = ((uint32_t)n[i] * m[i]) >> 8;
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_umulh_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ uint16_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 2; ++i) {
+ d[i] = ((uint32_t)n[i] * m[i]) >> 16;
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_umulh_s)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ uint32_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 4; ++i) {
+ d[i] = ((uint64_t)n[i] * m[i]) >> 32;
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_umulh_d)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+ uint64_t discard;
+
+ for (i = 0; i < opr_sz / 8; ++i) {
+ mulu64(&discard, &d[i], n[i], m[i]);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
--
2.25.1