[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v3 22/81] target/arm: Implement SVE2 integer absolute difference and accumulate long
From: Richard Henderson
Subject: [PATCH v3 22/81] target/arm: Implement SVE2 integer absolute difference and accumulate long
Date: Fri, 18 Sep 2020 11:36:52 -0700
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
v2: Fix select offsetting and argument order (Laurent Desnogues).
---
target/arm/helper-sve.h | 14 ++++++++++
target/arm/sve.decode | 12 +++++++++
target/arm/sve_helper.c | 23 ++++++++++++++++
target/arm/translate-sve.c | 55 ++++++++++++++++++++++++++++++++++++++
4 files changed, 104 insertions(+)
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index b399fb2576..d5dfd4edea 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -2402,3 +2402,17 @@ DEF_HELPER_FLAGS_4(sve2_sqcadd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve2_sqcadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve2_sqcadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve2_sqcadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_sabal_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sabal_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sabal_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_uabal_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uabal_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uabal_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index 655cb5c12f..6cf09847a0 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -70,6 +70,7 @@
&rpr_s rd pg rn s
&rprr_s rd pg rn rm s
&rprr_esz rd pg rn rm esz
+&rrrr_esz rd ra rn rm esz
&rprrr_esz rd pg rn rm ra esz
&rpri_esz rd pg rn imm esz
&ptrue rd esz pat s
@@ -119,6 +120,10 @@
@rdn_i8s ........ esz:2 ...... ... imm:s8 rd:5 \
&rri_esz rn=%reg_movprfx
+# Four operand, vector element size
+@rda_rn_rm ........ esz:2 . rm:5 ... ... rn:5 rd:5 \
+ &rrrr_esz ra=%reg_movprfx
+
# Three operand with "memory" size, aka immediate left shift
@rd_rn_msz_rm ........ ... rm:5 .... imm:2 rn:5 rd:5 &rrri
@@ -1235,3 +1240,10 @@ CADD_rot90 01000101 .. 00000 0 11011 0 ..... ..... @rdn_rm
CADD_rot270 01000101 .. 00000 0 11011 1 ..... ..... @rdn_rm
SQCADD_rot90 01000101 .. 00000 1 11011 0 ..... ..... @rdn_rm
SQCADD_rot270 01000101 .. 00000 1 11011 1 ..... ..... @rdn_rm
+
+## SVE2 integer absolute difference and accumulate long
+
+SABALB 01000101 .. 0 ..... 1100 00 ..... ..... @rda_rn_rm
+SABALT 01000101 .. 0 ..... 1100 01 ..... ..... @rda_rn_rm
+UABALB 01000101 .. 0 ..... 1100 10 ..... ..... @rda_rn_rm
+UABALT 01000101 .. 0 ..... 1100 11 ..... ..... @rda_rn_rm
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index b8541168bf..cc8450c44e 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -1241,6 +1241,29 @@ DO_ZZZ_NTB(sve2_eoril_d, uint64_t, , DO_EOR)
#undef DO_ZZZ_NTB
+#define DO_ZZZW_ACC(NAME, TYPEW, TYPEN, HW, HN, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ intptr_t sel1 = simd_data(desc) * sizeof(TYPEN); \
+ for (i = 0; i < opr_sz; i += sizeof(TYPEW)) { \
+ TYPEW nn = *(TYPEN *)(vn + HN(i + sel1)); \
+ TYPEW mm = *(TYPEN *)(vm + HN(i + sel1)); \
+ TYPEW aa = *(TYPEW *)(va + HW(i)); \
+ *(TYPEW *)(vd + HW(i)) = OP(nn, mm) + aa; \
+ } \
+}
+
+DO_ZZZW_ACC(sve2_sabal_h, int16_t, int8_t, H1_2, H1, DO_ABD)
+DO_ZZZW_ACC(sve2_sabal_s, int32_t, int16_t, H1_4, H1_2, DO_ABD)
+DO_ZZZW_ACC(sve2_sabal_d, int64_t, int32_t, , H1_4, DO_ABD)
+
+DO_ZZZW_ACC(sve2_uabal_h, uint16_t, uint8_t, H1_2, H1, DO_ABD)
+DO_ZZZW_ACC(sve2_uabal_s, uint32_t, uint16_t, H1_4, H1_2, DO_ABD)
+DO_ZZZW_ACC(sve2_uabal_d, uint64_t, uint32_t, , H1_4, DO_ABD)
+
+#undef DO_ZZZW_ACC
+
#define DO_BITPERM(NAME, TYPE, OP) \
void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
{ \
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 95a81eb101..7b3720e8ef 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -163,6 +163,18 @@ static void gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
vsz, vsz, data, fn);
}
+/* Invoke an out-of-line helper on 4 Zregs. */
+static void gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
+ int rd, int rn, int rm, int ra, int data)
+{
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ vec_full_reg_offset(s, ra),
+ vsz, vsz, data, fn);
+}
+
/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
static void gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
int rd, int rn, int pg, int data)
@@ -6326,3 +6338,46 @@ static bool trans_SQCADD_rot270(DisasContext *s, arg_rrr_esz *a)
{
return do_cadd(s, a, true, true);
}
+
+static bool do_sve2_zzzz_ool(DisasContext *s, arg_rrrr_esz *a,
+ gen_helper_gvec_4 *fn, int data)
+{
+ if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
+ }
+ return true;
+}
+
+static bool do_abal(DisasContext *s, arg_rrrr_esz *a, bool uns, bool sel)
+{
+ static gen_helper_gvec_4 * const fns[2][4] = {
+ { NULL, gen_helper_sve2_sabal_h,
+ gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d },
+ { NULL, gen_helper_sve2_uabal_h,
+ gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d },
+ };
+ return do_sve2_zzzz_ool(s, a, fns[uns][a->esz], sel);
+}
+
+static bool trans_SABALB(DisasContext *s, arg_rrrr_esz *a)
+{
+ return do_abal(s, a, false, false);
+}
+
+static bool trans_SABALT(DisasContext *s, arg_rrrr_esz *a)
+{
+ return do_abal(s, a, false, true);
+}
+
+static bool trans_UABALB(DisasContext *s, arg_rrrr_esz *a)
+{
+ return do_abal(s, a, true, false);
+}
+
+static bool trans_UABALT(DisasContext *s, arg_rrrr_esz *a)
+{
+ return do_abal(s, a, true, true);
+}
--
2.25.1
- [PATCH v3 13/81] target/arm: Implement SVE2 integer add/subtract long, (continued)
- [PATCH v3 13/81] target/arm: Implement SVE2 integer add/subtract long, Richard Henderson, 2020/09/18
- [PATCH v3 14/81] target/arm: Implement SVE2 integer add/subtract interleaved long, Richard Henderson, 2020/09/18
- [PATCH v3 12/81] target/arm: Implement SVE2 saturating add/subtract (predicated), Richard Henderson, 2020/09/18
- [PATCH v3 17/81] target/arm: Implement PMULLB and PMULLT, Richard Henderson, 2020/09/18
- [PATCH v3 15/81] target/arm: Implement SVE2 integer add/subtract wide, Richard Henderson, 2020/09/18
- [PATCH v3 18/81] target/arm: Implement SVE2 bitwise shift left long, Richard Henderson, 2020/09/18
- [PATCH v3 16/81] target/arm: Implement SVE2 integer multiply long, Richard Henderson, 2020/09/18
- [PATCH v3 19/81] target/arm: Implement SVE2 bitwise exclusive-or interleaved, Richard Henderson, 2020/09/18
- [PATCH v3 21/81] target/arm: Implement SVE2 complex integer add, Richard Henderson, 2020/09/18
- [PATCH v3 20/81] target/arm: Implement SVE2 bitwise permute, Richard Henderson, 2020/09/18
- [PATCH v3 22/81] target/arm: Implement SVE2 integer absolute difference and accumulate long, Richard Henderson <=
- [PATCH v3 23/81] target/arm: Implement SVE2 integer add/subtract long with carry, Richard Henderson, 2020/09/18
- [PATCH v3 24/81] target/arm: Implement SVE2 bitwise shift right and accumulate, Richard Henderson, 2020/09/18
- [PATCH v3 25/81] target/arm: Implement SVE2 bitwise shift and insert, Richard Henderson, 2020/09/18
- [PATCH v3 26/81] target/arm: Implement SVE2 integer absolute difference and accumulate, Richard Henderson, 2020/09/18
- [PATCH v3 27/81] target/arm: Implement SVE2 saturating extract narrow, Richard Henderson, 2020/09/18
- [PATCH v3 28/81] target/arm: Implement SVE2 floating-point pairwise, Richard Henderson, 2020/09/18
- [PATCH v3 29/81] target/arm: Implement SVE2 SHRN, RSHRN, Richard Henderson, 2020/09/18
- [PATCH v3 30/81] target/arm: Implement SVE2 SQSHRUN, SQRSHRUN, Richard Henderson, 2020/09/18
- [PATCH v3 31/81] target/arm: Implement SVE2 UQSHRN, UQRSHRN, Richard Henderson, 2020/09/18
- [PATCH v3 33/81] target/arm: Implement SVE2 WHILEGT, WHILEGE, WHILEHI, WHILEHS, Richard Henderson, 2020/09/18