[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PULL 38/57] target/arm: Implement MVE VQADD and VQSUB
From: |
Peter Maydell |
Subject: |
[PULL 38/57] target/arm: Implement MVE VQADD and VQSUB |
Date: |
Mon, 21 Jun 2021 17:28:14 +0100 |
Implement the MVE VQADD and VQSUB insns, which perform saturating
addition of a scalar to each element. Note that individual bytes of
each result element are used or discarded according to the predicate
mask, but FPSCR.QC is only set if the predicate mask for the lowest
byte of the element is set.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210617121628.20116-28-peter.maydell@linaro.org
---
target/arm/helper-mve.h | 16 ++++++++++
target/arm/mve.decode | 5 +++
target/arm/mve_helper.c | 62 ++++++++++++++++++++++++++++++++++++++
target/arm/translate-mve.c | 4 +++
4 files changed, 87 insertions(+)
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index 1b807e1cf5f..092efdab475 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -173,6 +173,22 @@ DEF_HELPER_FLAGS_4(mve_vhsubu_scalarb, TCG_CALL_NO_WG,
void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vhsubu_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr,
i32)
DEF_HELPER_FLAGS_4(mve_vhsubu_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr,
i32)
+DEF_HELPER_FLAGS_4(mve_vqadds_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr,
i32)
+DEF_HELPER_FLAGS_4(mve_vqadds_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr,
i32)
+DEF_HELPER_FLAGS_4(mve_vqadds_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr,
i32)
+
+DEF_HELPER_FLAGS_4(mve_vqaddu_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr,
i32)
+DEF_HELPER_FLAGS_4(mve_vqaddu_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr,
i32)
+DEF_HELPER_FLAGS_4(mve_vqaddu_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr,
i32)
+
+DEF_HELPER_FLAGS_4(mve_vqsubs_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr,
i32)
+DEF_HELPER_FLAGS_4(mve_vqsubs_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr,
i32)
+DEF_HELPER_FLAGS_4(mve_vqsubs_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr,
i32)
+
+DEF_HELPER_FLAGS_4(mve_vqsubu_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr,
i32)
+DEF_HELPER_FLAGS_4(mve_vqsubu_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr,
i32)
+DEF_HELPER_FLAGS_4(mve_vqsubu_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr,
i32)
+
DEF_HELPER_FLAGS_4(mve_vbrsrb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vbrsrh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vbrsrw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
index e189e2de648..c85227c675a 100644
--- a/target/arm/mve.decode
+++ b/target/arm/mve.decode
@@ -167,6 +167,11 @@ VHADD_S_scalar 1110 1110 0 . .. ... 0 ... 0 1111 . 100
.... @2scalar
VHADD_U_scalar 1111 1110 0 . .. ... 0 ... 0 1111 . 100 .... @2scalar
VHSUB_S_scalar 1110 1110 0 . .. ... 0 ... 1 1111 . 100 .... @2scalar
VHSUB_U_scalar 1111 1110 0 . .. ... 0 ... 1 1111 . 100 .... @2scalar
+
+VQADD_S_scalar 1110 1110 0 . .. ... 0 ... 0 1111 . 110 .... @2scalar
+VQADD_U_scalar 1111 1110 0 . .. ... 0 ... 0 1111 . 110 .... @2scalar
+VQSUB_S_scalar 1110 1110 0 . .. ... 0 ... 1 1111 . 110 .... @2scalar
+VQSUB_U_scalar 1111 1110 0 . .. ... 0 ... 1 1111 . 110 .... @2scalar
VBRSR 1111 1110 0 . .. ... 1 ... 1 1110 . 110 .... @2scalar
# Predicate operations
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index 558c6f5aa34..1d97bd2ae84 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -490,6 +490,33 @@ DO_2OP_U(vhaddu, do_vhadd_u)
DO_2OP_S(vhsubs, do_vhsub_s)
DO_2OP_U(vhsubu, do_vhsub_u)
+static inline int32_t do_sat_bhw(int64_t val, int64_t min, int64_t max, bool
*s)
+{
+ if (val > max) {
+ *s = true;
+ return max;
+ } else if (val < min) {
+ *s = true;
+ return min;
+ }
+ return val;
+}
+
+#define DO_SQADD_B(n, m, s) do_sat_bhw((int64_t)n + m, INT8_MIN, INT8_MAX, s)
+#define DO_SQADD_H(n, m, s) do_sat_bhw((int64_t)n + m, INT16_MIN, INT16_MAX, s)
+#define DO_SQADD_W(n, m, s) do_sat_bhw((int64_t)n + m, INT32_MIN, INT32_MAX, s)
+
+#define DO_UQADD_B(n, m, s) do_sat_bhw((int64_t)n + m, 0, UINT8_MAX, s)
+#define DO_UQADD_H(n, m, s) do_sat_bhw((int64_t)n + m, 0, UINT16_MAX, s)
+#define DO_UQADD_W(n, m, s) do_sat_bhw((int64_t)n + m, 0, UINT32_MAX, s)
+
+#define DO_SQSUB_B(n, m, s) do_sat_bhw((int64_t)n - m, INT8_MIN, INT8_MAX, s)
+#define DO_SQSUB_H(n, m, s) do_sat_bhw((int64_t)n - m, INT16_MIN, INT16_MAX, s)
+#define DO_SQSUB_W(n, m, s) do_sat_bhw((int64_t)n - m, INT32_MIN, INT32_MAX, s)
+
+#define DO_UQSUB_B(n, m, s) do_sat_bhw((int64_t)n - m, 0, UINT8_MAX, s)
+#define DO_UQSUB_H(n, m, s) do_sat_bhw((int64_t)n - m, 0, UINT16_MAX, s)
+#define DO_UQSUB_W(n, m, s) do_sat_bhw((int64_t)n - m, 0, UINT32_MAX, s)
#define DO_2OP_SCALAR(OP, ESIZE, TYPE, FN) \
void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, \
@@ -505,6 +532,27 @@ DO_2OP_U(vhsubu, do_vhsub_u)
mve_advance_vpt(env); \
}
+#define DO_2OP_SAT_SCALAR(OP, ESIZE, TYPE, FN) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, \
+ uint32_t rm) \
+ { \
+ TYPE *d = vd, *n = vn; \
+ TYPE m = rm; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ bool qc = false; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ bool sat = false; \
+ mergemask(&d[H##ESIZE(e)], FN(n[H##ESIZE(e)], m, &sat), \
+ mask); \
+ qc |= sat & mask & 1; \
+ } \
+ if (qc) { \
+ env->vfp.qc[0] = qc; \
+ } \
+ mve_advance_vpt(env); \
+ }
+
/* provide unsigned 2-op scalar helpers for all sizes */
#define DO_2OP_SCALAR_U(OP, FN) \
DO_2OP_SCALAR(OP##b, 1, uint8_t, FN) \
@@ -523,6 +571,20 @@ DO_2OP_SCALAR_U(vhaddu_scalar, do_vhadd_u)
DO_2OP_SCALAR_S(vhsubs_scalar, do_vhsub_s)
DO_2OP_SCALAR_U(vhsubu_scalar, do_vhsub_u)
+DO_2OP_SAT_SCALAR(vqaddu_scalarb, 1, uint8_t, DO_UQADD_B)
+DO_2OP_SAT_SCALAR(vqaddu_scalarh, 2, uint16_t, DO_UQADD_H)
+DO_2OP_SAT_SCALAR(vqaddu_scalarw, 4, uint32_t, DO_UQADD_W)
+DO_2OP_SAT_SCALAR(vqadds_scalarb, 1, int8_t, DO_SQADD_B)
+DO_2OP_SAT_SCALAR(vqadds_scalarh, 2, int16_t, DO_SQADD_H)
+DO_2OP_SAT_SCALAR(vqadds_scalarw, 4, int32_t, DO_SQADD_W)
+
+DO_2OP_SAT_SCALAR(vqsubu_scalarb, 1, uint8_t, DO_UQSUB_B)
+DO_2OP_SAT_SCALAR(vqsubu_scalarh, 2, uint16_t, DO_UQSUB_H)
+DO_2OP_SAT_SCALAR(vqsubu_scalarw, 4, uint32_t, DO_UQSUB_W)
+DO_2OP_SAT_SCALAR(vqsubs_scalarb, 1, int8_t, DO_SQSUB_B)
+DO_2OP_SAT_SCALAR(vqsubs_scalarh, 2, int16_t, DO_SQSUB_H)
+DO_2OP_SAT_SCALAR(vqsubs_scalarw, 4, int32_t, DO_SQSUB_W)
+
static inline uint32_t do_vbrsrb(uint32_t n, uint32_t m)
{
m &= 0xff;
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index 7c4c06e434c..27c69d9c7dd 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -446,6 +446,10 @@ DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
+DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
+DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
+DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
+DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
--
2.20.1
- [PULL 17/57] target/arm: Implement MVE VMVN (register), (continued)
- [PULL 17/57] target/arm: Implement MVE VMVN (register), Peter Maydell, 2021/06/21
- [PULL 20/57] tcg: Make gen_dup_i32/i64() public as tcg_gen_dup_i32/i64, Peter Maydell, 2021/06/21
- [PULL 12/57] target/arm: Implement MVE VLDR/VSTR (non-widening forms), Peter Maydell, 2021/06/21
- [PULL 19/57] target/arm: Implement MVE VNEG, Peter Maydell, 2021/06/21
- [PULL 21/57] target/arm: Implement MVE VDUP, Peter Maydell, 2021/06/21
- [PULL 26/57] target/arm: Implement MVE VMAX, VMIN, Peter Maydell, 2021/06/21
- [PULL 23/57] target/arm: Implement MVE VADD, VSUB, VMUL, Peter Maydell, 2021/06/21
- [PULL 30/57] target/arm: Implement MVE VMLALDAV, Peter Maydell, 2021/06/21
- [PULL 25/57] target/arm: Implement MVE VRMULH, Peter Maydell, 2021/06/21
- [PULL 37/57] target/arm: Implement MVE VPST, Peter Maydell, 2021/06/21
- [PULL 38/57] target/arm: Implement MVE VQADD and VQSUB,
Peter Maydell <=
- [PULL 34/57] target/arm: Implement MVE VSUB, VMUL (scalar), Peter Maydell, 2021/06/21
- [PULL 42/57] target/arm: Implement MVE VQADD, VQSUB (vector), Peter Maydell, 2021/06/21
- [PULL 24/57] target/arm: Implement MVE VMULH, Peter Maydell, 2021/06/21
- [PULL 14/57] target/arm: Implement MVE VCLZ, Peter Maydell, 2021/06/21
- [PULL 13/57] target/arm: Implement widening/narrowing MVE VLDR/VSTR insns, Peter Maydell, 2021/06/21
- [PULL 18/57] target/arm: Implement MVE VABS, Peter Maydell, 2021/06/21
- [PULL 22/57] target/arm: Implement MVE VAND, VBIC, VORR, VORN, VEOR, Peter Maydell, 2021/06/21
- [PULL 27/57] target/arm: Implement MVE VABD, Peter Maydell, 2021/06/21
- [PULL 28/57] target/arm: Implement MVE VHADD, VHSUB, Peter Maydell, 2021/06/21
- [PULL 29/57] target/arm: Implement MVE VMULL, Peter Maydell, 2021/06/21