[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v2 26/67] target/arm: Convert FABD to decodetree
From: Richard Henderson
Subject: [PATCH v2 26/67] target/arm: Convert FABD to decodetree
Date: Fri, 24 May 2024 16:20:40 -0700
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/helper.h | 1 +
target/arm/tcg/a64.decode | 6 ++++
target/arm/tcg/translate-a64.c | 60 ++++++++++++++++++++++------------
target/arm/tcg/vec_helper.c | 6 ++++
4 files changed, 53 insertions(+), 20 deletions(-)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 8d076011c1..ff6e3094f4 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -724,6 +724,7 @@ DEF_HELPER_FLAGS_5(gvec_fmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fceq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fceq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index 7fc3277be6..a852b5f06f 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -728,6 +728,9 @@ FACGE_s 0111 1110 0.1 ..... 11101 1 ..... ..... @rrr_sd
FACGT_s 0111 1110 110 ..... 00101 1 ..... ..... @rrr_h
FACGT_s 0111 1110 1.1 ..... 11101 1 ..... ..... @rrr_sd
+FABD_s 0111 1110 110 ..... 00010 1 ..... ..... @rrr_h
+FABD_s 0111 1110 1.1 ..... 11010 1 ..... ..... @rrr_sd
+
### Advanced SIMD three same
FADD_v 0.00 1110 010 ..... 00010 1 ..... ..... @qrrr_h
@@ -778,6 +781,9 @@ FACGE_v 0.10 1110 0.1 ..... 11101 1 ..... ..... @qrrr_sd
FACGT_v 0.10 1110 110 ..... 00101 1 ..... ..... @qrrr_h
FACGT_v 0.10 1110 1.1 ..... 11101 1 ..... ..... @qrrr_sd
+FABD_v 0.10 1110 110 ..... 00010 1 ..... ..... @qrrr_h
+FABD_v 0.10 1110 1.1 ..... 11010 1 ..... ..... @qrrr_sd
+
### Advanced SIMD scalar x indexed element
FMUL_si 0101 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 75b0c1a005..633384d2a5 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -5010,6 +5010,31 @@ static const FPScalar f_scalar_facgt = {
};
TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt)
+static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
+{
+ gen_helper_vfp_subh(d, n, m, s);
+ gen_vfp_absh(d, d);
+}
+
+static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
+{
+ gen_helper_vfp_subs(d, n, m, s);
+ gen_vfp_abss(d, d);
+}
+
+static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
+{
+ gen_helper_vfp_subd(d, n, m, s);
+ gen_vfp_absd(d, d);
+}
+
+static const FPScalar f_scalar_fabd = {
+ gen_fabd_h,
+ gen_fabd_s,
+ gen_fabd_d,
+};
+TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd)
+
static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a,
gen_helper_gvec_3_ptr * const fns[3])
{
@@ -5150,6 +5175,13 @@ static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = {
};
TRANS(FACGT_v, do_fp3_vector, a, f_vector_facgt)
+static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = {
+ gen_helper_gvec_fabd_h,
+ gen_helper_gvec_fabd_s,
+ gen_helper_gvec_fabd_d,
+};
+TRANS(FABD_v, do_fp3_vector, a, f_vector_fabd)
+
/*
* Advanced SIMD scalar/vector x indexed element
*/
@@ -9303,10 +9335,6 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
case 0x3f: /* FRSQRTS */
gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
break;
- case 0x7a: /* FABD */
- gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
- gen_vfp_absd(tcg_res, tcg_res);
- break;
default:
case 0x18: /* FMAXNM */
case 0x19: /* FMLA */
@@ -9322,6 +9350,7 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
case 0x5c: /* FCMGE */
case 0x5d: /* FACGE */
case 0x5f: /* FDIV */
+ case 0x7a: /* FABD */
case 0x7c: /* FCMGT */
case 0x7d: /* FACGT */
g_assert_not_reached();
@@ -9344,10 +9373,6 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
case 0x3f: /* FRSQRTS */
gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
break;
- case 0x7a: /* FABD */
- gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
- gen_vfp_abss(tcg_res, tcg_res);
- break;
default:
case 0x18: /* FMAXNM */
case 0x19: /* FMLA */
@@ -9363,6 +9388,7 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
case 0x5c: /* FCMGE */
case 0x5d: /* FACGE */
case 0x5f: /* FDIV */
+ case 0x7a: /* FABD */
case 0x7c: /* FCMGT */
case 0x7d: /* FACGT */
g_assert_not_reached();
@@ -9405,7 +9431,6 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
switch (fpopcode) {
case 0x1f: /* FRECPS */
case 0x3f: /* FRSQRTS */
- case 0x7a: /* FABD */
break;
default:
case 0x1b: /* FMULX */
@@ -9413,6 +9438,7 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
case 0x7d: /* FACGT */
case 0x1c: /* FCMEQ */
case 0x5c: /* FCMGE */
+ case 0x7a: /* FABD */
case 0x7c: /* FCMGT */
unallocated_encoding(s);
return;
@@ -9568,13 +9594,13 @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
switch (fpopcode) {
case 0x07: /* FRECPS */
case 0x0f: /* FRSQRTS */
- case 0x1a: /* FABD */
break;
default:
case 0x03: /* FMULX */
case 0x04: /* FCMEQ (reg) */
case 0x14: /* FCMGE (reg) */
case 0x15: /* FACGE */
+ case 0x1a: /* FABD */
case 0x1c: /* FCMGT (reg) */
case 0x1d: /* FACGT */
unallocated_encoding(s);
@@ -9602,15 +9628,12 @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
case 0x0f: /* FRSQRTS */
gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
break;
- case 0x1a: /* FABD */
- gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
- tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
- break;
default:
case 0x03: /* FMULX */
case 0x04: /* FCMEQ (reg) */
case 0x14: /* FCMGE (reg) */
case 0x15: /* FACGE */
+ case 0x1a: /* FABD */
case 0x1c: /* FCMGT (reg) */
case 0x1d: /* FACGT */
g_assert_not_reached();
@@ -11272,7 +11295,6 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
return;
case 0x1f: /* FRECPS */
case 0x3f: /* FRSQRTS */
- case 0x7a: /* FABD */
if (!fp_access_check(s)) {
return;
}
@@ -11314,6 +11336,7 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
case 0x5c: /* FCMGE */
case 0x5d: /* FACGE */
case 0x5f: /* FDIV */
+ case 0x7a: /* FABD */
case 0x7d: /* FACGT */
case 0x7c: /* FCMGT */
unallocated_encoding(s);
@@ -11659,7 +11682,6 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
switch (fpopcode) {
case 0x7: /* FRECPS */
case 0xf: /* FRSQRTS */
- case 0x1a: /* FABD */
pairwise = false;
break;
case 0x10: /* FMAXNMP */
@@ -11684,6 +11706,7 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
case 0x14: /* FCMGE */
case 0x15: /* FACGE */
case 0x17: /* FDIV */
+ case 0x1a: /* FABD */
case 0x1c: /* FCMGT */
case 0x1d: /* FACGT */
unallocated_encoding(s);
@@ -11757,10 +11780,6 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
case 0xf: /* FRSQRTS */
gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
break;
- case 0x1a: /* FABD */
- gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
- tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
- break;
default:
case 0x0: /* FMAXNM */
case 0x1: /* FMLA */
@@ -11776,6 +11795,7 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
case 0x14: /* FCMGE */
case 0x15: /* FACGE */
case 0x17: /* FDIV */
+ case 0x1a: /* FABD */
case 0x1c: /* FCMGT */
case 0x1d: /* FACGT */
g_assert_not_reached();
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index dabefa3526..e9d7922f30 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -1154,6 +1154,11 @@ static float32 float32_abd(float32 op1, float32 op2, float_status *stat)
return float32_abs(float32_sub(op1, op2, stat));
}
+static float64 float64_abd(float64 op1, float64 op2, float_status *stat)
+{
+ return float64_abs(float64_sub(op1, op2, stat));
+}
+
/*
* Reciprocal step. These are the AArch32 version which uses a
* non-fused multiply-and-subtract.
@@ -1238,6 +1243,7 @@ DO_3OP(gvec_ftsmul_d, float64_ftsmul, float64)
DO_3OP(gvec_fabd_h, float16_abd, float16)
DO_3OP(gvec_fabd_s, float32_abd, float32)
+DO_3OP(gvec_fabd_d, float64_abd, float64)
DO_3OP(gvec_fceq_h, float16_ceq, float16)
DO_3OP(gvec_fceq_s, float32_ceq, float32)
--
2.34.1
- [PATCH v2 16/67] target/arm: Convert XAR to decodetree, (continued)
- [PATCH v2 16/67] target/arm: Convert XAR to decodetree, Richard Henderson, 2024/05/24
- [PATCH v2 14/67] target/arm: Convert Cryptographic 4-register to decodetree, Richard Henderson, 2024/05/24
- [PATCH v2 17/67] target/arm: Convert Advanced SIMD copy to decodetree, Richard Henderson, 2024/05/24
- [PATCH v2 21/67] target/arm: Introduce vfp_load_reg16, Richard Henderson, 2024/05/24
- [PATCH v2 23/67] target/arm: Convert FNMUL to decodetree, Richard Henderson, 2024/05/24
- [PATCH v2 24/67] target/arm: Convert FMLA, FMLS to decodetree, Richard Henderson, 2024/05/24
- [PATCH v2 20/67] target/arm: Convert FMAX, FMIN, FMAXNM, FMINNM to decodetree, Richard Henderson, 2024/05/24
- [PATCH v2 19/67] target/arm: Convert FADD, FSUB, FDIV, FMUL to decodetree, Richard Henderson, 2024/05/24
- [PATCH v2 18/67] target/arm: Convert FMULX to decodetree, Richard Henderson, 2024/05/24
- [PATCH v2 26/67] target/arm: Convert FABD to decodetree, Richard Henderson <=
- [PATCH v2 27/67] target/arm: Convert FRECPS, FRSQRTS to decodetree, Richard Henderson, 2024/05/24
- [PATCH v2 31/67] target/arm: Convert ADDP to decodetree, Richard Henderson, 2024/05/24
- [PATCH v2 32/67] target/arm: Use gvec for neon padd, Richard Henderson, 2024/05/24
- [PATCH v2 36/67] target/arm: Convert disas_simd_3same_logic to decodetree, Richard Henderson, 2024/05/24
- [PATCH v2 34/67] target/arm: Use gvec for neon pmax, pmin, Richard Henderson, 2024/05/24
- [PATCH v2 38/67] target/arm: Convert SUQADD and USQADD to gvec, Richard Henderson, 2024/05/24
- [PATCH v2 37/67] target/arm: Improve vector UQADD, UQSUB, SQADD, SQSUB, Richard Henderson, 2024/05/24