[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 10/10] tcg/s390x: Implement ctpop operation
From: |
Richard Henderson |
Subject: |
[PATCH 10/10] tcg/s390x: Implement ctpop operation |
Date: |
Thu, 24 Feb 2022 05:43:33 -1000 |
There is an older form that produces per-byte results,
and a newer form that produces per-register results,
and a vector form that produces per-element results.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/s390x/tcg-target.h | 5 ++--
tcg/s390x/tcg-target.c.inc | 54 ++++++++++++++++++++++++++++++++++++++
2 files changed, 57 insertions(+), 2 deletions(-)
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index 4aff59b7c0..42cb900c6d 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -62,6 +62,7 @@ typedef enum TCGReg {
#define FACILITY_LOAD_ON_COND 45
#define FACILITY_FAST_BCR_SER FACILITY_LOAD_ON_COND
#define FACILITY_DISTINCT_OPS FACILITY_LOAD_ON_COND
+#define FACILITY_POPCOUNT FACILITY_LOAD_ON_COND
#define FACILITY_LOAD_ON_COND2 53
#define FACILITY_MISC_INSN_EXT2 58
#define FACILITY_MISC_INSN_EXT3 61
@@ -91,7 +92,7 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_nor_i32 HAVE_FACILITY(MISC_INSN_EXT3)
#define TCG_TARGET_HAS_clz_i32 0
#define TCG_TARGET_HAS_ctz_i32 HAVE_FACILITY(VECTOR)
-#define TCG_TARGET_HAS_ctpop_i32 0
+#define TCG_TARGET_HAS_ctpop_i32 HAVE_FACILITY(POPCOUNT)
#define TCG_TARGET_HAS_deposit_i32 HAVE_FACILITY(GEN_INST_EXT)
#define TCG_TARGET_HAS_extract_i32 HAVE_FACILITY(GEN_INST_EXT)
#define TCG_TARGET_HAS_sextract_i32 0
@@ -128,7 +129,7 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_nor_i64 HAVE_FACILITY(MISC_INSN_EXT3)
#define TCG_TARGET_HAS_clz_i64 HAVE_FACILITY(EXT_IMM)
#define TCG_TARGET_HAS_ctz_i64 HAVE_FACILITY(VECTOR)
-#define TCG_TARGET_HAS_ctpop_i64 0
+#define TCG_TARGET_HAS_ctpop_i64 HAVE_FACILITY(POPCOUNT)
#define TCG_TARGET_HAS_deposit_i64 HAVE_FACILITY(GEN_INST_EXT)
#define TCG_TARGET_HAS_extract_i64 HAVE_FACILITY(GEN_INST_EXT)
#define TCG_TARGET_HAS_sextract_i64 0
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index 9c3f8f365e..4b877c70fe 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -187,6 +187,7 @@ typedef enum S390Opcode {
RRE_SLBGR = 0xb989,
RRE_XGR = 0xb982,
+ RRFa_ALHHLR = 0xb9da,
RRFa_MGRK = 0xb9ec,
RRFa_MSRKC = 0xb9fd,
RRFa_MSGRKC = 0xb9ed,
@@ -215,6 +216,7 @@ typedef enum S390Opcode {
RRFc_LOCR = 0xb9f2,
RRFc_LOCGR = 0xb9e2,
+ RRFc_POPCNT = 0xb9e1,
RR_AR = 0x1a,
RR_ALR = 0x1e,
@@ -315,6 +317,7 @@ typedef enum S390Opcode {
VRRc_VO = 0xe76a,
VRRc_VOC = 0xe76f,
VRRc_VPKS = 0xe797, /* we leave the m5 cs field 0 */
+ VRRa_VPOPCT = 0xe750,
VRRc_VS = 0xe7f7,
VRRa_VUPH = 0xe7d7,
VRRa_VUPL = 0xe7d6,
@@ -1694,6 +1697,48 @@ static void tgen_ctz(TCGContext *s, TCGType type, TCGReg
dest,
tgen_movcond_int(s, type, dest, a2, a2const, src, cc, inv_cc);
}
+/*
+ * Emit host code that stores the population count of @a1 into @dest.
+ * For TCG_TYPE_I32 only the low 32 bits of @a1 contribute to the result;
+ * for TCG_TYPE_I64 all 64 bits do.  TCG_TMP0 is clobbered on the
+ * fallback (per-byte POPCNT) path.
+ */
+static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg dest, TCGReg a1)
+{
+    /* With MIE3, POPCNT (m4 = 8) can produce the complete result. */
+    if (HAVE_FACILITY(MISC_INSN_EXT3)) {
+        if (type == TCG_TYPE_I32) {
+            /* POPCNT counts the whole register: clear the high half. */
+            tgen_ext32u(s, dest, a1);
+            a1 = dest;
+        }
+        tcg_out_insn(s, RRFc, POPCNT, dest, a1, 8);
+        return;
+    }
+
+    /*
+     * Do not use VPOPCT here.  With only the base vector facility it
+     * accepts byte elements exclusively; word and doubleword element
+     * sizes require vector-enhancements facility 1, and requesting them
+     * without it raises a specification exception.
+     */
+
+    /*
+     * Failing that, POPCNT produces one byte per byte.
+     * Fold to intermediate results to produce the final value.
+     * Each per-byte count is at most 8, so the partial sums below
+     * never carry from one byte into the next.
+     */
+    tcg_out_insn(s, RRFc, POPCNT, dest, a1, 0);
+    if (type == TCG_TYPE_I32) {
+        /*
+         * Accumulate bytes 0..3 into the low byte.  Whatever the shifts
+         * drag in from the high word only lands in bytes above the low
+         * one, which the final zero-extension discards.
+         */
+        tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, dest, TCG_REG_NONE, 16);
+        tcg_out_insn(s, RR, ALR, dest, TCG_TMP0);
+        tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, dest, TCG_REG_NONE, 8);
+        tcg_out_insn(s, RR, ALR, dest, TCG_TMP0);
+        tgen_ext8u(s, TCG_TYPE_I32, dest, dest);
+    } else {
+        /* Add the low word into the high word: 8 byte counts become 4. */
+        tcg_out_insn(s, RRFa, ALHHLR, dest, dest, dest);
+        /* Accumulate upwards so the total lands in the top byte... */
+        tcg_out_sh64(s, RSY_SLLG, TCG_TMP0, dest, TCG_REG_NONE, 16);
+        tcg_out_insn(s, RRE, ALGR, dest, TCG_TMP0);
+        tcg_out_sh64(s, RSY_SLLG, TCG_TMP0, dest, TCG_REG_NONE, 8);
+        tcg_out_insn(s, RRE, ALGR, dest, TCG_TMP0);
+        /* ...then move the top byte down to bit 0. */
+        tcg_out_sh64(s, RSY_SRLG, dest, dest, TCG_REG_NONE, 56);
+    }
+}
+
static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
int ofs, int len, int z)
{
@@ -2858,6 +2903,13 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode
opc,
tgen_ctz(s, TCG_TYPE_I64, args[0], args[1], args[2], const_args[2]);
break;
+ case INDEX_op_ctpop_i32:
+ tgen_ctpop(s, TCG_TYPE_I32, args[0], args[1]);
+ break;
+ case INDEX_op_ctpop_i64:
+ tgen_ctpop(s, TCG_TYPE_I64, args[0], args[1]);
+ break;
+
case INDEX_op_mb:
/* The host memory model is quite strong, we simply need to
serialize the instruction stream. */
@@ -3416,6 +3468,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode
op)
case INDEX_op_extu_i32_i64:
case INDEX_op_extract_i32:
case INDEX_op_extract_i64:
+ case INDEX_op_ctpop_i32:
+ case INDEX_op_ctpop_i64:
return C_O1_I1(r, r);
case INDEX_op_qemu_ld_i32:
--
2.25.1
- [PATCH 00/10] tcg/s390x: updates for mie2 and mie3, Richard Henderson, 2022/02/24
- [PATCH 01/10] tcg/s390x: Distinguish RRF-a and RRF-c formats, Richard Henderson, 2022/02/24
- [PATCH 02/10] tcg/s390x: Distinguish RIE formats, Richard Henderson, 2022/02/24
- [PATCH 03/10] tcg/s390x: Support MIE2 multiply single instructions, Richard Henderson, 2022/02/24
- [PATCH 06/10] tcg/s390x: Create tgen_cmp2 to simplify movcond, Richard Henderson, 2022/02/24
- [PATCH 05/10] tcg/s390x: Support MIE3 logical operations, Richard Henderson, 2022/02/24
- [PATCH 04/10] tcg/s390x: Support MIE2 MGRK instruction, Richard Henderson, 2022/02/24
- [PATCH 08/10] tcg/s390x: Use tgen_movcond_int in tgen_clz, Richard Henderson, 2022/02/24
- [PATCH 07/10] tcg/s390x: Support SELGR instruction in MOVCOND, Richard Henderson, 2022/02/24
- [PATCH 09/10] tcg/s390x: Use vector ctz for integer ctz, Richard Henderson, 2022/02/24
- [PATCH 10/10] tcg/s390x: Implement ctpop operation,
Richard Henderson <=