[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v2 28/36] tcg: Implement gvec support for rotate by immediate
From: |
Richard Henderson |
Subject: |
[PATCH v2 28/36] tcg: Implement gvec support for rotate by immediate |
Date: |
Tue, 21 Apr 2020 18:17:14 -0700 |
No host backend support yet, but the interfaces for rotli
are in place. Canonicalize immediate rotate to the left,
based on a survey of architectures, but provide both left
and right shift interfaces to the translators.
Signed-off-by: Richard Henderson <address@hidden>
---
accel/tcg/tcg-runtime.h | 5 +++
include/tcg/tcg-op-gvec.h | 6 ++++
include/tcg/tcg-op.h | 2 ++
include/tcg/tcg-opc.h | 1 +
include/tcg/tcg.h | 1 +
tcg/aarch64/tcg-target.h | 1 +
tcg/i386/tcg-target.h | 1 +
tcg/ppc/tcg-target.h | 1 +
accel/tcg/tcg-runtime-gvec.c | 48 +++++++++++++++++++++++++
tcg/tcg-op-gvec.c | 68 ++++++++++++++++++++++++++++++++++++
tcg/tcg-op-vec.c | 12 +++++++
tcg/tcg.c | 2 ++
tcg/README | 3 +-
13 files changed, 150 insertions(+), 1 deletion(-)
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index 4fa61b49b4..cf10c8361e 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -259,6 +259,11 @@ DEF_HELPER_FLAGS_3(gvec_sar16i, TCG_CALL_NO_RWG, void,
ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_sar32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_sar64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_rotl8i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_rotl16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_rotl32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_rotl64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_4(gvec_shl8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_shl16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_shl32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index cea6497341..1afc3ebf03 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -334,6 +334,10 @@ void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs,
uint32_t aofs,
int64_t shift, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs,
int64_t shift, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_rotli(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t shift, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_rotri(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t shift, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_shls(unsigned vece, uint32_t dofs, uint32_t aofs,
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
@@ -388,5 +392,7 @@ void tcg_gen_vec_shr8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
void tcg_gen_vec_sar8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
+void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
+void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
#endif
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
index b07bf7b524..c624e371d5 100644
--- a/include/tcg/tcg-op.h
+++ b/include/tcg/tcg-op.h
@@ -986,6 +986,8 @@ void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec
a, TCGv_vec b);
void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
+void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
+void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
index 4a9cbf5426..c46c096c3e 100644
--- a/include/tcg/tcg-opc.h
+++ b/include/tcg/tcg-opc.h
@@ -245,6 +245,7 @@ DEF(not_vec, 1, 1, 0, IMPLVEC |
IMPL(TCG_TARGET_HAS_not_vec))
DEF(shli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
DEF(shri_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
DEF(sari_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
+DEF(rotli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_roti_vec))
DEF(shls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
DEF(shrs_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index f72530dfda..d2034d9334 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -182,6 +182,7 @@ typedef uint64_t TCGRegSet;
#define TCG_TARGET_HAS_not_vec 0
#define TCG_TARGET_HAS_andc_vec 0
#define TCG_TARGET_HAS_orc_vec 0
+#define TCG_TARGET_HAS_roti_vec 0
#define TCG_TARGET_HAS_shi_vec 0
#define TCG_TARGET_HAS_shs_vec 0
#define TCG_TARGET_HAS_shv_vec 0
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index ca214f6909..225a597f84 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -133,6 +133,7 @@ typedef enum {
#define TCG_TARGET_HAS_not_vec 1
#define TCG_TARGET_HAS_neg_vec 1
#define TCG_TARGET_HAS_abs_vec 1
+#define TCG_TARGET_HAS_roti_vec 0
#define TCG_TARGET_HAS_shi_vec 1
#define TCG_TARGET_HAS_shs_vec 0
#define TCG_TARGET_HAS_shv_vec 1
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index bfb3f5f6e9..23aabde992 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -183,6 +183,7 @@ extern bool have_avx2;
#define TCG_TARGET_HAS_not_vec 0
#define TCG_TARGET_HAS_neg_vec 0
#define TCG_TARGET_HAS_abs_vec 1
+#define TCG_TARGET_HAS_roti_vec 0
#define TCG_TARGET_HAS_shi_vec 1
#define TCG_TARGET_HAS_shs_vec 1
#define TCG_TARGET_HAS_shv_vec have_avx2
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 4fa21f0e71..e57b891aa5 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -161,6 +161,7 @@ extern bool have_vsx;
#define TCG_TARGET_HAS_not_vec 1
#define TCG_TARGET_HAS_neg_vec have_isa_3_00
#define TCG_TARGET_HAS_abs_vec 0
+#define TCG_TARGET_HAS_roti_vec 0
#define TCG_TARGET_HAS_shi_vec 0
#define TCG_TARGET_HAS_shs_vec 0
#define TCG_TARGET_HAS_shv_vec 1
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
index ca449702e6..34b1030365 100644
--- a/accel/tcg/tcg-runtime-gvec.c
+++ b/accel/tcg/tcg-runtime-gvec.c
@@ -716,6 +716,54 @@ void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
clear_high(d, oprsz, desc);
}
+void HELPER(gvec_rotl8i)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ int shift = simd_data(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
+ *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), shift);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_rotl16i)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ int shift = simd_data(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
+ *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), shift);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_rotl32i)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ int shift = simd_data(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
+ *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), shift);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_rotl64i)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ int shift = simd_data(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
+ *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), shift);
+ }
+ clear_high(d, oprsz, desc);
+}
+
void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index 049a55e700..25300b1577 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -2694,6 +2694,74 @@ void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs,
uint32_t aofs,
}
}
+void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
+{
+ uint64_t mask = dup_const(MO_8, 0xff << c);
+
+ tcg_gen_shli_i64(d, a, c);
+ tcg_gen_shri_i64(a, a, 8 - c);
+ tcg_gen_andi_i64(d, d, mask);
+ tcg_gen_andi_i64(a, a, ~mask);
+ tcg_gen_or_i64(d, d, a);
+}
+
+void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
+{
+ uint64_t mask = dup_const(MO_16, 0xffff << c);
+
+ tcg_gen_shli_i64(d, a, c);
+ tcg_gen_shri_i64(a, a, 16 - c);
+ tcg_gen_andi_i64(d, d, mask);
+ tcg_gen_andi_i64(a, a, ~mask);
+ tcg_gen_or_i64(d, d, a);
+}
+
+void tcg_gen_gvec_rotli(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t shift, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
+ static const GVecGen2i g[4] = {
+ { .fni8 = tcg_gen_vec_rotl8i_i64,
+ .fniv = tcg_gen_rotli_vec,
+ .fno = gen_helper_gvec_rotl8i,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = tcg_gen_vec_rotl16i_i64,
+ .fniv = tcg_gen_rotli_vec,
+ .fno = gen_helper_gvec_rotl16i,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_rotli_i32,
+ .fniv = tcg_gen_rotli_vec,
+ .fno = gen_helper_gvec_rotl32i,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_rotli_i64,
+ .fniv = tcg_gen_rotli_vec,
+ .fno = gen_helper_gvec_rotl64i,
+ .opt_opc = vecop_list,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_debug_assert(shift >= 0 && shift < (8 << vece));
+ if (shift == 0) {
+ tcg_gen_gvec_mov(vece, dofs, aofs, oprsz, maxsz);
+ } else {
+ tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, shift, &g[vece]);
+ }
+}
+
+void tcg_gen_gvec_rotri(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t shift, uint32_t oprsz, uint32_t maxsz)
+{
+ tcg_debug_assert(vece <= MO_64);
+ tcg_debug_assert(shift >= 0 && shift < (8 << vece));
+ tcg_gen_gvec_rotli(vece, dofs, aofs, -shift & ((8 << vece) - 1),
+ oprsz, maxsz);
+}
+
/*
* Specialized generation vector shifts by a non-constant scalar.
*/
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
index a9c16d85c5..845cb3de2e 100644
--- a/tcg/tcg-op-vec.c
+++ b/tcg/tcg-op-vec.c
@@ -546,6 +546,18 @@ void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec
a, int64_t i)
do_shifti(INDEX_op_sari_vec, vece, r, a, i);
}
+void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
+{
+ do_shifti(INDEX_op_rotli_vec, vece, r, a, i);
+}
+
+void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
+{
+ int bits = 8 << vece;
+ tcg_debug_assert(i >= 0 && i < bits);
+ do_shifti(INDEX_op_rotli_vec, vece, r, a, -i & (bits - 1));
+}
+
void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
diff --git a/tcg/tcg.c b/tcg/tcg.c
index d712d19842..71409073bb 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1697,6 +1697,8 @@ bool tcg_op_supported(TCGOpcode op)
case INDEX_op_shrv_vec:
case INDEX_op_sarv_vec:
return have_vec && TCG_TARGET_HAS_shv_vec;
+ case INDEX_op_rotli_vec:
+ return have_vec && TCG_TARGET_HAS_roti_vec;
case INDEX_op_ssadd_vec:
case INDEX_op_usadd_vec:
case INDEX_op_sssub_vec:
diff --git a/tcg/README b/tcg/README
index bfa2e4ed24..1e3e4654f4 100644
--- a/tcg/README
+++ b/tcg/README
@@ -605,10 +605,11 @@ E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 1 << 2 ->
i32.
* shri_vec v0, v1, i2
* sari_vec v0, v1, i2
+* rotli_vec v0, v1, i2
* shrs_vec v0, v1, s2
* sars_vec v0, v1, s2
- Similarly for logical and arithmetic right shift.
+ Similarly for logical and arithmetic right shift, and left rotate.
* shlv_vec v0, v1, v2
--
2.20.1
- Re: [PATCH v2 23/36] tcg: Add tcg_reg_alloc_dup2, (continued)
- [PATCH v2 24/36] tcg/i386: Use tcg_constant_vec with tcg vec expanders, Richard Henderson, 2020/04/21
- [PATCH v2 25/36] tcg: Remove tcg_gen_dup{8,16,32,64}i_vec, Richard Henderson, 2020/04/21
- [PATCH v2 26/36] tcg: Add load_dest parameter to GVecGen2, Richard Henderson, 2020/04/21
- [PATCH v2 27/36] tcg: Fix integral argument type to tcg_gen_rot[rl]i_i{32, 64}, Richard Henderson, 2020/04/21
- [PATCH v2 28/36] tcg: Implement gvec support for rotate by immediate,
Richard Henderson <=
- [PATCH v2 30/36] tcg: Remove expansion to shift by vector from do_shifts, Richard Henderson, 2020/04/21
- [PATCH v2 34/36] tcg/ppc: Implement INDEX_op_rot[lr]v_vec, Richard Henderson, 2020/04/21
- [PATCH v2 29/36] tcg: Implement gvec support for rotate by vector, Richard Henderson, 2020/04/21
- [PATCH v2 32/36] tcg/i386: Implement INDEX_op_rotl[is]_vec, Richard Henderson, 2020/04/21
- [PATCH v2 33/36] tcg/aarch64: Implement INDEX_op_rotli_vec, Richard Henderson, 2020/04/21
- [PATCH v2 36/36] target/s390x: Use tcg_gen_gvec_rotl{i,s,v}, Richard Henderson, 2020/04/21
- [PATCH v2 31/36] tcg: Implement gvec support for rotate by scalar, Richard Henderson, 2020/04/21