[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH for-4.1 v2 07/13] tcg: Add INDEX_op_dup_mem_vec
From: Richard Henderson
Subject: [Qemu-devel] [PATCH for-4.1 v2 07/13] tcg: Add INDEX_op_dup_mem_vec
Date: Sun, 17 Mar 2019 02:08:28 -0700
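Allow the backend to expand a vector dup directly from memory, instead of
forcing the value into an integer temp first. This is especially important
if integer/vector register moves do not exist.
The new opcode is INDEX_op_dupm_vec, gated by TCG_TARGET_HAS_dupm_vec; this
patch leaves it disabled (0) for the aarch64, i386 and ppc backends.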
Signed-off-by: Richard Henderson <address@hidden>
---
tcg/aarch64/tcg-target.h | 1 +
tcg/i386/tcg-target.h | 1 +
tcg/ppc/tcg-target.h | 1 +
tcg/tcg-op.h | 1 +
tcg/tcg-opc.h | 1 +
tcg/tcg.h | 1 +
tcg/tcg-op-gvec.c | 88 +++++++++++++++++++++-------------------
tcg/tcg-op-vec.c | 11 +++++
tcg/tcg.c | 2 +
9 files changed, 66 insertions(+), 41 deletions(-)
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index 2d93cf404e..8ce99fc9c8 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -137,6 +137,7 @@ typedef enum {
#define TCG_TARGET_HAS_mul_vec 1
#define TCG_TARGET_HAS_sat_vec 1
#define TCG_TARGET_HAS_minmax_vec 1
+#define TCG_TARGET_HAS_dupm_vec 0
#define TCG_TARGET_DEFAULT_MO (0)
#define TCG_TARGET_HAS_MEMORY_BSWAP 1
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 7995fe3eab..8e8d59f4f4 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -187,6 +187,7 @@ extern bool have_avx2;
#define TCG_TARGET_HAS_mul_vec 1
#define TCG_TARGET_HAS_sat_vec 1
#define TCG_TARGET_HAS_minmax_vec 1
+#define TCG_TARGET_HAS_dupm_vec 0
#define TCG_TARGET_deposit_i32_valid(ofs, len) \
(((ofs) == 0 && (len) == 8) || ((ofs) == 8 && (len) == 8) || \
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 683eb807ae..5143ee853a 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -152,6 +152,7 @@ extern bool have_isa_3_00;
#define TCG_TARGET_HAS_mul_vec 1
#define TCG_TARGET_HAS_sat_vec 1
#define TCG_TARGET_HAS_minmax_vec 1
+#define TCG_TARGET_HAS_dupm_vec 0
void flush_icache_range(uintptr_t start, uintptr_t stop);
void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index d3e51b15af..64cd3f58ef 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -950,6 +950,7 @@ void tcg_gen_atomic_umax_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
void tcg_gen_mov_vec(TCGv_vec, TCGv_vec);
void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec, TCGv_i32);
void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec, TCGv_i64);
+void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec, TCGv_ptr, tcg_target_long);
void tcg_gen_dup8i_vec(TCGv_vec, uint32_t);
void tcg_gen_dup16i_vec(TCGv_vec, uint32_t);
void tcg_gen_dup32i_vec(TCGv_vec, uint32_t);
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index 4e0238ad1a..b8ad147377 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -211,6 +211,7 @@ DEF(qemu_st_i64, 0, TLADDR_ARGS + DATA64_ARGS, 1,
DEF(mov_vec, 1, 1, 0, TCG_OPF_VECTOR | TCG_OPF_NOT_PRESENT)
DEF(dupi_vec, 1, 0, 1, TCG_OPF_VECTOR | TCG_OPF_NOT_PRESENT)
+DEF(dupm_vec, 1, 1, 1, TCG_OPF_VECTOR | IMPL(TCG_TARGET_HAS_dupm_vec))
DEF(dup_vec, 1, 1, 0, IMPLVEC)
DEF(dup2_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_REG_BITS == 32))
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 32b7cf3489..f7c12de75a 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -185,6 +185,7 @@ typedef uint64_t TCGRegSet;
#define TCG_TARGET_HAS_mul_vec 0
#define TCG_TARGET_HAS_sat_vec 0
#define TCG_TARGET_HAS_minmax_vec 0
+#define TCG_TARGET_HAS_dupm_vec 0
#else
#define TCG_TARGET_MAYBE_vec 1
#endif
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index 0996ef0812..59ab516bf0 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -390,6 +390,40 @@ static TCGType choose_vector_type(TCGOpcode op, unsigned vece, uint32_t size,
return 0;
}
+static void do_dup_store(TCGType type, uint32_t dofs, uint32_t oprsz,
+ uint32_t maxsz, TCGv_vec t_vec)
+{
+ uint32_t i = 0;
+
+ switch (type) {
+ case TCG_TYPE_V256:
+ /* Recall that ARM SVE allows vector sizes that are not a
+ * power of 2, but always a multiple of 16. The intent is
+ * that e.g. size == 80 would be expanded with 2x32 + 1x16.
+ */
+ for (; i + 32 <= oprsz; i += 32) {
+ tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V256);
+ }
+ /* fallthru */
+ case TCG_TYPE_V128:
+ for (; i + 16 <= oprsz; i += 16) {
+ tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V128);
+ }
+ break;
+ case TCG_TYPE_V64:
+ for (; i < oprsz; i += 8) {
+ tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V64);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (oprsz < maxsz) {
+ expand_clr(dofs + oprsz, maxsz - oprsz);
+ }
+}
+
/* Set OPRSZ bytes at DOFS to replications of IN_32, IN_64 or IN_C.
* Only one of IN_32 or IN_64 may be set;
* IN_C is used if IN_32 and IN_64 are unset.
@@ -429,49 +463,11 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
} else if (in_64) {
tcg_gen_dup_i64_vec(vece, t_vec, in_64);
} else {
- switch (vece) {
- case MO_8:
- tcg_gen_dup8i_vec(t_vec, in_c);
- break;
- case MO_16:
- tcg_gen_dup16i_vec(t_vec, in_c);
- break;
- case MO_32:
- tcg_gen_dup32i_vec(t_vec, in_c);
- break;
- default:
- tcg_gen_dup64i_vec(t_vec, in_c);
- break;
- }
+ tcg_gen_dupi_vec(vece, t_vec, in_c);
}
-
- i = 0;
- switch (type) {
- case TCG_TYPE_V256:
- /* Recall that ARM SVE allows vector sizes that are not a
- * power of 2, but always a multiple of 16. The intent is
- * that e.g. size == 80 would be expanded with 2x32 + 1x16.
- */
- for (; i + 32 <= oprsz; i += 32) {
- tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V256);
- }
- /* fallthru */
- case TCG_TYPE_V128:
- for (; i + 16 <= oprsz; i += 16) {
- tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V128);
- }
- break;
- case TCG_TYPE_V64:
- for (; i < oprsz; i += 8) {
- tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V64);
- }
- break;
- default:
- g_assert_not_reached();
- }
-
+ do_dup_store(type, dofs, oprsz, maxsz, t_vec);
tcg_temp_free_vec(t_vec);
- goto done;
+ return;
}
/* Otherwise, inline with an integer type, unless "large". */
@@ -1287,6 +1283,16 @@ void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t oprsz,
void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t oprsz, uint32_t maxsz)
{
+ if (TCG_TARGET_HAS_dupm_vec) {
+ TCGType type = choose_vector_type(INDEX_op_dupm_vec, vece, oprsz, 0);
+ if (type != 0) {
+ TCGv_vec t_vec = tcg_temp_new_vec(type);
+ tcg_gen_dup_mem_vec(vece, t_vec, cpu_env, aofs);
+ do_dup_store(type, dofs, oprsz, maxsz, t_vec);
+ tcg_temp_free_vec(t_vec);
+ return;
+ }
+ }
if (vece <= MO_32) {
TCGv_i32 in = tcg_temp_new_i32();
switch (vece) {
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
index cfb18682b1..ce7987b858 100644
--- a/tcg/tcg-op-vec.c
+++ b/tcg/tcg-op-vec.c
@@ -194,6 +194,17 @@ void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec r, TCGv_i32 a)
vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
}
+void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec r, TCGv_ptr b,
+ tcg_target_long ofs)
+{
+ TCGArg ri = tcgv_vec_arg(r);
+ TCGArg bi = tcgv_ptr_arg(b);
+ TCGTemp *rt = arg_temp(ri);
+ TCGType type = rt->base_type;
+
+ vec_gen_3(INDEX_op_dupm_vec, type, vece, ri, bi, ofs);
+}
+
static void vec_gen_ldst(TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o)
{
TCGArg ri = tcgv_vec_arg(r);
diff --git a/tcg/tcg.c b/tcg/tcg.c
index b5389ea767..e0d771d610 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1623,6 +1623,8 @@ bool tcg_op_supported(TCGOpcode op)
case INDEX_op_smax_vec:
case INDEX_op_umax_vec:
return have_vec && TCG_TARGET_HAS_minmax_vec;
+ case INDEX_op_dupm_vec:
+ return have_vec && TCG_TARGET_HAS_dupm_vec;
default:
tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
--
2.17.2
- [Qemu-devel] [PATCH for-4.1 v2 00/13] tcg/ppc: Add vector opcodes, Richard Henderson, 2019/03/17
- [Qemu-devel] [PATCH for-4.1 v2 01/13] tcg: Assert fixed_reg is read-only, Richard Henderson, 2019/03/17
- [Qemu-devel] [PATCH for-4.1 v2 05/13] target/arm: Fill in .opc for cmtst_op, Richard Henderson, 2019/03/17
- [Qemu-devel] [PATCH for-4.1 v2 04/13] tcg: Allow add_vec, sub_vec, neg_vec, not_vec to be expanded, Richard Henderson, 2019/03/17
- [Qemu-devel] [PATCH for-4.1 v2 02/13] tcg: Return bool success from tcg_out_mov, Richard Henderson, 2019/03/17
- [Qemu-devel] [PATCH for-4.1 v2 03/13] tcg: Support cross-class moves without instruction support, Richard Henderson, 2019/03/17
- [Qemu-devel] [PATCH for-4.1 v2 08/13] tcg/ppc: Implement INDEX_op_dupm_vec, Richard Henderson, 2019/03/17
- [Qemu-devel] [PATCH for-4.1 v2 07/13] tcg: Add INDEX_op_dup_mem_vec,
Richard Henderson <=
- [Qemu-devel] [PATCH for-4.1 v2 09/13] tcg/ppc: Support vector shift by immediate, Richard Henderson, 2019/03/17
- [Qemu-devel] [PATCH for-4.1 v2 10/13] tcg/ppc: Support vector multiply, Richard Henderson, 2019/03/17
- [Qemu-devel] [PATCH for-4.1 v2 11/13] tcg/ppc: Update vector support to v2.06, Richard Henderson, 2019/03/17
- [Qemu-devel] [PATCH for-4.1 v2 06/13] tcg/ppc: Initial backend support for Altivec, Richard Henderson, 2019/03/17
- [Qemu-devel] [PATCH for-4.1 v2 12/13] tcg/ppc: Update vector support to v2.07, Richard Henderson, 2019/03/17
- [Qemu-devel] [PATCH for-4.1 v2 13/13] tcg/ppc: Update vector support to v3.00, Richard Henderson, 2019/03/17
- Re: [Qemu-devel] [PATCH for-4.1 v2 00/13] tcg/ppc: Add vector opcodes, no-reply, 2019/03/17