[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 12/36] target/arm: Convert Neon 'load single structure to all lanes' to decodetree
From: Peter Maydell
Subject: [PATCH 12/36] target/arm: Convert Neon 'load single structure to all lanes' to decodetree
Date: Thu, 30 Apr 2020 19:09:39 +0100
Convert the Neon "load single structure to all lanes" insns to
decodetree.
Signed-off-by: Peter Maydell <address@hidden>
---
target/arm/translate-neon.inc.c | 73 +++++++++++++++++++++++++++++++++
target/arm/translate.c | 55 +------------------------
target/arm/neon-ls.decode | 5 +++
3 files changed, 80 insertions(+), 53 deletions(-)
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index 966c0d92012..e60e9559bad 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -398,3 +398,76 @@ static bool trans_VLDST_multiple(DisasContext *s,
arg_VLDST_multiple *a)
gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);
return true;
}
+
+static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
+{
+ /* Neon load single structure to all lanes */
+ int reg, stride, vec_size;
+ int vd = a->vd;
+ int size = a->size;
+ int nregs = a->n + 1;
+ TCGv_i32 addr, tmp;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist */
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
+ return false;
+ }
+
+ if (size == 3) {
+ if (nregs != 4 || a->a == 0) {
+ return false;
+ }
+ /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */
+ size = 2;
+ }
+ if (nregs == 1 && a->a == 1 && size == 0) {
+ return false;
+ }
+ if (nregs == 3 && a->a == 1) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ /*
+ * VLD1 to all lanes: T bit indicates how many Dregs to write.
+ * VLD2/3/4 to all lanes: T bit indicates register stride.
+ */
+ stride = a->t ? 2 : 1;
+ vec_size = nregs == 1 ? stride * 8 : 8;
+
+ tmp = tcg_temp_new_i32();
+ addr = tcg_temp_new_i32();
+ load_reg_var(s, addr, a->rn);
+ for (reg = 0; reg < nregs; reg++) {
+ gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
+ s->be_data | size);
+ if ((vd & 1) && vec_size == 16) {
+ /*
+ * We cannot write 16 bytes at once because the
+ * destination is unaligned.
+ */
+ tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0),
+ 8, 8, tmp);
+ tcg_gen_gvec_mov(0, neon_reg_offset(vd + 1, 0),
+ neon_reg_offset(vd, 0), 8, 8);
+ } else {
+ tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0),
+ vec_size, vec_size, tmp);
+ }
+ tcg_gen_addi_i32(addr, addr, 1 << size);
+ vd += stride;
+ }
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_i32(addr);
+
+ gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs);
+
+ return true;
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 3f97635a524..a9cad04ba91 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -3224,7 +3224,6 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t
insn)
int size;
int reg;
int load;
- int vec_size;
TCGv_i32 addr;
TCGv_i32 tmp;
@@ -3254,58 +3253,8 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t
insn)
} else {
size = (insn >> 10) & 3;
if (size == 3) {
- /* Load single element to all lanes. */
- int a = (insn >> 4) & 1;
- if (!load) {
- return 1;
- }
- size = (insn >> 6) & 3;
- nregs = ((insn >> 8) & 3) + 1;
-
- if (size == 3) {
- if (nregs != 4 || a == 0) {
- return 1;
- }
- /* For VLD4 size==3 a == 1 means 32 bits at 16 byte alignment
*/
- size = 2;
- }
- if (nregs == 1 && a == 1 && size == 0) {
- return 1;
- }
- if (nregs == 3 && a == 1) {
- return 1;
- }
- addr = tcg_temp_new_i32();
- load_reg_var(s, addr, rn);
-
- /* VLD1 to all lanes: bit 5 indicates how many Dregs to write.
- * VLD2/3/4 to all lanes: bit 5 indicates register stride.
- */
- stride = (insn & (1 << 5)) ? 2 : 1;
- vec_size = nregs == 1 ? stride * 8 : 8;
-
- tmp = tcg_temp_new_i32();
- for (reg = 0; reg < nregs; reg++) {
- gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
- s->be_data | size);
- if ((rd & 1) && vec_size == 16) {
- /* We cannot write 16 bytes at once because the
- * destination is unaligned.
- */
- tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
- 8, 8, tmp);
- tcg_gen_gvec_mov(0, neon_reg_offset(rd + 1, 0),
- neon_reg_offset(rd, 0), 8, 8);
- } else {
- tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
- vec_size, vec_size, tmp);
- }
- tcg_gen_addi_i32(addr, addr, 1 << size);
- rd += stride;
- }
- tcg_temp_free_i32(tmp);
- tcg_temp_free_i32(addr);
- stride = (1 << size) * nregs;
+ /* Load single element to all lanes -- handled by decodetree */
+ return 1;
} else {
/* Single element. */
int idx = (insn >> 4) & 0xf;
diff --git a/target/arm/neon-ls.decode b/target/arm/neon-ls.decode
index dd03d5a37bd..f0ab6d2c987 100644
--- a/target/arm/neon-ls.decode
+++ b/target/arm/neon-ls.decode
@@ -34,3 +34,8 @@
VLDST_multiple 1111 0100 0 . l:1 0 rn:4 .... itype:4 size:2 align:2 rm:4 \
vd=%vd_dp
+
+# Neon load single element to all lanes
+
+VLD_all_lanes 1111 0100 1 . 1 0 rn:4 .... 11 n:2 size:2 t:1 a:1 rm:4 \
+ vd=%vd_dp
--
2.20.1
- [PATCH 09/36] target/arm: Convert V[US]DOT (scalar) to decodetree, (continued)
- [PATCH 09/36] target/arm: Convert V[US]DOT (scalar) to decodetree, Peter Maydell, 2020/04/30
- [PATCH 08/36] target/arm: Convert VCMLA (scalar) to decodetree, Peter Maydell, 2020/04/30
- [PATCH 07/36] target/arm: Convert VFM[AS]L (vector) to decodetree, Peter Maydell, 2020/04/30
- [PATCH 10/36] target/arm: Convert VFM[AS]L (scalar) to decodetree, Peter Maydell, 2020/04/30
- [PATCH 11/36] target/arm: Convert Neon load/store multiple structures to decodetree, Peter Maydell, 2020/04/30
- [PATCH 12/36] target/arm: Convert Neon 'load single structure to all lanes' to decodetree,
Peter Maydell <=
- [PATCH 13/36] target/arm: Convert Neon 'load/store single structure' to decodetree, Peter Maydell, 2020/04/30
- [PATCH 15/36] target/arm: Convert Neon 3-reg-same logic ops to decodetree, Peter Maydell, 2020/04/30
- [PATCH 16/36] target/arm: Convert Neon 3-reg-same VMAX/VMIN to decodetree, Peter Maydell, 2020/04/30
- [PATCH 14/36] target/arm: Convert Neon 3-reg-same VADD/VSUB to decodetree, Peter Maydell, 2020/04/30
- [PATCH 19/36] target/arm: Convert Neon 3-reg-same VMUL, VMLA, VMLS, VSHL to decodetree, Peter Maydell, 2020/04/30