[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v6 19/42] target/arm: Implement the LDGM, STGM, STZGM instructions
From: |
Richard Henderson |
Subject: |
[PATCH v6 19/42] target/arm: Implement the LDGM, STGM, STZGM instructions |
Date: |
Thu, 12 Mar 2020 12:41:56 -0700 |
Signed-off-by: Richard Henderson <address@hidden>
---
v3: Require pre-cleaned addresses.
v6: Check full mte enabled. Reorg the helpers.
---
target/arm/helper-a64.h | 3 ++
target/arm/translate.h | 2 +
target/arm/mte_helper.c | 84 ++++++++++++++++++++++++++++++++++++++
target/arm/translate-a64.c | 74 +++++++++++++++++++++++++++++----
4 files changed, 154 insertions(+), 9 deletions(-)
diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index 2fa61b86fa..7b628d100e 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -113,3 +113,6 @@ DEF_HELPER_FLAGS_2(stg_stub, TCG_CALL_NO_WG, void, env, i64)
DEF_HELPER_FLAGS_3(st2g, TCG_CALL_NO_WG, void, env, i64, i64)
DEF_HELPER_FLAGS_3(st2g_parallel, TCG_CALL_NO_WG, void, env, i64, i64)
DEF_HELPER_FLAGS_2(st2g_stub, TCG_CALL_NO_WG, void, env, i64)
+DEF_HELPER_FLAGS_2(ldgm, TCG_CALL_NO_WG, i64, env, i64)
+DEF_HELPER_FLAGS_3(stgm, TCG_CALL_NO_WG, void, env, i64, i64)
+DEF_HELPER_FLAGS_3(stzgm_tags, TCG_CALL_NO_WG, void, env, i64, i64)
diff --git a/target/arm/translate.h b/target/arm/translate.h
index e0f5d0be63..5552ee5a94 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -91,6 +91,8 @@ typedef struct DisasContext {
* < 0, set by the current instruction.
*/
int8_t btype;
+ /* A copy of cpu->dcz_blocksize. */
+ uint8_t dcz_blocksize;
/* True if this page is guarded. */
bool guarded_page;
/* Bottom two bits of XScale c15_cpar coprocessor access control reg */
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
index 7ec7930dfc..27d4b4536c 100644
--- a/target/arm/mte_helper.c
+++ b/target/arm/mte_helper.c
@@ -274,3 +274,87 @@ void HELPER(st2g_stub)(CPUARMState *env, uint64_t ptr)
probe_write(env, ptr + TAG_GRANULE, TAG_GRANULE, mmu_idx, ra);
}
}
+
+#define LDGM_STGM_SIZE (4 << GMID_EL1_BS)
+
+uint64_t HELPER(ldgm)(CPUARMState *env, uint64_t ptr)
+{
+ int mmu_idx = cpu_mmu_index(env, false);
+ uintptr_t ra = GETPC();
+ void *tag_mem;
+
+ ptr = QEMU_ALIGN_DOWN(ptr, LDGM_STGM_SIZE);
+
+ /* Trap if accessing an invalid page. */
+ tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD,
+ LDGM_STGM_SIZE, MMU_DATA_LOAD,
+ LDGM_STGM_SIZE / (2 * TAG_GRANULE), ra);
+
+ /* The tag is squashed to zero if the page does not support tags. */
+ if (!tag_mem) {
+ return 0;
+ }
+
+ QEMU_BUILD_BUG_ON(GMID_EL1_BS != 6);
+ /*
+ * We are loading 64 bits' worth of tags. The ordering of elements
+ * within the word corresponds to a 64-bit little-endian operation.
+ */
+ return ldq_le_p(tag_mem);
+}
+
+void HELPER(stgm)(CPUARMState *env, uint64_t ptr, uint64_t val)
+{
+ int mmu_idx = cpu_mmu_index(env, false);
+ uintptr_t ra = GETPC();
+ void *tag_mem;
+
+ ptr = QEMU_ALIGN_DOWN(ptr, LDGM_STGM_SIZE);
+
+ /* Trap if accessing an invalid page. */
+ tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE,
+ LDGM_STGM_SIZE, MMU_DATA_LOAD,
+ LDGM_STGM_SIZE / (2 * TAG_GRANULE), ra);
+
+ /*
+ * Tag store only happens if the page supports tags,
+ * and if the OS has enabled access to the tags.
+ */
+ if (!tag_mem) {
+ return;
+ }
+
+ QEMU_BUILD_BUG_ON(GMID_EL1_BS != 6);
+ /*
+ * We are storing 64 bits' worth of tags. The ordering of elements
+ * within the word corresponds to a 64-bit little-endian operation.
+ */
+ stq_le_p(tag_mem, val);
+}
+
+void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val)
+{
+ uintptr_t ra = GETPC();
+ int mmu_idx = cpu_mmu_index(env, false);
+ int log2_dcz_bytes, log2_tag_bytes;
+ intptr_t dcz_bytes, tag_bytes;
+ uint8_t *mem;
+
+ /*
+ * In arm_cpu_realizefn, we assert that dcz > LOG2_TAG_GRANULE+1,
+ * i.e. 32 bytes, which is an unreasonably small dcz anyway,
+ * to make sure that we can access one complete tag byte here.
+ */
+ log2_dcz_bytes = env_archcpu(env)->dcz_blocksize + 2;
+ log2_tag_bytes = log2_dcz_bytes - (LOG2_TAG_GRANULE + 1);
+ dcz_bytes = (intptr_t)1 << log2_dcz_bytes;
+ tag_bytes = (intptr_t)1 << log2_tag_bytes;
+ ptr &= -dcz_bytes; /* align down to the start of the block */
+
+ mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, dcz_bytes,
+ MMU_DATA_STORE, tag_bytes, ra);
+ if (mem) {
+ int tag_pair = (val & 0xf) * 0x11; /* same tag in both nibbles */
+ memset(mem, tag_pair, tag_bytes);
+ }
+}
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 048140ddc0..f010aa2b58 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -3781,7 +3781,7 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn)
uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE;
int op2 = extract32(insn, 10, 2);
int op1 = extract32(insn, 22, 2);
- bool is_load = false, is_pair = false, is_zero = false;
+ bool is_load = false, is_pair = false, is_zero = false, is_mult = false;
int index = 0;
TCGv_i64 addr, clean_addr, tcg_rt;
@@ -3797,13 +3797,18 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn)
* > 0 : pre-index, writeback
*/
switch (op1) {
- case 0: /* STG */
+ case 0:
if (op2 != 0) {
/* STG */
index = op2 - 2;
- break;
+ } else {
+ /* STZGM */
+ if (s->current_el == 0 || offset != 0) {
+ goto do_unallocated;
+ }
+ is_mult = is_zero = true;
}
- goto do_unallocated;
+ break;
case 1:
if (op2 != 0) {
/* STZG */
@@ -3819,17 +3824,27 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn)
/* ST2G */
is_pair = true;
index = op2 - 2;
- break;
+ } else {
+ /* STGM */
+ if (s->current_el == 0 || offset != 0) {
+ goto do_unallocated;
+ }
+ is_mult = true;
}
- goto do_unallocated;
+ break;
case 3:
if (op2 != 0) {
/* STZ2G */
is_pair = is_zero = true;
index = op2 - 2;
- break;
+ } else {
+ /* LDGM */
+ if (s->current_el == 0 || offset != 0) {
+ goto do_unallocated;
+ }
+ is_mult = is_load = true;
}
- goto do_unallocated;
+ break;
default:
do_unallocated:
@@ -3837,7 +3852,9 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn)
return;
}
- if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
+ if (is_mult
+ ? !dc_isar_feature(aa64_mte, s)
+ : !dc_isar_feature(aa64_mte_insn_reg, s)) {
goto do_unallocated;
}
@@ -3851,6 +3868,44 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn)
tcg_gen_addi_i64(addr, addr, offset);
}
+ if (is_mult) {
+ tcg_rt = cpu_reg(s, rt);
+
+ if (is_zero) {
+ int size = 4 << s->dcz_blocksize;
+
+ if (s->ata) {
+ gen_helper_stzgm_tags(cpu_env, addr, tcg_rt);
+ }
+ /*
+ * The non-tags portion of STZGM is mostly like DC_ZVA,
+ * except the alignment happens before the access.
+ */
+ clean_addr = clean_data_tbi(s, addr);
+ tcg_gen_andi_i64(clean_addr, clean_addr, -size);
+ gen_helper_dc_zva(cpu_env, clean_addr);
+ } else if (s->ata) {
+ if (is_load) {
+ gen_helper_ldgm(tcg_rt, cpu_env, addr);
+ } else {
+ gen_helper_stgm(cpu_env, addr, tcg_rt);
+ }
+ } else {
+ MMUAccessType acc = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE;
+ int size = 4 << GMID_EL1_BS;
+
+ clean_addr = clean_data_tbi(s, addr);
+ tcg_gen_andi_i64(clean_addr, clean_addr, -size);
+ gen_probe_access(s, clean_addr, acc, size);
+
+ if (is_load) {
+ /* The result tags are zeros. */
+ tcg_gen_movi_i64(tcg_rt, 0);
+ }
+ }
+ return;
+ }
+
if (is_load) {
tcg_rt = cpu_reg(s, rt);
if (s->ata) {
@@ -14623,6 +14678,7 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
dc->vec_stride = 0;
dc->cp_regs = arm_cpu->cp_regs;
dc->features = env->features;
+ dc->dcz_blocksize = arm_cpu->dcz_blocksize;
/* Single step state. The code-generation logic here is:
* SS_ACTIVE == 0:
--
2.20.1
- [PATCH v6 10/42] target/arm: Implement the ADDG, SUBG instructions, (continued)
- [PATCH v6 10/42] target/arm: Implement the ADDG, SUBG instructions, Richard Henderson, 2020/03/12
- [PATCH v6 14/42] target/arm: Add helper_probe_access, Richard Henderson, 2020/03/12
- [PATCH v6 13/42] target/arm: Define arm_cpu_do_unaligned_access for user-only, Richard Henderson, 2020/03/12
- [PATCH v6 16/42] target/arm: Implement the STGP instruction, Richard Henderson, 2020/03/12
- [PATCH v6 15/42] target/arm: Implement LDG, STG, ST2G instructions, Richard Henderson, 2020/03/12
- [PATCH v6 17/42] target/arm: Restrict the values of DCZID.BS under TCG, Richard Henderson, 2020/03/12
- [PATCH v6 20/42] target/arm: Implement the access tag cache flushes, Richard Henderson, 2020/03/12
- [PATCH v6 21/42] target/arm: Move regime_el to internals.h, Richard Henderson, 2020/03/12
- [PATCH v6 18/42] target/arm: Simplify DC_ZVA, Richard Henderson, 2020/03/12
- [PATCH v6 22/42] target/arm: Move regime_tcr to internals.h, Richard Henderson, 2020/03/12
- [PATCH v6 19/42] target/arm: Implement the LDGM, STGM, STZGM instructions,
Richard Henderson <=
- [PATCH v6 23/42] target/arm: Add gen_mte_check1, Richard Henderson, 2020/03/12
- [PATCH v6 24/42] target/arm: Add gen_mte_checkN, Richard Henderson, 2020/03/12
- [PATCH v6 25/42] target/arm: Implement helper_mte_check1, Richard Henderson, 2020/03/12
- [PATCH v6 26/42] target/arm: Implement helper_mte_checkN, Richard Henderson, 2020/03/12
- [PATCH v6 27/42] target/arm: Add helper_mte_check_zva, Richard Henderson, 2020/03/12
- [PATCH v6 28/42] target/arm: Use mte_checkN for sve unpredicated loads, Richard Henderson, 2020/03/12
- [PATCH v6 29/42] target/arm: Use mte_checkN for sve unpredicated stores, Richard Henderson, 2020/03/12
- [PATCH v6 30/42] target/arm: Use mte_check1 for sve LD1R, Richard Henderson, 2020/03/12
- [PATCH v6 32/42] target/arm: Add mte helpers for sve scalar + int stores, Richard Henderson, 2020/03/12
- [PATCH v6 34/42] target/arm: Handle TBI for sve scalar + int memory ops, Richard Henderson, 2020/03/12