Re: [Qemu-devel] [PATCH 2/3] i386: Atomically update PTEs with mttcg
From: Philippe Mathieu-Daudé
Subject: Re: [Qemu-devel] [PATCH 2/3] i386: Atomically update PTEs with mttcg
Date: Fri, 14 Dec 2018 12:11:51 +0100
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Thunderbird/60.3.1
On 12/14/18 1:05 AM, Benjamin Herrenschmidt wrote:
> Note to RiscV folks: You may want to adapt your code to do the same as
> this, esp. afaik, what you do today is endian-broken if host and guest
> endian are different.
Cc'ing the RISC-V list.
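The endianness point is worth a concrete illustration: x86 page-table
entries are little-endian in guest RAM, so a compare-and-swap performed by
the host on that RAM has to use byte-swapped operands whenever the host is
big-endian. A minimal sketch (illustrative only, not code from the patch;
HOST_WORDS_BIGENDIAN is QEMU's host-endianness config macro):

    /* Convert a host-order 32-bit PTE word into the guest's little-endian
     * layout before using it as a cmpxchg operand on guest memory.  On a
     * little-endian host this is a no-op; on a big-endian host the bytes
     * must be swapped -- which is what the cpu_to_le32() calls in the
     * patch quoted below take care of. */
    static inline uint32_t pte_host_to_guest_le32(uint32_t host_val)
    {
    #if defined(HOST_WORDS_BIGENDIAN)
        return __builtin_bswap32(host_val);
    #else
        return host_val;
    #endif
    }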
> Cheers,
> Ben.
>
> On Fri, 2018-12-14 at 10:58 +1100, Benjamin Herrenschmidt wrote:
>> Afaik, this isn't well documented (at least it wasn't when I last looked)
>> but OSes such as Linux rely on this behaviour:
>>
>> The HW updates to the page tables need to be done atomically with the
>> checking of the present bit (and other permissions).
>>
>> This is what allows Linux to do simple xchg of PTEs with 0 and assume
>> the value read has "final" stable dirty and accessed bits (the TLB
>> invalidation is deferred).
>>
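The guest-side pattern this refers to deserves spelling out. A rough sketch
of a Linux-style ptep_get_and_clear() (illustrative only, not code from
Linux or from this patch) that relies on the walker setting the
Accessed/Dirty bits atomically with its present-bit check:

    /* Tear down a PTE and trust the returned snapshot: because the
     * (emulated) hardware walker only sets the A/D bits atomically
     * together with checking the present bit, nothing can set them after
     * the exchange, so the value read here is final even though the TLB
     * invalidation is deferred. */
    static inline uint64_t pte_get_and_clear(uint64_t *ptep)
    {
        return __atomic_exchange_n(ptep, 0, __ATOMIC_SEQ_CST);
    }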
>> Signed-off-by: Benjamin Herrenschmidt <address@hidden>
>> ---
>> target/i386/excp_helper.c | 104 +++++++++++++++++++++++++++++---------
>> 1 file changed, 80 insertions(+), 24 deletions(-)
>>
>> diff --git a/target/i386/excp_helper.c b/target/i386/excp_helper.c
>> index 49231f6b69..93fc24c011 100644
>> --- a/target/i386/excp_helper.c
>> +++ b/target/i386/excp_helper.c
>> @@ -157,11 +157,45 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
>>
>> #else
>>
>> +static inline uint64_t update_entry(CPUState *cs, target_ulong addr,
>> +                                    uint64_t orig_entry, uint32_t bits)
>> +{
>> +    uint64_t new_entry = orig_entry | bits;
>> +
>> +    /* Write the updated bottom 32-bits */
>> +    if (qemu_tcg_mttcg_enabled()) {
>> +        uint32_t old_le = cpu_to_le32(orig_entry);
>> +        uint32_t new_le = cpu_to_le32(new_entry);
>> +        MemTxResult result;
>> +        uint32_t old_ret;
>> +
>> +        old_ret = address_space_cmpxchgl_notdirty(cs->as, addr,
>> +                                                  old_le, new_le,
>> +                                                  MEMTXATTRS_UNSPECIFIED,
>> +                                                  &result);
>> +        if (result == MEMTX_OK) {
>> +            if (old_ret != old_le && old_ret != new_le) {
>> +                new_entry = 0;
>> +            }
>> +            return new_entry;
>> +        }
>> +
>> +        /* Do we need to support this case where PTEs aren't in RAM ?
>> +         *
>> +         * For now fallback to non-atomic case
>> +         */
>> +    }
>> +
>> +    x86_stl_phys_notdirty(cs, addr, new_entry);
>> +
>> +    return new_entry;
>> +}
>> +
>> static hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
>> int *prot)
>> {
>> CPUX86State *env = &X86_CPU(cs)->env;
>> - uint64_t rsvd_mask = PG_HI_RSVD_MASK;
>> + uint64_t rsvd_mask;
>> uint64_t ptep, pte;
>> uint64_t exit_info_1 = 0;
>> target_ulong pde_addr, pte_addr;
>> @@ -172,6 +206,8 @@ static hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
>> return gphys;
>> }
>>
>> + restart:
>> + rsvd_mask = PG_HI_RSVD_MASK;
>> if (!(env->nested_pg_mode & SVM_NPT_NXE)) {
>> rsvd_mask |= PG_NX_MASK;
>> }
>> @@ -198,8 +234,10 @@ static hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
>> goto do_fault_rsvd;
>> }
>> if (!(pml4e & PG_ACCESSED_MASK)) {
>> - pml4e |= PG_ACCESSED_MASK;
>> - x86_stl_phys_notdirty(cs, pml4e_addr, pml4e);
>> + pml4e = update_entry(cs, pml4e_addr, pml4e, PG_ACCESSED_MASK);
>> + if (!pml4e) {
>> + goto restart;
>> + }
>> }
>> ptep &= pml4e ^ PG_NX_MASK;
>> pdpe_addr = (pml4e & PG_ADDRESS_MASK) +
>> @@ -213,8 +251,10 @@ static hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
>> }
>> ptep &= pdpe ^ PG_NX_MASK;
>> if (!(pdpe & PG_ACCESSED_MASK)) {
>> - pdpe |= PG_ACCESSED_MASK;
>> - x86_stl_phys_notdirty(cs, pdpe_addr, pdpe);
>> + pdpe = update_entry(cs, pdpe_addr, pdpe, PG_ACCESSED_MASK);
>> + if (!pdpe) {
>> + goto restart;
>> + }
>> }
>> if (pdpe & PG_PSE_MASK) {
>> /* 1 GB page */
>> @@ -256,8 +296,10 @@ static hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
>> }
>> /* 4 KB page */
>> if (!(pde & PG_ACCESSED_MASK)) {
>> - pde |= PG_ACCESSED_MASK;
>> - x86_stl_phys_notdirty(cs, pde_addr, pde);
>> + pde = update_entry(cs, pde_addr, pde, PG_ACCESSED_MASK);
>> + if (!pde) {
>> + goto restart;
>> + }
>> }
>> pte_addr = (pde & PG_ADDRESS_MASK) + (((gphys >> 12) & 0x1ff) << 3);
>> pte = x86_ldq_phys(cs, pte_addr);
>> @@ -295,8 +337,10 @@ static hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
>> }
>>
>> if (!(pde & PG_ACCESSED_MASK)) {
>> - pde |= PG_ACCESSED_MASK;
>> - x86_stl_phys_notdirty(cs, pde_addr, pde);
>> + pde = update_entry(cs, pde_addr, pde, PG_ACCESSED_MASK);
>> + if (!pde) {
>> + goto restart;
>> + }
>> }
>>
>> /* page directory entry */
>> @@ -376,7 +420,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
>> int error_code = 0;
>> int is_dirty, prot, page_size, is_write, is_user;
>> hwaddr paddr;
>> - uint64_t rsvd_mask = PG_HI_RSVD_MASK;
>> + uint64_t rsvd_mask;
>> uint32_t page_offset;
>> target_ulong vaddr;
>>
>> @@ -401,6 +445,8 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
>> goto do_mapping;
>> }
>>
>> + restart:
>> + rsvd_mask = PG_HI_RSVD_MASK;
>> if (!(env->efer & MSR_EFER_NXE)) {
>> rsvd_mask |= PG_NX_MASK;
>> }
>> @@ -436,8 +482,10 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
>> goto do_fault_rsvd;
>> }
>> if (!(pml5e & PG_ACCESSED_MASK)) {
>> - pml5e |= PG_ACCESSED_MASK;
>> - x86_stl_phys_notdirty(cs, pml5e_addr, pml5e);
>> + pml5e = update_entry(cs, pml5e_addr, pml5e, PG_ACCESSED_MASK);
>> + if (!pml5e) {
>> + goto restart;
>> + }
>> }
>> ptep = pml5e ^ PG_NX_MASK;
>> } else {
>> @@ -456,8 +504,10 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
>> goto do_fault_rsvd;
>> }
>> if (!(pml4e & PG_ACCESSED_MASK)) {
>> - pml4e |= PG_ACCESSED_MASK;
>> - x86_stl_phys_notdirty(cs, pml4e_addr, pml4e);
>> + pml4e = update_entry(cs, pml4e_addr, pml4e, PG_ACCESSED_MASK);
>> + if (!pml4e) {
>> + goto restart;
>> + }
>> }
>> ptep &= pml4e ^ PG_NX_MASK;
>> pdpe_addr = ((pml4e & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3)) &
>> @@ -472,8 +522,10 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
>> }
>> ptep &= pdpe ^ PG_NX_MASK;
>> if (!(pdpe & PG_ACCESSED_MASK)) {
>> - pdpe |= PG_ACCESSED_MASK;
>> - x86_stl_phys_notdirty(cs, pdpe_addr, pdpe);
>> + pdpe = update_entry(cs, pdpe_addr, pdpe, PG_ACCESSED_MASK);
>> + if (!pdpe) {
>> + goto restart;
>> + }
>> }
>> if (pdpe & PG_PSE_MASK) {
>> /* 1 GB page */
>> @@ -520,8 +572,10 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
>> }
>> /* 4 KB page */
>> if (!(pde & PG_ACCESSED_MASK)) {
>> - pde |= PG_ACCESSED_MASK;
>> - x86_stl_phys_notdirty(cs, pde_addr, pde);
>> + pde = update_entry(cs, pde_addr, pde, PG_ACCESSED_MASK);
>> + if (!pde) {
>> + goto restart;
>> + }
>> }
>> pte_addr = ((pde & PG_ADDRESS_MASK) + (((addr >> 12) & 0x1ff) << 3)) &
>> a20_mask;
>> @@ -563,8 +617,10 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
>> }
>>
>> if (!(pde & PG_ACCESSED_MASK)) {
>> - pde |= PG_ACCESSED_MASK;
>> - x86_stl_phys_notdirty(cs, pde_addr, pde);
>> + pde = update_entry(cs, pde_addr, pde, PG_ACCESSED_MASK);
>> + if (!pde) {
>> + goto restart;
>> + }
>> }
>>
>> /* page directory entry */
>> @@ -634,11 +690,11 @@ do_check_protect_pse36:
>> /* yes, it can! */
>> is_dirty = is_write && !(pte & PG_DIRTY_MASK);
>> if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
>> - pte |= PG_ACCESSED_MASK;
>> - if (is_dirty) {
>> - pte |= PG_DIRTY_MASK;
>> + pte = update_entry(cs, pte_addr, pte,
>> + PG_ACCESSED_MASK | (is_dirty ? PG_DIRTY_MASK : 0));
>> + if (!pte) {
>> + goto restart;
>> }
>> - x86_stl_phys_notdirty(cs, pte_addr, pte);
>> }
>>
>> if (!(pte & PG_DIRTY_MASK)) {
>
>
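Stepping back, every hunk above follows the same lock-free pattern: attempt
to set the Accessed/Dirty bits with a 32-bit compare-and-swap and, if the
entry changed underneath us, restart the page walk from the top. A condensed
sketch of that pattern (read_entry/cmpxchg_entry are placeholders here, not
QEMU APIs, and endianness handling is omitted):

    #include <stdbool.h>
    #include <stdint.h>

    /* Placeholders standing in for the real physical-memory helpers. */
    uint64_t read_entry(uint64_t entry_addr);
    bool cmpxchg_entry(uint64_t entry_addr, uint32_t expected, uint32_t desired);

    /* Set A/D bits in one page-table entry; on a lost race, tell the
     * caller to redo the whole walk (as the "goto restart" above does). */
    static uint64_t set_bits_or_restart(uint64_t entry_addr, uint32_t bits,
                                        bool *need_restart)
    {
        uint64_t entry = read_entry(entry_addr);

        *need_restart = false;
        if ((entry & bits) != bits) {
            /* Only the low 32 bits carry the A/D flags, so a 32-bit
             * compare-and-swap on the bottom word is sufficient. */
            if (!cmpxchg_entry(entry_addr, (uint32_t)entry,
                               (uint32_t)entry | bits)) {
                *need_restart = true;   /* entry changed under us */
                return 0;
            }
            entry |= bits;
        }
        return entry;
    }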