From: Chao Peng
Subject: [PATCH v3 13/15] KVM: Handle page fault for private memory
Date: Tue, 21 Dec 2021 23:11:23 +0800

When a page fault on the secondary page table occurs while the guest is
running and the faulting gfn falls in a memslot with KVM_MEM_PRIVATE,
KVM needs to take different paths for private and shared accesses.

  - For private access, KVM checks whether the page is already allocated
    in the memory backend; if so, KVM establishes the mapping, otherwise
    it exits to userspace to convert the shared page to a private one.

  - For shared access, KVM also checks whether the page is already
    allocated in the memory backend; if so, KVM exits to userspace to
    convert the private page to a shared one, otherwise the access is
    treated as traditional hva-based shared memory: KVM lets the
    existing code obtain a pfn with get_user_pages() and establish the
    mapping. Both cases are summarized in the table below.
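
Schematically, the routing reduces to the following table (a summary of
the two cases above; the authoritative logic is kvm_faultin_pfn_private()
in the diff below):

  access  | page in memory backend | action
  --------+------------------------+-------------------------------------------
  private | allocated              | map the backend pfn
  private | not allocated          | exit to userspace: KVM_EXIT_MEM_MAP_PRIVATE
  shared  | allocated              | exit to userspace: KVM_EXIT_MEM_MAP_SHARED
  shared  | not allocated          | existing hva path via get_user_pages()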

The above code assumes private memory is persistent and pre-allocated in
the memory backend, so KVM can use this information as an indicator of
whether a page is private or shared. The check is performed by calling
kvm_memfd_get_pfn(), which is currently implemented as a pagecache
search, but in theory it could be implemented differently (e.g. a
different implementation would be needed if the page is not even mapped
into the host pagecache).
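
For illustration only, a pagecache-search implementation could look like
the sketch below. The helper name example_memfd_get_pfn(), the struct
file argument, and the caller-supplied page index are assumptions for
exposition; the real kvm_memfd_get_pfn() is introduced earlier in this
series:

  /*
   * Illustrative sketch, not the actual implementation: resolve a
   * backend page by searching the memfd's pagecache. A negative return
   * value means "not allocated", which the fault path below interprets
   * as "this gfn is currently shared".
   */
  static long example_memfd_get_pfn(struct file *memfd, pgoff_t index,
                                    int *order)
  {
          struct page *page = find_get_page(memfd->f_mapping, index);

          if (!page)
                  return -ENOENT;

          /* 0 for base pages, >0 for compound (huge) pages. */
          *order = compound_order(compound_head(page));
          return page_to_pfn(page);
  }

Note that find_get_page() takes a reference on the page, which is why
the fault paths in the diff release private pfns with kvm_memfd_put_pfn()
rather than kvm_release_pfn_clean().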

Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
---
 arch/x86/kvm/mmu/mmu.c         | 64 +++++++++++++++++++++++++++++++---
 arch/x86/kvm/mmu/paging_tmpl.h | 11 ++++--
 2 files changed, 68 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index a7006e1ac2d2..7fc29f85313d 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3156,6 +3156,9 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
        if (max_level == PG_LEVEL_4K)
                return PG_LEVEL_4K;
 
+       if (kvm_slot_is_private(slot))
+               return max_level;
+
        host_level = host_pfn_mapping_level(kvm, gfn, pfn, slot);
        return min(host_level, max_level);
 }
@@ -4359,7 +4362,50 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
                                  kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
 }
 
-static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, int *r)
+static bool kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
+                                   struct kvm_page_fault *fault,
+                                   bool *is_private_pfn, int *r)
+{
+       int order;
+       int mem_convert_type;
+       struct kvm_memory_slot *slot = fault->slot;
+       long pfn = kvm_memfd_get_pfn(slot, fault->gfn, &order);
+
+       if (kvm_vcpu_is_private_gfn(vcpu, fault->addr >> PAGE_SHIFT)) {
+               if (pfn < 0)
+                       mem_convert_type = KVM_EXIT_MEM_MAP_PRIVATE;
+               else {
+                       fault->pfn = pfn;
+                       if (slot->flags & KVM_MEM_READONLY)
+                               fault->map_writable = false;
+                       else
+                               fault->map_writable = true;
+
+                       if (order == 0)
+                               fault->max_level = PG_LEVEL_4K;
+                       *is_private_pfn = true;
+                       *r = RET_PF_FIXED;
+                       return true;
+               }
+       } else {
+               if (pfn < 0)
+                       return false;
+
+               kvm_memfd_put_pfn(pfn);
+               mem_convert_type = KVM_EXIT_MEM_MAP_SHARED;
+       }
+
+       vcpu->run->exit_reason = KVM_EXIT_MEMORY_ERROR;
+       vcpu->run->mem.type = mem_convert_type;
+       vcpu->run->mem.u.map.gpa = fault->gfn << PAGE_SHIFT;
+       vcpu->run->mem.u.map.size = PAGE_SIZE;
+       fault->pfn = -1;
+       *r = -1;
+       return true;
+}
+
+static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
+                           bool *is_private_pfn, int *r)
 {
        struct kvm_memory_slot *slot = fault->slot;
        bool async;
@@ -4400,6 +4446,10 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
                }
        }
 
+       if (kvm_slot_is_private(slot) &&
+           kvm_faultin_pfn_private(vcpu, fault, is_private_pfn, r))
+               return *r == RET_PF_FIXED ? false : true;
+
        async = false;
        fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, &async,
                                          fault->write, &fault->map_writable,
@@ -4446,6 +4496,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
        bool is_tdp_mmu_fault = is_tdp_mmu(vcpu->arch.mmu);
 
        unsigned long mmu_seq;
+       bool is_private_pfn = false;
        int r;
 
        fault->gfn = kvm_gfn_unalias(vcpu->kvm, fault->addr);
@@ -4465,7 +4516,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
        mmu_seq = vcpu->kvm->mmu_notifier_seq;
        smp_rmb();
 
-       if (kvm_faultin_pfn(vcpu, fault, &r))
+       if (kvm_faultin_pfn(vcpu, fault, &is_private_pfn, &r))
                return r;
 
        if (handle_abnormal_pfn(vcpu, fault, ACC_ALL, &r))
@@ -4504,7 +4555,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
        else
                write_lock(&vcpu->kvm->mmu_lock);
 
-       if (is_page_fault_stale(vcpu, fault, mmu_seq))
+       if (!is_private_pfn && is_page_fault_stale(vcpu, fault, mmu_seq))
                goto out_unlock;
 
        r = make_mmu_pages_available(vcpu);
@@ -4522,7 +4573,12 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
                read_unlock(&vcpu->kvm->mmu_lock);
        else
                write_unlock(&vcpu->kvm->mmu_lock);
-       kvm_release_pfn_clean(fault->pfn);
+
+       if (is_private_pfn)
+               kvm_memfd_put_pfn(fault->pfn);
+       else
+               kvm_release_pfn_clean(fault->pfn);
+
        return r;
 }
 
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 6d343a399913..ebd5c923f844 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -842,6 +842,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
        int r;
        unsigned long mmu_seq;
        bool is_self_change_mapping;
+       bool is_private_pfn = false;
+
 
        pgprintk("%s: addr %lx err %x\n", __func__, fault->addr, 
fault->error_code);
        WARN_ON_ONCE(fault->is_tdp);
@@ -890,7 +892,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
        mmu_seq = vcpu->kvm->mmu_notifier_seq;
        smp_rmb();
 
-       if (kvm_faultin_pfn(vcpu, fault, &r))
+       if (kvm_faultin_pfn(vcpu, fault, &is_private_pfn, &r))
                return r;
 
        if (handle_abnormal_pfn(vcpu, fault, walker.pte_access, &r))
@@ -918,7 +920,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
        r = RET_PF_RETRY;
        write_lock(&vcpu->kvm->mmu_lock);
 
-       if (is_page_fault_stale(vcpu, fault, mmu_seq))
+       if (!is_private_pfn && is_page_fault_stale(vcpu, fault, mmu_seq))
                goto out_unlock;
 
        kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
@@ -930,7 +932,10 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 
 out_unlock:
        write_unlock(&vcpu->kvm->mmu_lock);
-       kvm_release_pfn_clean(fault->pfn);
+       if (is_private_pfn)
+               kvm_memfd_put_pfn(fault->pfn);
+       else
+               kvm_release_pfn_clean(fault->pfn);
        return r;
 }
 
-- 
2.17.1



