bug-hurd

From: Luca Dariz
Subject: [PATCH 5/9] use L4 page table directly on x86_64 instead of short-circuiting to pdpbase
Date: Sun, 12 Feb 2023 18:28:14 +0100

This is preparation for running the kernel at high addresses, where
the user VM region and the kernel VM region will use different L3
page tables.

* i386/intel/pmap.c: on x86_64, retrieve the value of pdpbase from the
  L4 table, and add the pmap_ptp() helper (useful also for PAE).
* i386/intel/pmap.h: remove pdpbase on x86_64.
---
 i386/intel/pmap.c | 97 ++++++++++++++++++++++++++++++++++++-----------
 i386/intel/pmap.h |  7 ++--
 2 files changed, 78 insertions(+), 26 deletions(-)
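
For orientation, the address split this series builds on can be seen
from the standard x86_64 4-level paging arithmetic. A minimal,
self-contained sketch (not part of the patch; lin2l4num()/lin2pdpnum()
here are stand-ins for the kernel's macros of the same name, and the
shift/mask constants are the architectural ones):

#include <stdint.h>
#include <stdio.h>

#define L4_SHIFT 39        /* bits 47..39 index the L4 (PML4) table */
#define L3_SHIFT 30        /* bits 38..30 index the L3 (PDPT) table */
#define IDX_MASK 0x1ffULL  /* 9 bits: 512 entries per table */

static unsigned lin2l4num(uint64_t lin)  { return (lin >> L4_SHIFT) & IDX_MASK; }
static unsigned lin2pdpnum(uint64_t lin) { return (lin >> L3_SHIFT) & IDX_MASK; }

int main(void)
{
    /* With the kernel mapped at a high (illustrative) address, its L4
       slot differs from user space's slot 0, so each region gets its
       own L3 page table, as described above.  */
    uint64_t user   = 0x0000000000400000ULL;
    uint64_t kernel = 0xffffffff80000000ULL;

    printf("user:   l4=%u pdp=%u\n", lin2l4num(user), lin2pdpnum(user));
    printf("kernel: l4=%u pdp=%u\n", lin2l4num(kernel), lin2pdpnum(kernel));
    return 0;
}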

diff --git a/i386/intel/pmap.c b/i386/intel/pmap.c
index 470be744..9e9f91db 100644
--- a/i386/intel/pmap.c
+++ b/i386/intel/pmap.c
@@ -430,14 +430,11 @@ pt_entry_t *kernel_page_dir;
 static pmap_mapwindow_t mapwindows[PMAP_NMAPWINDOWS];
 def_simple_lock_data(static, pmapwindows_lock)
 
+#ifdef PAE
 static inline pt_entry_t *
-pmap_pde(const pmap_t pmap, vm_offset_t addr)
+pmap_ptp(const pmap_t pmap, vm_offset_t addr)
 {
-       pt_entry_t *page_dir;
-       if (pmap == kernel_pmap)
-               addr = kvtolin(addr);
-#if PAE
-       pt_entry_t *pdp_table, pdp, pde;
+       pt_entry_t *pdp_table, pdp;
 #ifdef __x86_64__
        pdp = pmap->l4base[lin2l4num(addr)];
        if ((pdp & INTEL_PTE_VALID) == 0)
@@ -446,6 +443,19 @@ pmap_pde(const pmap_t pmap, vm_offset_t addr)
 #else /* __x86_64__ */
        pdp_table = pmap->pdpbase;
 #endif /* __x86_64__ */
+       return pdp_table;
+}
+#endif /* PAE */
+
+static inline pt_entry_t *
+pmap_pde(const pmap_t pmap, vm_offset_t addr)
+{
+       pt_entry_t *page_dir;
+       if (pmap == kernel_pmap)
+               addr = kvtolin(addr);
+#if PAE
+       pt_entry_t *pdp_table, pde;
+       pdp_table = pmap_ptp(pmap, addr);
        pde = pdp_table[lin2pdpnum(addr)];
        if ((pde & INTEL_PTE_VALID) == 0)
                return PT_ENTRY_NULL;
@@ -585,6 +595,7 @@ vm_offset_t pmap_map_bd(
 static void pmap_bootstrap_pae(void)
 {
        vm_offset_t addr;
+       pt_entry_t *pdp_kernel;
 
 #ifdef __x86_64__
 #ifdef MACH_HYP
@@ -595,13 +606,15 @@ static void pmap_bootstrap_pae(void)
        memset(kernel_pmap->l4base, 0, INTEL_PGBYTES);
 #endif /* x86_64 */
 
+       // TODO: allocate only the PDPTE for the kernel virtual space,
+       // i.e. all of the directmap and the stupid limit above it
        init_alloc_aligned(PDPNUM * INTEL_PGBYTES, &addr);
        kernel_page_dir = (pt_entry_t*)phystokv(addr);
 
-       kernel_pmap->pdpbase = (pt_entry_t*)phystokv(pmap_grab_page());
-       memset(kernel_pmap->pdpbase, 0, INTEL_PGBYTES);
+       pdp_kernel = (pt_entry_t*)phystokv(pmap_grab_page());
+       memset(pdp_kernel, 0, INTEL_PGBYTES);
        for (int i = 0; i < PDPNUM; i++)
-               WRITE_PTE(&kernel_pmap->pdpbase[i],
+               WRITE_PTE(&pdp_kernel[i],
                          pa_to_pte(_kvtophys((void *) kernel_page_dir
                                              + i * INTEL_PGBYTES))
                          | INTEL_PTE_VALID
@@ -611,10 +624,14 @@ static void pmap_bootstrap_pae(void)
                        );
 
 #ifdef __x86_64__
-       WRITE_PTE(&kernel_pmap->l4base[0], pa_to_pte(_kvtophys(kernel_pmap->pdpbase)) | INTEL_PTE_VALID | INTEL_PTE_WRITE);
+       /* only fill the kernel pdpte during bootstrap */
+       WRITE_PTE(&kernel_pmap->l4base[lin2l4num(VM_MIN_KERNEL_ADDRESS)],
+                  pa_to_pte(_kvtophys(pdp_kernel)) | INTEL_PTE_VALID | INTEL_PTE_WRITE);
 #ifdef MACH_PV_PAGETABLES
        pmap_set_page_readonly_init(kernel_pmap->l4base);
-#endif
+#endif /* MACH_PV_PAGETABLES */
+#else  /* x86_64 */
+       kernel_pmap->pdpbase = pdp_kernel;
 #endif /* x86_64 */
 }
 #endif /* PAE */
@@ -1243,7 +1260,7 @@ pmap_page_table_page_dealloc(vm_offset_t pa)
  */
 pmap_t pmap_create(vm_size_t size)
 {
-       pt_entry_t              *page_dir[PDPNUM];
+       pt_entry_t              *page_dir[PDPNUM], *pdp_kernel;
        int                     i;
        pmap_t                  p;
        pmap_statistics_t       stats;
@@ -1301,34 +1318,40 @@ pmap_t pmap_create(vm_size_t size)
 #endif /* MACH_PV_PAGETABLES */
 
 #if PAE
-       p->pdpbase = (pt_entry_t *) kmem_cache_alloc(&pdpt_cache);
-       if (p->pdpbase == NULL) {
+       pdp_kernel = (pt_entry_t *) kmem_cache_alloc(&pdpt_cache);
+       if (pdp_kernel == NULL) {
                for (i = 0; i < PDPNUM; i++)
                        kmem_cache_free(&pd_cache, (vm_address_t) page_dir[i]);
                kmem_cache_free(&pmap_cache, (vm_address_t) p);
                return PMAP_NULL;
        }
 
-       memset(p->pdpbase, 0, INTEL_PGBYTES);
+       memset(pdp_kernel, 0, INTEL_PGBYTES);
        {
                for (i = 0; i < PDPNUM; i++)
-                       WRITE_PTE(&p->pdpbase[i],
+                       WRITE_PTE(&pdp_kernel[i],
                                  pa_to_pte(kvtophys((vm_offset_t) page_dir[i]))
                                  | INTEL_PTE_VALID
 #if (defined(__x86_64__) && !defined(MACH_HYP)) || defined(MACH_PV_PAGETABLES)
                                  | INTEL_PTE_WRITE
 #ifdef __x86_64__
                                  | INTEL_PTE_USER
-#endif
+#endif /* __x86_64__ */
 #endif
                                  );
        }
 #ifdef __x86_64__
+       // TODO: alloc only the PDPTE for the user range VM_MIN_ADDRESS, VM_MAX_ADDRESS
+       // and keep the same for the kernel range; in the l4 table we have different entries
        p->l4base = (pt_entry_t *) kmem_cache_alloc(&l4_cache);
        if (p->l4base == NULL)
                panic("pmap_create");
        memset(p->l4base, 0, INTEL_PGBYTES);
-       WRITE_PTE(&p->l4base[0], pa_to_pte(kvtophys((vm_offset_t) p->pdpbase)) | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_USER);
+       WRITE_PTE(&p->l4base[lin2l4num(VM_MIN_KERNEL_ADDRESS)],
+                 pa_to_pte(kvtophys((vm_offset_t) pdp_kernel)) | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_USER);
+#if lin2l4num(VM_MIN_KERNEL_ADDRESS) != lin2l4num(VM_MAX_ADDRESS)
+       // TODO kernel vm and user vm are not in the same l4 entry, so add the user one
+#endif
 #ifdef MACH_PV_PAGETABLES
        // FIXME: use kmem_cache_alloc instead
        if (kmem_alloc_wired(kernel_map,
@@ -1349,6 +1372,8 @@ pmap_t pmap_create(vm_size_t size)
        memset(p->user_l4base, 0, INTEL_PGBYTES);
 WRITE_PTE(&p->user_l4base[0], pa_to_pte(kvtophys((vm_offset_t) p->user_pdpbase)) | INTEL_PTE_VALID | INTEL_PTE_WRITE);
 #endif /* MACH_PV_PAGETABLES */
+#else  /* _x86_64 */
+       p->pdpbase = pdp_kernel;
 #endif /* _x86_64 */
 #ifdef MACH_PV_PAGETABLES 
 #ifdef __x86_64__
@@ -1411,12 +1436,22 @@ void pmap_destroy(pmap_t p)
 
 #if PAE
        for (i = 0; i <= lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS); i++) {
-           free_all = i < lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS);
-           page_dir = (pt_entry_t *) ptetokv(p->pdpbase[i]);
+#ifdef __x86_64__
+#ifdef USER32
+           /* In this case we know we have one PDP for user space */
+           pt_entry_t *pdp = (pt_entry_t *) ptetokv(p->l4base[lin2l4num(VM_MIN_ADDRESS)]);
+#else
+#error "TODO does 64-bit userspace need more than 512G?"
+#endif /* USER32 */
+           page_dir = (pt_entry_t *) ptetokv(pdp[i]);
+#else /* __x86_64__ */
+           page_dir = (pt_entry_t *) ptetokv(p->pdpbase[i]);
+#endif /* __x86_64__ */
+           free_all = i < lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS);
+#else /* PAE */
            free_all = FALSE;
            page_dir = p->dirbase;
-#endif
+#endif /* PAE */
 
 #ifdef __x86_64__
 #warning FIXME 64bit need to free l3
@@ -1464,14 +1499,20 @@ void pmap_destroy(pmap_t p)
 #endif /* __x86_64__ */
        pmap_set_page_readwrite(p->pdpbase);
 #endif /* MACH_PV_PAGETABLES */
+
 #ifdef __x86_64__
+       kmem_cache_free(&pdpt_cache, (vm_offset_t) pmap_ptp(p, VM_MIN_ADDRESS));
+#if lin2l4num(VM_MIN_KERNEL_ADDRESS) != lin2l4num(VM_MAX_ADDRESS)
+       // TODO kernel vm and user vm are not in the same l4 entry
+#endif
         kmem_cache_free(&l4_cache, (vm_offset_t) p->l4base);
 #ifdef MACH_PV_PAGETABLES
        kmem_free(kernel_map, (vm_offset_t)p->user_l4base, INTEL_PGBYTES);
        kmem_free(kernel_map, (vm_offset_t)p->user_pdpbase, INTEL_PGBYTES);
 #endif /* MACH_PV_PAGETABLES */
-#endif /* __x86_64__ */
+#else /* __x86_64__ */
        kmem_cache_free(&pdpt_cache, (vm_offset_t) p->pdpbase);
+#endif /* __x86_64__ */
 #endif /* PAE */
        kmem_cache_free(&pmap_cache, (vm_offset_t) p);
 }
@@ -2404,8 +2445,18 @@ void pmap_collect(pmap_t p)
 
 #if PAE
        for (i = 0; i <= lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS); i++) {
-           free_all = i < lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS);
+#ifdef __x86_64__
+#ifdef USER32
+           /* In this case we know we have one PDP for user space */
+           pt_entry_t *pdp = (pt_entry_t *) ptetokv(p->l4base[lin2l4num(VM_MIN_ADDRESS)]);
+#else
+#error "TODO does 64-bit userspace need more than 512G?"
+#endif /* USER32 */
+           page_dir = (pt_entry_t *) ptetokv(pdp[i]);
+#else /* __x86_64__ */
            page_dir = (pt_entry_t *) ptetokv(p->pdpbase[i]);
+#endif /* __x86_64__ */
+           free_all = i < lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS);
 #else
            i = 0;
            free_all = FALSE;
diff --git a/i386/intel/pmap.h b/i386/intel/pmap.h
index 92247faa..1c6a0d30 100644
--- a/i386/intel/pmap.h
+++ b/i386/intel/pmap.h
@@ -182,16 +182,17 @@ typedef   volatile long   cpu_set;        /* set of CPUs - must be <= 32 */
 struct pmap {
 #if ! PAE
        pt_entry_t      *dirbase;       /* page directory table */
-#else
-       pt_entry_t      *pdpbase;       /* page directory pointer table */
-#endif /* ! PAE */
+#else  /* PAE */
 #ifdef __x86_64__
        pt_entry_t      *l4base;        /* l4 table */
 #ifdef MACH_HYP
        pt_entry_t      *user_l4base;   /* Userland l4 table */
        pt_entry_t      *user_pdpbase;  /* Userland l4 table */
 #endif /* MACH_HYP */
+#else  /* x86_64 */
+       pt_entry_t      *pdpbase;       /* page directory pointer table */
 #endif /* x86_64 */
+#endif /* PAE */
        int             ref_count;      /* reference count */
        decl_simple_lock_data(,lock)
                                        /* lock on map */
-- 
2.30.2
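
As a quick check of the USER32 assumption used in the pmap_destroy()
and pmap_collect() hunks above ("we know we have one PDP for user
space"), a standalone sketch (not from the patch; the shift and mask
are the architectural x86_64 values):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    /* Every 32-bit user address has L4 index 0, so a single PDP (L3)
       table reachable from l4base[lin2l4num(VM_MIN_ADDRESS)] covers
       all of user space under USER32.  */
    for (uint64_t a = 0; a <= 0xffffffffULL; a += 0x40000000ULL) /* 1 GiB steps */
        assert(((a >> 39) & 0x1ff) == 0);
    return 0;
}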
