bug-hurd
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 5/9] use L4 page table directly on x86_64 instead of short-circuiting to pdpbase


From: Samuel Thibault
Subject: Re: [PATCH 5/9] use L4 page table directly on x86_64 instead of short-circuiting to pdpbase
Date: Sun, 12 Feb 2023 19:01:03 +0100
User-agent: NeoMutt/20170609 (1.8.3)

Applied, thanks!

Luca Dariz, le dim. 12 févr. 2023 18:28:14 +0100, a écrit:
> This is a preparation to run the kernel on high addresses, where the
> user vm region and the kernel vm region will use different L3 page
> tables.
> 
> * i386/intel/pmap.c: on x86_64, retrieve the value of pdpbase from the
>   L4 table, and add the pmap_pdp() helper (useful also for PAE).
> * i386/intel/pmap.h: remove pdpbase on x86_64.
> ---
>  i386/intel/pmap.c | 97 ++++++++++++++++++++++++++++++++++++-----------
>  i386/intel/pmap.h |  7 ++--
>  2 files changed, 78 insertions(+), 26 deletions(-)
> 
> diff --git a/i386/intel/pmap.c b/i386/intel/pmap.c
> index 470be744..9e9f91db 100644
> --- a/i386/intel/pmap.c
> +++ b/i386/intel/pmap.c
> @@ -430,14 +430,11 @@ pt_entry_t *kernel_page_dir;
>  static pmap_mapwindow_t mapwindows[PMAP_NMAPWINDOWS];
>  def_simple_lock_data(static, pmapwindows_lock)
>  
> +#ifdef PAE
>  static inline pt_entry_t *
> -pmap_pde(const pmap_t pmap, vm_offset_t addr)
> +pmap_ptp(const pmap_t pmap, vm_offset_t addr)
>  {
> -     pt_entry_t *page_dir;
> -     if (pmap == kernel_pmap)
> -             addr = kvtolin(addr);
> -#if PAE
> -     pt_entry_t *pdp_table, pdp, pde;
> +     pt_entry_t *pdp_table, pdp;
>  #ifdef __x86_64__
>       pdp = pmap->l4base[lin2l4num(addr)];
>       if ((pdp & INTEL_PTE_VALID) == 0)
> @@ -446,6 +443,19 @@ pmap_pde(const pmap_t pmap, vm_offset_t addr)
>  #else /* __x86_64__ */
>       pdp_table = pmap->pdpbase;
>  #endif /* __x86_64__ */
> +     return pdp_table;
> +}
> +#endif
> +
> +static inline pt_entry_t *
> +pmap_pde(const pmap_t pmap, vm_offset_t addr)
> +{
> +     pt_entry_t *page_dir;
> +     if (pmap == kernel_pmap)
> +             addr = kvtolin(addr);
> +#if PAE
> +     pt_entry_t *pdp_table, pde;
> +     pdp_table = pmap_ptp(pmap, addr);
>       pde = pdp_table[lin2pdpnum(addr)];
>       if ((pde & INTEL_PTE_VALID) == 0)
>               return PT_ENTRY_NULL;
> @@ -585,6 +595,7 @@ vm_offset_t pmap_map_bd(
>  static void pmap_bootstrap_pae(void)
>  {
>       vm_offset_t addr;
> +     pt_entry_t *pdp_kernel;
>  
>  #ifdef __x86_64__
>  #ifdef MACH_HYP
> @@ -595,13 +606,15 @@ static void pmap_bootstrap_pae(void)
>       memset(kernel_pmap->l4base, 0, INTEL_PGBYTES);
>  #endif       /* x86_64 */
>  
> +     // TODO: allocate only the PDPTE for kernel virtual space
> +     // this means all directmap and the stupid limit above it
>       init_alloc_aligned(PDPNUM * INTEL_PGBYTES, &addr);
>       kernel_page_dir = (pt_entry_t*)phystokv(addr);
>  
> -     kernel_pmap->pdpbase = (pt_entry_t*)phystokv(pmap_grab_page());
> -     memset(kernel_pmap->pdpbase, 0, INTEL_PGBYTES);
> +     pdp_kernel = (pt_entry_t*)phystokv(pmap_grab_page());
> +     memset(pdp_kernel, 0, INTEL_PGBYTES);
>       for (int i = 0; i < PDPNUM; i++)
> -             WRITE_PTE(&kernel_pmap->pdpbase[i],
> +             WRITE_PTE(&pdp_kernel[i],
>                         pa_to_pte(_kvtophys((void *) kernel_page_dir
>                                             + i * INTEL_PGBYTES))
>                         | INTEL_PTE_VALID
> @@ -611,10 +624,14 @@ static void pmap_bootstrap_pae(void)
>                       );
>  
>  #ifdef __x86_64__
> -     WRITE_PTE(&kernel_pmap->l4base[0], 
> pa_to_pte(_kvtophys(kernel_pmap->pdpbase)) | INTEL_PTE_VALID | 
> INTEL_PTE_WRITE);
> +        /* only fill the kernel pdpte during bootstrap */
> +     WRITE_PTE(&kernel_pmap->l4base[lin2l4num(VM_MIN_KERNEL_ADDRESS)],
> +                  pa_to_pte(_kvtophys(pdp_kernel)) | INTEL_PTE_VALID | 
> INTEL_PTE_WRITE);
>  #ifdef       MACH_PV_PAGETABLES
>       pmap_set_page_readonly_init(kernel_pmap->l4base);
> -#endif
> +#endif /* MACH_PV_PAGETABLES */
> +#else        /* x86_64 */
> +        kernel_pmap->pdpbase = pdp_kernel;
>  #endif       /* x86_64 */
>  }
>  #endif /* PAE */
> @@ -1243,7 +1260,7 @@ pmap_page_table_page_dealloc(vm_offset_t pa)
>   */
>  pmap_t pmap_create(vm_size_t size)
>  {
> -     pt_entry_t              *page_dir[PDPNUM];
> +     pt_entry_t              *page_dir[PDPNUM], *pdp_kernel;
>       int                     i;
>       pmap_t                  p;
>       pmap_statistics_t       stats;
> @@ -1301,34 +1318,40 @@ pmap_t pmap_create(vm_size_t size)
>  #endif       /* MACH_PV_PAGETABLES */
>  
>  #if PAE
> -     p->pdpbase = (pt_entry_t *) kmem_cache_alloc(&pdpt_cache);
> -     if (p->pdpbase == NULL) {
> +     pdp_kernel = (pt_entry_t *) kmem_cache_alloc(&pdpt_cache);
> +     if (pdp_kernel == NULL) {
>               for (i = 0; i < PDPNUM; i++)
>                       kmem_cache_free(&pd_cache, (vm_address_t) page_dir[i]);
>               kmem_cache_free(&pmap_cache, (vm_address_t) p);
>               return PMAP_NULL;
>       }
>  
> -     memset(p->pdpbase, 0, INTEL_PGBYTES);
> +     memset(pdp_kernel, 0, INTEL_PGBYTES);
>       {
>               for (i = 0; i < PDPNUM; i++)
> -                     WRITE_PTE(&p->pdpbase[i],
> +                     WRITE_PTE(&pdp_kernel[i],
>                                 pa_to_pte(kvtophys((vm_offset_t) page_dir[i]))
>                                 | INTEL_PTE_VALID
>  #if (defined(__x86_64__) && !defined(MACH_HYP)) || 
> defined(MACH_PV_PAGETABLES)
>                                 | INTEL_PTE_WRITE
>  #ifdef __x86_64__
>                                 | INTEL_PTE_USER
> -#endif
> +#endif /* __x86_64__ */
>  #endif
>                                 );
>       }
>  #ifdef __x86_64__
> +     // TODO alloc only PDPTE for the user range VM_MIN_ADDRESS, 
> VM_MAX_ADDRESS
> +     // and keep the same for kernel range, in l4 table we have different 
> entries
>       p->l4base = (pt_entry_t *) kmem_cache_alloc(&l4_cache);
>       if (p->l4base == NULL)
>               panic("pmap_create");
>       memset(p->l4base, 0, INTEL_PGBYTES);
> -     WRITE_PTE(&p->l4base[0], pa_to_pte(kvtophys((vm_offset_t) p->pdpbase)) 
> | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_USER);
> +     WRITE_PTE(&p->l4base[lin2l4num(VM_MIN_KERNEL_ADDRESS)],
> +               pa_to_pte(kvtophys((vm_offset_t) pdp_kernel)) | 
> INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_USER);
> +#if lin2l4num(VM_MIN_KERNEL_ADDRESS) != lin2l4num(VM_MAX_ADDRESS)
> +     // TODO kernel vm and user vm are not in the same l4 entry, so add the 
> user one
> +#endif
>  #ifdef       MACH_PV_PAGETABLES
>       // FIXME: use kmem_cache_alloc instead
>       if (kmem_alloc_wired(kernel_map,
> @@ -1349,6 +1372,8 @@ pmap_t pmap_create(vm_size_t size)
>       memset(p->user_l4base, 0, INTEL_PGBYTES);
>       WRITE_PTE(&p->user_l4base[0], pa_to_pte(kvtophys((vm_offset_t) 
> p->user_pdpbase)) | INTEL_PTE_VALID | INTEL_PTE_WRITE);
>  #endif       /* MACH_PV_PAGETABLES */
> +#else        /* _x86_64 */
> +     p->pdpbase = pdp_kernel;
>  #endif       /* _x86_64 */
>  #ifdef       MACH_PV_PAGETABLES 
>  #ifdef __x86_64__
> @@ -1411,12 +1436,22 @@ void pmap_destroy(pmap_t p)
>  
>  #if PAE
>       for (i = 0; i <= lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS); i++) {
> -         free_all = i < lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS);
> -         page_dir = (pt_entry_t *) ptetokv(p->pdpbase[i]);
> +#ifdef __x86_64__
> +#ifdef USER32
> +         /* In this case we know we have one PDP for user space */
> +         pt_entry_t *pdp = (pt_entry_t *) 
> ptetokv(p->l4base[lin2l4num(VM_MIN_ADDRESS)]);
>  #else
> +#error "TODO do 64-bit userspace need more that 512G?"
> +#endif /* USER32 */
> +         page_dir = (pt_entry_t *) ptetokv(pdp[i]);
> +#else /* __x86_64__ */
> +         page_dir = (pt_entry_t *) ptetokv(p->pdpbase[i]);
> +#endif /* __x86_64__ */
> +         free_all = i < lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS);
> +#else /* PAE */
>           free_all = FALSE;
>           page_dir = p->dirbase;
> -#endif
> +#endif /* PAE */
>  
>  #ifdef __x86_64__
>  #warning FIXME 64bit need to free l3
> @@ -1464,14 +1499,20 @@ void pmap_destroy(pmap_t p)
>  #endif /* __x86_64__ */
>       pmap_set_page_readwrite(p->pdpbase);
>  #endif       /* MACH_PV_PAGETABLES */
> +
>  #ifdef __x86_64__
> +     kmem_cache_free(&pdpt_cache, (vm_offset_t) pmap_ptp(p, VM_MIN_ADDRESS));
> +#if lin2l4num(VM_MIN_KERNEL_ADDRESS) != lin2l4num(VM_MAX_ADDRESS)
> +     // TODO kernel vm and user vm are not in the same l4 entry
> +#endif
>          kmem_cache_free(&l4_cache, (vm_offset_t) p->l4base);
>  #ifdef MACH_PV_PAGETABLES
>       kmem_free(kernel_map, (vm_offset_t)p->user_l4base, INTEL_PGBYTES);
>       kmem_free(kernel_map, (vm_offset_t)p->user_pdpbase, INTEL_PGBYTES);
>  #endif /* MACH_PV_PAGETABLES */
> -#endif /* __x86_64__ */
> +#else /* __x86_64__ */
>       kmem_cache_free(&pdpt_cache, (vm_offset_t) p->pdpbase);
> +#endif /* __x86_64__ */
>  #endif       /* PAE */
>       kmem_cache_free(&pmap_cache, (vm_offset_t) p);
>  }
> @@ -2404,8 +2445,18 @@ void pmap_collect(pmap_t p)
>  
>  #if PAE
>       for (i = 0; i <= lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS); i++) {
> -         free_all = i < lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS);
> +#ifdef __x86_64__
> +#ifdef USER32
> +         /* In this case we know we have one PDP for user space */
> +         pdp = (pt_entry_t *) ptetokv(p->l4base[lin2l4num(VM_MIN_ADDRESS)]);
> +#else
> +#error "TODO do 64-bit userspace need more that 512G?"
> +#endif /* USER32 */
> +         page_dir = (pt_entry_t *) ptetokv(pdp[i]);
> +#else /* __x86_64__ */
>           page_dir = (pt_entry_t *) ptetokv(p->pdpbase[i]);
> +#endif /* __x86_64__ */
> +         free_all = i < lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS);
>  #else
>           i = 0;
>           free_all = FALSE;
> diff --git a/i386/intel/pmap.h b/i386/intel/pmap.h
> index 92247faa..1c6a0d30 100644
> --- a/i386/intel/pmap.h
> +++ b/i386/intel/pmap.h
> @@ -182,16 +182,17 @@ typedef volatile long   cpu_set;        /* set of CPUs 
> - must be <= 32 */
>  struct pmap {
>  #if ! PAE
>       pt_entry_t      *dirbase;       /* page directory table */
> -#else
> -     pt_entry_t      *pdpbase;       /* page directory pointer table */
> -#endif       /* ! PAE */
> +#else        /* PAE */
>  #ifdef __x86_64__
>       pt_entry_t      *l4base;        /* l4 table */
>  #ifdef MACH_HYP
>       pt_entry_t      *user_l4base;   /* Userland l4 table */
>       pt_entry_t      *user_pdpbase;  /* Userland l4 table */
>  #endif       /* MACH_HYP */
> +#else        /* x86_64 */
> +     pt_entry_t      *pdpbase;       /* page directory pointer table */
>  #endif       /* x86_64 */
> +#endif       /* PAE */
>       int             ref_count;      /* reference count */
>       decl_simple_lock_data(,lock)
>                                       /* lock on map */
> -- 
> 2.30.2
> 
> 

-- 
Samuel
---
Pour une évaluation indépendante, transparente et rigoureuse !
Je soutiens la Commission d'Évaluation de l'Inria.



reply via email to

[Prev in Thread] Current Thread [Next in Thread]