bug-hurd
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v3] i386: Refactor int stacks to be per cpu for SMP


From: Samuel Thibault
Subject: Re: [PATCH v3] i386: Refactor int stacks to be per cpu for SMP
Date: Mon, 13 Feb 2023 01:31:04 +0100
User-agent: NeoMutt/20170609 (1.8.3)

Applied and fixed for PAE, thanks!

Damien Zammit, le sam. 04 févr. 2023 01:15:06 +0000, a ecrit:
> This also serialises the AP bringup, so paging can be enabled per cpu
> one by one.
> 
> Also-by: Almudena Garcia <liberamenso10000@gmail.com>
> ---
>  i386/i386/mp_desc.c     | 230 ++++++++++++++++++++++++++++------------
>  i386/i386/mp_desc.h     |   7 +-
>  i386/i386at/boothdr.S   |  18 +++-
>  i386/i386at/ioapic.c    |   5 +-
>  i386/i386at/model_dep.c |  33 +++---
>  5 files changed, 201 insertions(+), 92 deletions(-)
> 
> diff --git a/i386/i386/mp_desc.c b/i386/i386/mp_desc.c
> index bcf2fbe7..03b2ea68 100644
> --- a/i386/i386/mp_desc.c
> +++ b/i386/i386/mp_desc.c
> @@ -24,25 +24,36 @@
>   * the rights to redistribute these changes.
>   */
> 
> -#if  NCPUS > 1
> -
> -#include <string.h>
> -
>  #include <kern/cpu_number.h>
>  #include <kern/debug.h>
>  #include <kern/printf.h>
> +#include <kern/smp.h>
> +#include <kern/startup.h>
> +#include <kern/kmutex.h>
>  #include <mach/machine.h>
>  #include <mach/xen.h>
>  #include <vm/vm_kern.h>
> 
>  #include <i386/mp_desc.h>
>  #include <i386/lock.h>
> +#include <i386/apic.h>
> +#include <i386/locore.h>
> +#include <i386/gdt.h>
> +#include <i386at/idt.h>
> +#include <i386at/int_init.h>
> +#include <i386/cpu.h>
> +#include <i386/smp.h>
> +
>  #include <i386at/model_dep.h>
>  #include <machine/ktss.h>
> +#include <machine/smp.h>
>  #include <machine/tss.h>
>  #include <machine/io_perm.h>
>  #include <machine/vm_param.h>
> 
> +#include <i386at/acpi_parse_apic.h>
> +#include <string.h>
> +
>  /*
>   * The i386 needs an interrupt stack to keep the PCB stack from being
>   * overrun by interrupts.  All interrupt stacks MUST lie at lower addresses
> @@ -52,20 +63,35 @@
>  /*
>   * Addresses of bottom and top of interrupt stacks.
>   */
> -vm_offset_t  interrupt_stack[NCPUS];
>  vm_offset_t  int_stack_top[NCPUS];
>  vm_offset_t  int_stack_base[NCPUS];
> 
> -/*
> - * Barrier address.
> - */
> -vm_offset_t  int_stack_high;
> +/* Interrupt stack allocation */
> +uint8_t solid_intstack[NCPUS*INTSTACK_SIZE] __aligned(NCPUS*INTSTACK_SIZE);
> +
> +void
> +interrupt_stack_alloc(void)
> +{
> +     int i;
> +
> +     /*
> +      * Set up pointers to the top of the interrupt stack.
> +      */
> +
> +     for (i = 0; i < NCPUS; i++) {
> +             int_stack_base[i] = (vm_offset_t) &solid_intstack[i * 
> INTSTACK_SIZE];
> +             int_stack_top[i] = (vm_offset_t) &solid_intstack[(i + 1) * 
> INTSTACK_SIZE] - 4;
> +     }
> +}
> 
> +#if  NCPUS > 1
>  /*
> - * First cpu`s interrupt stack.
> + * Flag to mark SMP init by BSP complete
>   */
> -extern char          _intstack[];    /* bottom */
> -extern char          _eintstack[];   /* top */
> +int bspdone;
> +
> +extern void *apboot, *apbootend;
> +extern volatile ApicLocalUnit* lapic;
> 
>  /*
>   * Multiprocessor i386/i486 systems use a separate copy of the
> @@ -77,7 +103,7 @@ extern char                _eintstack[];   /* top */
>   */
> 
>  /*
> - * Allocated descriptor tables.
> + * Descriptor tables.
>   */
>  struct mp_desc_table *mp_desc_table[NCPUS] = { 0 };
> 
> @@ -102,12 +128,13 @@ extern struct real_descriptor   ldt[LDTSZ];
>   * Allocate and initialize the per-processor descriptor tables.
>   */
> 
> -struct mp_desc_table *
> +int
>  mp_desc_init(int mycpu)
>  {
>       struct mp_desc_table *mpt;
> +     vm_offset_t mem;
> 
> -     if (mycpu == master_cpu) {
> +     if (mycpu == 0) {
>               /*
>                * Master CPU uses the tables built at boot time.
>                * Just set the TSS and GDT pointers.
> @@ -118,61 +145,28 @@ mp_desc_init(int mycpu)
>       }
>       else {
>               /*
> -              * Other CPUs allocate the table from the bottom of
> -              * the interrupt stack.
> +              * Allocate tables for other CPUs
>                */
> -             mpt = (struct mp_desc_table *) interrupt_stack[mycpu];
> +             if (!init_alloc_aligned(sizeof(struct mp_desc_table), &mem))
> +                     panic("not enough memory for descriptor tables");
> +             mpt = (struct mp_desc_table *)phystokv(mem);
> 
>               mp_desc_table[mycpu] = mpt;
>               mp_ktss[mycpu] = &mpt->ktss;
>               mp_gdt[mycpu] = mpt->gdt;
> 
>               /*
> -              * Copy the tables
> +              * Zero the tables
>                */
> -             memcpy(mpt->idt,
> -               idt,
> -               sizeof(idt));
> -             memcpy(mpt->gdt,
> -               gdt,
> -               sizeof(gdt));
> -             memcpy(mpt->ldt,
> -               ldt,
> -               sizeof(ldt));
> -             memset(&mpt->ktss, 0,
> -               sizeof(struct task_tss));
> +             memset(mpt->idt, 0, sizeof(idt));
> +             memset(mpt->gdt, 0, sizeof(gdt));
> +             memset(mpt->ldt, 0, sizeof(ldt));
> +             memset(&mpt->ktss, 0, sizeof(struct task_tss));
> 
> -             /*
> -              * Fix up the entries in the GDT to point to
> -              * this LDT and this TSS.
> -              */
> -#ifdef       MACH_RING1
> -             panic("TODO %s:%d\n",__FILE__,__LINE__);
> -#else        /* MACH_RING1 */
> -             _fill_gdt_sys_descriptor(mpt->gdt, KERNEL_LDT,
> -                     (unsigned)&mpt->ldt,
> -                     LDTSZ * sizeof(struct real_descriptor) - 1,
> -                     ACC_P|ACC_PL_K|ACC_LDT, 0);
> -             _fill_gdt_sys_descriptor(mpt->gdt, KERNEL_TSS,
> -                     (unsigned)&mpt->ktss,
> -                     sizeof(struct task_tss) - 1,
> -                     ACC_P|ACC_PL_K|ACC_TSS, 0);
> -
> -             mpt->ktss.tss.ss0 = KERNEL_DS;
> -             mpt->ktss.tss.io_bit_map_offset = IOPB_INVAL;
> -             mpt->ktss.barrier = 0xFF;
> -#endif       /* MACH_RING1 */
> -
> -             return mpt;
> +             return mycpu;
>       }
>  }
> 
> -static kern_return_t intel_startCPU(int slot_num)
> -{
> -     printf("TODO: intel_startCPU\n");
> -     return KERN_FAILURE;
> -}
> -
>  /* XXX should be adjusted per CPU speed */
>  int simple_lock_pause_loop = 100;
> 
> @@ -206,24 +200,128 @@ void
>  interrupt_processor(int cpu)
>  {
>       printf("interrupt cpu %d\n",cpu);
> +     smp_pmap_update(apic_get_cpu_apic_id(cpu));
> +}
> +
> +static void
> +paging_enable(void)
> +{
> +#ifndef MACH_HYP
> +    /* Turn paging on.
> +     * TODO: Why does setting the WP bit here cause a crash?
> +     */
> +    set_cr0(get_cr0() | CR0_PG /* | CR0_WP */);
> +    set_cr0(get_cr0() & ~(CR0_CD | CR0_NW));
> +    if (CPU_HAS_FEATURE(CPU_FEATURE_PGE))
> +        set_cr4(get_cr4() | CR4_PGE);
> +#endif  /* MACH_HYP */
> +}
> +
> +void
> +cpu_setup(int cpu)
> +{
> +    pmap_make_temporary_mapping();
> +    printf("AP=(%u) tempmap done\n", cpu);
> +
> +    paging_enable();
> +    flush_instr_queue();
> +    printf("AP=(%u) paging done\n", cpu);
> +
> +    mp_desc_init(cpu);
> +    printf("AP=(%u) mpdesc done\n", cpu);
> +
> +    ap_gdt_init(cpu);
> +    printf("AP=(%u) gdt done\n", cpu);
> +
> +    ap_idt_init(cpu);
> +    printf("AP=(%u) idt done\n", cpu);
> +
> +    ap_int_init(cpu);
> +    printf("AP=(%u) int done\n", cpu);
> +
> +    ap_ldt_init(cpu);
> +    printf("AP=(%u) ldt done\n", cpu);
> +
> +    ap_ktss_init(cpu);
> +    printf("AP=(%u) ktss done\n", cpu);
> +
> +    pmap_remove_temporary_mapping();
> +    printf("AP=(%u) remove tempmap done\n", cpu);
> +
> +    pmap_set_page_dir();
> +    flush_tlb();
> +    printf("AP=(%u) reset page dir done\n", cpu);
> +
> +    /* Initialize machine_slot fields with the cpu data */
> +    machine_slot[cpu].cpu_subtype = CPU_SUBTYPE_AT386;
> +    machine_slot[cpu].cpu_type = machine_slot[0].cpu_type;
> +
> +    lapic_enable();
> +    cpu_launch_first_thread(THREAD_NULL);
> +}
> +
> +void
> +cpu_ap_main()
> +{
> +    unsigned apic_id = (((ApicLocalUnit*)phystokv(lapic_addr))->apic_id.r >> 
> 24) & 0xff;
> +    int cpu = apic_get_cpu_kernel_id(apic_id);
> +
> +    do {
> +        asm volatile ("pause" : : : "memory");
> +    } while (bspdone != cpu);
> +
> +    __sync_synchronize();
> +
> +    cpu_setup(cpu);
>  }
> 
>  kern_return_t
>  cpu_start(int cpu)
>  {
> -     if (machine_slot[cpu].running)
> -             return KERN_FAILURE;
> +    assert(machine_slot[cpu].running != TRUE);
> +
> +    uint16_t apic_id = apic_get_cpu_apic_id(cpu);
> +
> +    printf("Trying to enable: %d\n", apic_id);
> 
> -     return intel_startCPU(cpu);
> +    smp_startup_cpu(apic_id, AP_BOOT_ADDR);
> +
> +    printf("Started cpu %d (lapic id %04x)\n", cpu, apic_id);
> +
> +    return KERN_SUCCESS;
>  }
> 
>  void
>  start_other_cpus(void)
>  {
> -     int cpu;
> -     for (cpu = 0; cpu < NCPUS; cpu++)
> -             if (cpu != cpu_number())
> -                     cpu_start(cpu);
> -}
> +     int ncpus = smp_get_numcpus();
> +
> +     //Copy cpu initialization assembly routine
> +     memcpy((void*)phystokv(AP_BOOT_ADDR), (void*) &apboot,
> +            (uint32_t)&apbootend - (uint32_t)&apboot);
> 
> +#ifndef APIC
> +     lapic_enable(); /* Enable lapic only once */
> +#endif
> +     unsigned cpu;
> +
> +     splhigh();
> +
> +     bspdone = 0;
> +     for (cpu = 1; cpu < ncpus; cpu++) {
> +             machine_slot[cpu].running = FALSE;
> +
> +             //Start cpu
> +             printf("Starting AP %d\n", cpu);
> +             cpu_start(cpu);
> +
> +             bspdone++;
> +             do {
> +                     asm volatile ("pause" : : : "memory");
> +             } while (machine_slot[cpu].running == FALSE);
> +
> +             __sync_synchronize();
> +     }
> +     printf("BSP: Completed SMP init\n");
> +}
>  #endif       /* NCPUS > 1 */
> diff --git a/i386/i386/mp_desc.h b/i386/i386/mp_desc.h
> index ede8775f..59d50e77 100644
> --- a/i386/i386/mp_desc.h
> +++ b/i386/i386/mp_desc.h
> @@ -46,6 +46,8 @@
>  #include "gdt.h"
>  #include "ldt.h"
> 
> +#define AP_BOOT_ADDR 0x7000
> +
>  /*
>   * The descriptor tables are together in a structure
>   * allocated one per processor (except for the boot processor).
> @@ -72,11 +74,12 @@ extern struct task_tss            *mp_ktss[NCPUS];
>   */
>  extern struct real_descriptor        *mp_gdt[NCPUS];
> 
> +extern uint8_t solid_intstack[];
> 
>  /*
>   * Each CPU calls this routine to set up its descriptor tables.
>   */
> -extern struct mp_desc_table *        mp_desc_init(int);
> +extern int mp_desc_init(int);
> 
> 
>  extern void interrupt_processor(int cpu);
> @@ -90,4 +93,6 @@ extern kern_return_t cpu_start(int cpu);
> 
>  extern kern_return_t cpu_control(int cpu, const int *info, unsigned int 
> count);
> 
> +extern void interrupt_stack_alloc(void);
> +
>  #endif       /* _I386_MP_DESC_H_ */
> diff --git a/i386/i386at/boothdr.S b/i386/i386at/boothdr.S
> index 82d4b34a..d1d1fa51 100644
> --- a/i386/i386at/boothdr.S
> +++ b/i386/i386at/boothdr.S
> @@ -1,6 +1,6 @@
> 
>  #include <mach/machine/asm.h>
> -
> +#include <i386/apic.h>
>  #include <i386/i386asm.h>
> 
>       /*
> @@ -54,7 +54,18 @@ boot_entry:
>       movw    %ax,%ss
> 
>       /* Switch to our own interrupt stack.  */
> -     movl    $_intstack+INTSTACK_SIZE,%esp
> +     movl    $solid_intstack+INTSTACK_SIZE-4, %esp
> +     andl    $0xfffffff0,%esp
> +
> +     /* Enable local apic */
> +     xorl    %eax, %eax
> +     xorl    %edx, %edx
> +     movl    $APIC_MSR, %ecx
> +     rdmsr
> +     orl     $APIC_MSR_ENABLE, %eax
> +     orl     $APIC_MSR_BSP, %eax
> +     movl    $APIC_MSR, %ecx
> +     wrmsr
> 
>       /* Reset EFLAGS to a known state.  */
>       pushl   $0
> @@ -91,9 +102,6 @@ iplt_done:
>       /* Jump into C code.  */
>       call    EXT(c_boot_entry)
> 
> -     .comm   _intstack,INTSTACK_SIZE
> -     .comm   _eintstack,0
> -
>  .align 16
>       .word 0
>  boot_gdt_descr:
> diff --git a/i386/i386at/ioapic.c b/i386/i386at/ioapic.c
> index 003690ed..f7b0d1d3 100644
> --- a/i386/i386at/ioapic.c
> +++ b/i386/i386at/ioapic.c
> @@ -186,9 +186,8 @@ lapic_enable_timer(void)
>      /* Some buggy hardware requires this set again */
>      lapic->divider_config.r = LAPIC_TIMER_DIVIDE_16;
> 
> -    /* Enable interrupts for the first time on BSP */
> -    asm("sti");
> -    printf("LAPIC timer configured\n");
> +    /* Enable interrupts for the first time */
> +    printf("LAPIC timer configured on cpu%d\n", cpu_number());
>  }
> 
>  void
> diff --git a/i386/i386at/model_dep.c b/i386/i386at/model_dep.c
> index 98408c06..e0f669b6 100644
> --- a/i386/i386at/model_dep.c
> +++ b/i386/i386at/model_dep.c
> @@ -134,11 +134,9 @@ extern char      version[];
>  /* If set, reboot the system on ctrl-alt-delete.  */
>  boolean_t    rebootflag = FALSE;     /* exported to kdintr */
> 
> -/* Interrupt stack.  */
> -static char int_stack[INTSTACK_SIZE] __aligned(INTSTACK_SIZE);
> -#if NCPUS <= 1
> -vm_offset_t int_stack_top[1], int_stack_base[1];
> -#endif
> +/* Interrupt stacks  */
> +extern vm_offset_t int_stack_top[], int_stack_base[];
> +extern uint8_t solid_intstack[];    /* bottom */
> 
>  #ifdef LINUX_DEV
>  extern void linux_init(void);
> @@ -171,15 +169,16 @@ void machine_init(void)
>       hyp_init();
>  #else        /* MACH_HYP */
> 
> -#if defined(APIC)
> -     if (acpi_apic_init() != ACPI_SUCCESS) {
> -             panic("APIC not found, unable to boot");
> -     }
> -     ioapic_configure();
> -     lapic_enable_timer();
>  #if (NCPUS > 1)
> +     acpi_apic_init();
>       smp_init();
> +#endif
> +#if defined(APIC)
> +     ioapic_configure();
> +#endif
> +     startrtclock();
> 
> +#if defined(APIC)
>  #warning FIXME: Rather unmask them from their respective drivers
>       /* kd */
>       unmask_irq(1);
> @@ -187,8 +186,7 @@ void machine_init(void)
>       unmask_irq(4);
>       /* com1 */
>       unmask_irq(3);
> -#endif /* NCPUS > 1 */
> -#endif /* APIC */
> +#endif
> 
>  #ifdef LINUX_DEV
>       /*
> @@ -483,8 +481,7 @@ i386at_init(void)
>       hyp_p2m_init();
>  #endif       /* MACH_XEN */
> 
> -     int_stack_base[0] = (vm_offset_t)&int_stack;
> -     int_stack_top[0] = int_stack_base[0] + INTSTACK_SIZE - 4;
> +     interrupt_stack_alloc();
>  }
> 
>  /*
> @@ -576,7 +573,6 @@ void c_boot_entry(vm_offset_t bi)
>  #endif       /* MACH_KDB */
> 
>       machine_slot[0].is_cpu = TRUE;
> -     machine_slot[0].running = TRUE;
>       machine_slot[0].cpu_subtype = CPU_SUBTYPE_AT386;
> 
>       switch (cpu_type)
> @@ -622,8 +618,11 @@ timemmap(dev_t dev, vm_offset_t off, vm_prot_t prot)
>  void
>  startrtclock(void)
>  {
> -#ifndef APIC
> +#ifdef APIC
> +     lapic_enable_timer();
> +#else
>       clkstart();
> +     unmask_irq(0);
>  #endif
>  }
> 
> --
> 2.34.1
> 
> 
> 

-- 
Samuel
---
Pour une évaluation indépendante, transparente et rigoureuse !
Je soutiens la Commission d'Évaluation de l'Inria.



reply via email to

[Prev in Thread] Current Thread [Next in Thread]