[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v3 0/4] Introduce the microvm machine type
From: Michael S. Tsirkin
Subject: Re: [Qemu-devel] [PATCH v3 0/4] Introduce the microvm machine type
Date: Thu, 25 Jul 2019 06:03:05 -0400
On Wed, Jul 24, 2019 at 01:14:35PM +0200, Paolo Bonzini wrote:
> On 23/07/19 12:01, Paolo Bonzini wrote:
> > The number of buses is determined by the firmware, not by QEMU, so
> > fw_cfg would not be the right interface. In fact (as I have just
> > learnt) lastbus is an x86-specific option that overrides the last bus
> > returned by SeaBIOS's handle_1ab101.
> >
> > So the next step could be to figure out what is the lastbus returned by
> > handle_1ab101 and possibly why it isn't zero.
>
> Some update:
>
> - for 64-bit, PCIBIOS (and thus handle_1ab101) is not called. PCIBIOS is
> only used by 32-bit kernels. As a side effect, PCI expander bridges do not
> work on 32-bit kernels with ACPI disabled, because they are located beyond
> pcibios_last_bus (with ACPI enabled, the DSDT exposes them).
>
> - for -M pc, pcibios_last_bus in Linux remains -1 and no "legacy scanning" is
> done.
>
> - for -M q35, pcibios_last_bus in Linux is set based on the size of the
> MMCONFIG aperture and Linux ends up scanning all 32*255 (bus,dev) pairs
> for buses above 0.
>
> Here is a patch that only scans devfn==0, which should mostly remove the need
> for pci=lastbus=0. (Testing is welcome).
>
> Actually, KVM could probably avoid the scanning altogether. The only
> "hidden" root buses we expect are from PCI expander bridges, and if you
> found an MMCONFIG area through the ACPI MCFG table, you can also use the
> DSDT to find PCI expander bridges. However, I am being conservative.
>
> A possible alternative could be a mechanism whereby the vmlinuz real mode
> entry point, or the 32-bit PVH entry point, fetches lastbus and passes it
> to the kernel via the vmlinuz or PVH boot information structs. However, I
> don't think that's very useful, and there is some risk of breaking real
> hardware too.
>
> Paolo
>
> diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
> index 73bb404f4d2a..17012aa60d22 100644
> --- a/arch/x86/include/asm/pci_x86.h
> +++ b/arch/x86/include/asm/pci_x86.h
> @@ -61,6 +61,7 @@ enum pci_bf_sort_state {
> extern struct pci_ops pci_root_ops;
>
> void pcibios_scan_specific_bus(int busn);
> +void pcibios_scan_bus_by_device(int busn);
>
> /* pci-irq.c */
>
> @@ -216,8 +217,10 @@ static inline void mmio_config_writel(void __iomem *pos,
> u32 val)
> # endif
> # define x86_default_pci_init_irq pcibios_irq_init
> # define x86_default_pci_fixup_irqs pcibios_fixup_irqs
> +# define x86_default_pci_scan_bus pcibios_scan_bus_by_device
> #else
> # define x86_default_pci_init NULL
> # define x86_default_pci_init_irq NULL
> # define x86_default_pci_fixup_irqs NULL
> +# define x86_default_pci_scan_bus NULL
> #endif
> diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
> index b85a7c54c6a1..4c3a0a17a600 100644
> --- a/arch/x86/include/asm/x86_init.h
> +++ b/arch/x86/include/asm/x86_init.h
> @@ -251,6 +251,7 @@ struct x86_hyper_runtime {
> * @save_sched_clock_state: save state for sched_clock() on suspend
> * @restore_sched_clock_state: restore state for sched_clock() on
> resume
> * @apic_post_init: adjust apic if needed
> + * @pci_scan_bus: scan a PCI bus
> * @legacy: legacy features
> * @set_legacy_features: override legacy features. Use of this callback
> * is highly discouraged. You should only need
> @@ -273,6 +274,7 @@ struct x86_platform_ops {
> void (*save_sched_clock_state)(void);
> void (*restore_sched_clock_state)(void);
> void (*apic_post_init)(void);
> + void (*pci_scan_bus)(int busn);
> struct x86_legacy_features legacy;
> void (*set_legacy_features)(void);
> struct x86_hyper_runtime hyper;
> diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c
> index 6857b4577f17..b248d7036dd3 100644
> --- a/arch/x86/kernel/jailhouse.c
> +++ b/arch/x86/kernel/jailhouse.c
> @@ -11,12 +11,14 @@
> #include <linux/acpi_pmtmr.h>
> #include <linux/kernel.h>
> #include <linux/reboot.h>
> +#include <linux/pci.h>
> #include <asm/apic.h>
> #include <asm/cpu.h>
> #include <asm/hypervisor.h>
> #include <asm/i8259.h>
> #include <asm/irqdomain.h>
> #include <asm/pci_x86.h>
> +#include <asm/pci.h>
> #include <asm/reboot.h>
> #include <asm/setup.h>
> #include <asm/jailhouse_para.h>
> @@ -136,6 +138,22 @@ static int __init jailhouse_pci_arch_init(void)
> return 0;
> }
>
> +static void jailhouse_pci_scan_bus_by_function(int busn)
> +{
> + int devfn;
> + u32 l;
> +
> + for (devfn = 0; devfn < 256; devfn++) {
> + if (!raw_pci_read(0, busn, devfn, PCI_VENDOR_ID, 2, &l) &&
> + l != 0x0000 && l != 0xffff) {
> + DBG("Found device at %02x:%02x [%04x]\n", busn,
> devfn, l);
> + pr_info("PCI: Discovered peer bus %02x\n", busn);
> + pcibios_scan_root(busn);
> + return;
> + }
> + }
> +}
> +
> static void __init jailhouse_init_platform(void)
> {
> u64 pa_data = boot_params.hdr.setup_data;
> @@ -153,6 +171,7 @@ static void __init jailhouse_init_platform(void)
> x86_platform.legacy.rtc = 0;
> x86_platform.legacy.warm_reset = 0;
> x86_platform.legacy.i8042 = X86_LEGACY_I8042_PLATFORM_ABSENT;
> + x86_platform.pci_scan_bus = jailhouse_pci_scan_bus_by_function;
>
> legacy_pic = &null_legacy_pic;
>
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index 82caf01b63dd..59f7204ed8f3 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -24,6 +24,7 @@
> #include <linux/debugfs.h>
> #include <linux/nmi.h>
> #include <linux/swait.h>
> +#include <linux/pci.h>
> #include <asm/timer.h>
> #include <asm/cpu.h>
> #include <asm/traps.h>
> @@ -33,6 +34,7 @@
> #include <asm/apicdef.h>
> #include <asm/hypervisor.h>
> #include <asm/tlb.h>
> +#include <asm/pci.h>
>
> static int kvmapf = 1;
>
> @@ -621,10 +623,31 @@ static void kvm_flush_tlb_others(const struct cpumask
> *cpumask,
> native_flush_tlb_others(flushmask, info);
> }
>
> +#ifdef CONFIG_PCI
> +static void kvm_pci_scan_bus(int busn)
> +{
> + u32 l;
> +
> + /*
> + * Assume that there are no "hidden" buses, i.e. all PCI root buses
> + * have a host bridge at device 0, function 0.
> + */
> + if (!raw_pci_read(0, busn, 0, PCI_VENDOR_ID, 2, &l) &&
> + l != 0x0000 && l != 0xffff) {
> + pr_info("PCI: Discovered peer bus %02x\n", busn);
> + pcibios_scan_root(busn);
> + }
> +}
> +#endif
> +
> static void __init kvm_guest_init(void)
> {
> int i;
>
> +#ifdef CONFIG_PCI
> + x86_platform.pci_scan_bus = kvm_pci_scan_bus;
> +#endif
> +
> if (!kvm_para_available())
> return;
>
Shouldn't this happen after the kvm_para_available() check?
In fact, let's add a CPUID flag for this, so it's
easy to tell the guest whether to scan extra buses.
What do you say?
> diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
> index 50a2b492fdd6..19e1cc2cb6e0 100644
> --- a/arch/x86/kernel/x86_init.c
> +++ b/arch/x86/kernel/x86_init.c
> @@ -118,6 +118,7 @@ struct x86_platform_ops x86_platform __ro_after_init = {
> .get_nmi_reason = default_get_nmi_reason,
> .save_sched_clock_state = tsc_save_sched_clock_state,
> .restore_sched_clock_state = tsc_restore_sched_clock_state,
> + .pci_scan_bus = x86_default_pci_scan_bus,
> .hyper.pin_vcpu = x86_op_int_noop,
> };
>
> diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
> index 467311b1eeea..6214dbce26d3 100644
> --- a/arch/x86/pci/legacy.c
> +++ b/arch/x86/pci/legacy.c
> @@ -36,14 +36,19 @@ int __init pci_legacy_init(void)
>
> void pcibios_scan_specific_bus(int busn)
> {
> - int stride = jailhouse_paravirt() ? 1 : 8;
> - int devfn;
> - u32 l;
> -
> if (pci_find_bus(0, busn))
> return;
>
> - for (devfn = 0; devfn < 256; devfn += stride) {
> + x86_platform.pci_scan_bus(busn);
> +}
> +EXPORT_SYMBOL_GPL(pcibios_scan_specific_bus);
> +
> +void pcibios_scan_bus_by_device(int busn)
> +{
> + int devfn;
> + u32 l;
> +
> + for (devfn = 0; devfn < 256; devfn += 8) {
> if (!raw_pci_read(0, busn, devfn, PCI_VENDOR_ID, 2, &l) &&
> l != 0x0000 && l != 0xffff) {
> DBG("Found device at %02x:%02x [%04x]\n", busn, devfn,
> l);
> @@ -53,7 +58,6 @@ void pcibios_scan_specific_bus(int busn)
> }
> }
> }
> -EXPORT_SYMBOL_GPL(pcibios_scan_specific_bus);
>
> static int __init pci_subsys_init(void)
> {
- Re: [Qemu-devel] [PATCH v3 0/4] Introduce the microvm machine type, (continued)
- Re: [Qemu-devel] [PATCH v3 0/4] Introduce the microvm machine type, Sergio Lopez, 2019/07/18
- Re: [Qemu-devel] [PATCH v3 0/4] Introduce the microvm machine type, Stefan Hajnoczi, 2019/07/19
- Re: [Qemu-devel] [PATCH v3 0/4] Introduce the microvm machine type, Sergio Lopez, 2019/07/19
- Re: [Qemu-devel] [PATCH v3 0/4] Introduce the microvm machine type, Stefan Hajnoczi, 2019/07/19
- Re: [Qemu-devel] [PATCH v3 0/4] Introduce the microvm machine type, Montes, Julio, 2019/07/19
- Re: [Qemu-devel] [PATCH v3 0/4] Introduce the microvm machine type, Sergio Lopez, 2019/07/23
- Re: [Qemu-devel] [PATCH v3 0/4] Introduce the microvm machine type, Stefan Hajnoczi, 2019/07/23
- Re: [Qemu-devel] [PATCH v3 0/4] Introduce the microvm machine type, Paolo Bonzini, 2019/07/23
- Re: [Qemu-devel] [PATCH v3 0/4] Introduce the microvm machine type, Paolo Bonzini, 2019/07/24
- Re: [Qemu-devel] [PATCH v3 0/4] Introduce the microvm machine type, Sergio Lopez, 2019/07/25
- Re: [Qemu-devel] [PATCH v3 0/4] Introduce the microvm machine type,
Michael S. Tsirkin <=
- Re: [Qemu-devel] [PATCH v3 0/4] Introduce the microvm machine type, Paolo Bonzini, 2019/07/25
- Re: [Qemu-devel] [PATCH v3 0/4] Introduce the microvm machine type, Michael S. Tsirkin, 2019/07/25
- Re: [Qemu-devel] [PATCH v3 0/4] Introduce the microvm machine type, Paolo Bonzini, 2019/07/25
- Re: [Qemu-devel] [PATCH v3 0/4] Introduce the microvm machine type, Michael S. Tsirkin, 2019/07/25
- Re: [Qemu-devel] [PATCH v3 0/4] Introduce the microvm machine type, Michael S. Tsirkin, 2019/07/25
- Re: [Qemu-devel] [PATCH v3 0/4] Introduce the microvm machine type, Paolo Bonzini, 2019/07/26
- Re: [Qemu-devel] [PATCH v3 0/4] Introduce the microvm machine type, Michael S. Tsirkin, 2019/07/26
- Re: [Qemu-devel] [PATCH v3 0/4] Introduce the microvm machine type, Stefano Garzarella, 2019/07/23
- Re: [Qemu-devel] [PATCH v3 0/4] Introduce the microvm machine type, Stefano Garzarella, 2019/07/24