qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH] ioapic: allow buggy guests mishandling level-trigge


From: Vitaly Kuznetsov
Subject: [Qemu-devel] [PATCH] ioapic: allow buggy guests mishandling level-triggered interrupts to make progress
Date: Mon, 1 Apr 2019 15:36:59 +0200

It was found that Hyper-V 2016 on KVM in some configurations (q35 machine +
piix4-usb-uhci) hangs on boot. Trace analysis led us to the conclusion that
it is mishandling level-triggered interrupt performing EOI without fixing
the root cause. This causes immediate re-assertion and L2 VM (which is
supposedly expected to fix the cause of the interrupt) is not making any
progress.

Gory details: https://www.spinics.net/lists/kvm/msg184484.html

Turns out we were dealing with similar issues before; in-kernel IOAPIC
implementation has commit 184564efae4d ("kvm: ioapic: conditionally delay
irq delivery duringeoi broadcast") which describes a very similar issue.

Steal the idea from the above mentioned commit for IOAPIC implementation in
QEMU. SUCCESSIVE_IRQ_MAX_COUNT, delay and the comment are borrowed as well.

Signed-off-by: Vitaly Kuznetsov <address@hidden>
---
 hw/intc/ioapic.c                  | 43 ++++++++++++++++++++++++++++++-
 hw/intc/trace-events              |  1 +
 include/hw/i386/ioapic_internal.h |  3 +++
 3 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c
index 9d75f84d3b..daf45cc8a8 100644
--- a/hw/intc/ioapic.c
+++ b/hw/intc/ioapic.c
@@ -139,6 +139,15 @@ static void ioapic_service(IOAPICCommonState *s)
     }
 }
 
+#define SUCCESSIVE_IRQ_MAX_COUNT 10000
+
+static void ioapic_timer(void *opaque)
+{
+    IOAPICCommonState *s = opaque;
+
+    ioapic_service(s);
+}
+
 static void ioapic_set_irq(void *opaque, int vector, int level)
 {
     IOAPICCommonState *s = opaque;
@@ -227,7 +236,28 @@ void ioapic_eoi_broadcast(int vector)
                 trace_ioapic_clear_remote_irr(n, vector);
                 s->ioredtbl[n] = entry & ~IOAPIC_LVT_REMOTE_IRR;
                 if (!(entry & IOAPIC_LVT_MASKED) && (s->irr & (1 << n))) {
-                    ioapic_service(s);
+                    bool level = ((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1)
+                        == IOAPIC_TRIGGER_LEVEL;
+
+                    ++s->irq_reassert[vector];
+                    if (!level ||
+                        s->irq_reassert[vector] < SUCCESSIVE_IRQ_MAX_COUNT) {
+                        ioapic_service(s);
+                    } else {
+                        /*
+                         * Real hardware does not deliver the interrupt
+                         * immediately during eoi broadcast, and this lets a
+                         * buggy guest make slow progress even if it does not
+                         * correctly handle a level-triggered interrupt. 
Emulate
+                         * this behavior if we detect an interrupt storm.
+                         */
+                        trace_ioapic_eoi_delayed_reassert(vector);
+                        timer_mod(s->timer,
+                                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
+                                  NANOSECONDS_PER_SECOND / 100);
+                    }
+                } else {
+                    s->irq_reassert[vector] = 0;
                 }
             }
         }
@@ -401,6 +431,8 @@ static void ioapic_realize(DeviceState *dev, Error **errp)
     memory_region_init_io(&s->io_memory, OBJECT(s), &ioapic_io_ops, s,
                           "ioapic", 0x1000);
 
+    s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ioapic_timer, s);
+
     qdev_init_gpio_in(dev, ioapic_set_irq, IOAPIC_NUM_PINS);
 
     ioapics[ioapic_no] = s;
@@ -408,6 +440,14 @@ static void ioapic_realize(DeviceState *dev, Error **errp)
     qemu_add_machine_init_done_notifier(&s->machine_done);
 }
 
+static void ioapic_unrealize(DeviceState *dev, Error **errp)
+{
+    IOAPICCommonState *s = IOAPIC_COMMON(dev);
+
+    timer_del(s->timer);
+    timer_free(s->timer);
+}
+
 static Property ioapic_properties[] = {
     DEFINE_PROP_UINT8("version", IOAPICCommonState, version, IOAPIC_VER_DEF),
     DEFINE_PROP_END_OF_LIST(),
@@ -419,6 +459,7 @@ static void ioapic_class_init(ObjectClass *klass, void 
*data)
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     k->realize = ioapic_realize;
+    k->unrealize = ioapic_unrealize;
     /*
      * If APIC is in kernel, we need to update the kernel cache after
      * migration, otherwise first 24 gsi routes will be invalid.
diff --git a/hw/intc/trace-events b/hw/intc/trace-events
index a28bdce925..90c9d07c1a 100644
--- a/hw/intc/trace-events
+++ b/hw/intc/trace-events
@@ -25,6 +25,7 @@ apic_mem_writel(uint64_t addr, uint32_t val) "0x%"PRIx64" = 
0x%08x"
 ioapic_set_remote_irr(int n) "set remote irr for pin %d"
 ioapic_clear_remote_irr(int n, int vector) "clear remote irr for pin %d vector 
%d"
 ioapic_eoi_broadcast(int vector) "EOI broadcast for vector %d"
+ioapic_eoi_delayed_reassert(int vector) "delayed reassert on EOI broadcast for 
vector %d"
 ioapic_mem_read(uint8_t addr, uint8_t regsel, uint8_t size, uint32_t val) 
"ioapic mem read addr 0x%"PRIx8" regsel: 0x%"PRIx8" size 0x%"PRIx8" retval 
0x%"PRIx32
 ioapic_mem_write(uint8_t addr, uint8_t regsel, uint8_t size, uint32_t val) 
"ioapic mem write addr 0x%"PRIx8" regsel: 0x%"PRIx8" size 0x%"PRIx8" val 
0x%"PRIx32
 ioapic_set_irq(int vector, int level) "vector: %d level: %d"
diff --git a/include/hw/i386/ioapic_internal.h 
b/include/hw/i386/ioapic_internal.h
index 9848f391bb..e0ee88db40 100644
--- a/include/hw/i386/ioapic_internal.h
+++ b/include/hw/i386/ioapic_internal.h
@@ -96,6 +96,7 @@ typedef struct IOAPICCommonClass {
     SysBusDeviceClass parent_class;
 
     DeviceRealize realize;
+    DeviceUnrealize unrealize;
     void (*pre_save)(IOAPICCommonState *s);
     void (*post_load)(IOAPICCommonState *s);
 } IOAPICCommonClass;
@@ -111,6 +112,8 @@ struct IOAPICCommonState {
     uint8_t version;
     uint64_t irq_count[IOAPIC_NUM_PINS];
     int irq_level[IOAPIC_NUM_PINS];
+    int irq_reassert[IOAPIC_NUM_PINS];
+    QEMUTimer *timer;
 };
 
 void ioapic_reset_common(DeviceState *dev);
-- 
2.20.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]