qemu-ppc
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH qemu v2 2/2] ppc/spapr: Implement H_WATCHDOG


From: Cédric Le Goater
Subject: Re: [PATCH qemu v2 2/2] ppc/spapr: Implement H_WATCHDOG
Date: Mon, 20 Jun 2022 08:23:44 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Thunderbird/91.9.0

On 6/20/22 05:13, Alexey Kardashevskiy wrote:


On 6/18/22 21:01, Cédric Le Goater wrote:
On 6/17/22 08:07, Alexey Kardashevskiy wrote:
The new PAPR 2.12 defines a watchdog facility managed via the new
H_WATCHDOG hypercall.

This adds H_WATCHDOG support which a proposed driver for pseries uses:
https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=303120

This was tested by running QEMU with a debug kernel and command line:
-append \
  "pseries-wdt.timeout=60 pseries-wdt.nowayout=1 pseries-wdt.action=2"

and running "echo V > /dev/watchdog0" inside the VM.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
Changes:
v2:
* QOM'ed timers, "action" and "expire" are available via QMP
* removed @timeout from SpaprWatchdog
* moved the driver to hw/watchdog
* fixed error handling in the hcall handler
* used new SETFIELD/GETFIELD
---
  include/hw/ppc/spapr.h       |  29 +++-
  hw/ppc/spapr.c               |   4 +
  hw/watchdog/spapr_watchdog.c | 248 +++++++++++++++++++++++++++++++++++
  hw/watchdog/meson.build      |   1 +
  hw/watchdog/trace-events     |   7 +
  5 files changed, 288 insertions(+), 1 deletion(-)
  create mode 100644 hw/watchdog/spapr_watchdog.c

diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 072dda2c7265..ef1e38abd5c7 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -164,6 +164,25 @@ struct SpaprMachineClass {
      SpaprIrq *irq;
  };
+#define WDT_MAX_WATCHDOGS       4      /* Maximum number of watchdog devices */
+
+#define WDT_HARD_POWER_OFF      0
+#define WDT_HARD_RESTART        1
+#define WDT_DUMP_RESTART        2
+
+#define TYPE_SPAPR_WDT "spapr-wdt"
+OBJECT_DECLARE_SIMPLE_TYPE(SpaprWatchdog, SPAPR_WDT)
+
+typedef struct SpaprWatchdog {
+    /*< private >*/
+    DeviceState parent_obj;
+    /*< public >*/
+
+    unsigned num;

uint8_t should be enough no ? I see num is only used for trace events.


It should but why? It is not migrating, and using uint8_t creates alignment gap 
here, and no benefit :) And I am removing it anyway, see below.


+    QEMUTimer timer;
+    uint8_t action;
+} SpaprWatchdog;
+
  /**
   * SpaprMachineState:
   */
@@ -264,6 +283,8 @@ struct SpaprMachineState {
      uint32_t FORM2_assoc_array[NUMA_NODES_MAX_NUM][FORM2_NUMA_ASSOC_SIZE];
      Error *fwnmi_migration_blocker;
+
+    SpaprWatchdog wds[WDT_MAX_WATCHDOGS];
  };
  #define H_SUCCESS         0
@@ -344,6 +365,7 @@ struct SpaprMachineState {
  #define H_P7              -60
  #define H_P8              -61
  #define H_P9              -62
+#define H_NOOP            -63
  #define H_UNSUPPORTED     -67
  #define H_OVERLAP         -68
  #define H_UNSUPPORTED_FLAG -256
@@ -564,8 +586,9 @@ struct SpaprMachineState {
  #define H_SCM_HEALTH            0x400
  #define H_RPT_INVALIDATE        0x448
  #define H_SCM_FLUSH             0x44C
+#define H_WATCHDOG              0x45C
-#define MAX_HCALL_OPCODE        H_SCM_FLUSH
+#define MAX_HCALL_OPCODE        H_WATCHDOG
  /* The hcalls above are standardized in PAPR and implemented by pHyp
   * as well.
@@ -1027,6 +1050,7 @@ extern const VMStateDescription 
vmstate_spapr_cap_large_decr;
  extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
  extern const VMStateDescription vmstate_spapr_cap_fwnmi;
  extern const VMStateDescription vmstate_spapr_cap_rpt_invalidate;
+extern const VMStateDescription vmstate_spapr_wdt;
  static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap)
  {
@@ -1063,4 +1087,7 @@ target_ulong 
spapr_vof_client_architecture_support(MachineState *ms,
                                                     target_ulong ovec_addr);
  void spapr_vof_client_dt_finalize(SpaprMachineState *spapr, void *fdt);
+/* H_WATCHDOG */
+void spapr_watchdog_init(SpaprMachineState *spapr);
+
  #endif /* HW_SPAPR_H */
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index fd4942e8813c..9a5382d5270f 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -898,6 +898,8 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void 
*fdt)
          add_str(hypertas, "hcall-hpt-resize");
      }
+    add_str(hypertas, "hcall-watchdog");
+
      _FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions",
                       hypertas->str, hypertas->len));
      g_string_free(hypertas, TRUE);
@@ -3051,6 +3053,8 @@ static void spapr_machine_init(MachineState *machine)
          spapr->vof->fw_size = fw_size; /* for claim() on itself */
          spapr_register_hypercall(KVMPPC_H_VOF_CLIENT, spapr_h_vof_client);
      }
+
+    spapr_watchdog_init(spapr);
  }
  #define DEFAULT_KVM_TYPE "auto"
diff --git a/hw/watchdog/spapr_watchdog.c b/hw/watchdog/spapr_watchdog.c
new file mode 100644
index 000000000000..aeaf7c52cbad
--- /dev/null
+++ b/hw/watchdog/spapr_watchdog.c
@@ -0,0 +1,248 @@
+/*
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "target/ppc/cpu.h"
+#include "migration/vmstate.h"
+#include "trace.h"
+
+#include "hw/ppc/spapr.h"
+
+/*
+ * Bits 47: "leaveOtherWatchdogsRunningOnTimeout", specified on
+ * the "Start watchdog" operation,
+ * 0 - stop out-standing watchdogs on timeout,
+ * 1 - leave outstanding watchdogs running on timeout
+ */
+#define PSERIES_WDTF_LEAVE_OTHER    PPC_BIT(47)
+
+/*    Bits 48-55: "operation" */
+#define PSERIES_WDTF_OP(op)             SETFIELD(PPC_BITMASK(48, 55), 0, (op))
+#define PSERIES_WDTF_OP_START           PSERIES_WDTF_OP(0x1)
+#define PSERIES_WDTF_OP_STOP            PSERIES_WDTF_OP(0x2)
+#define PSERIES_WDTF_OP_QUERY           PSERIES_WDTF_OP(0x3)
+#define PSERIES_WDTF_OP_QUERY_LPM       PSERIES_WDTF_OP(0x4)
+
+/*    Bits 56-63: "timeoutAction" */
+#define PSERIES_WDTF_ACTION(ac)         SETFIELD(PPC_BITMASK(56, 63), 0, (ac))
+#define PSERIES_WDTF_ACTION_HARD_POWER_OFF  PSERIES_WDTF_ACTION(0x1)
+#define PSERIES_WDTF_ACTION_HARD_RESTART    PSERIES_WDTF_ACTION(0x2)
+#define PSERIES_WDTF_ACTION_DUMP_RESTART    PSERIES_WDTF_ACTION(0x3)
+#define PSERIES_WDTF_RESERVED           PPC_BITMASK(0, 46)
+
+/*
+ * For the "Query watchdog capabilities" operation, a uint64 structure
+ * defined as:
+ * Bits 0-15: The minimum supported timeout in milliseconds
+ * Bits 16-31: The number of watchdogs supported
+ * Bits 32-63: Reserved
+ */
+#define PSERIES_WDTQ_MIN_TIMEOUT(ms)    SETFIELD(PPC_BITMASK(0, 15), 0, (ms))
+#define PSERIES_WDTQ_NUM(n)             SETFIELD(PPC_BITMASK(16, 31), 0, (n))
+
+/*
+ * For the "Query watchdog LPM requirement" operation:
+ * 1 = The given "watchdogNumber" must be stopped prior to suspending
+ * 2 = The given "watchdogNumber" does not have to be stopped prior to
+ * suspending
+ */
+#define PSERIES_WDTQL_STOPPED               1
+#define PSERIES_WDTQL_QUERY_NOT_STOPPED     2
+
+#define WDT_MIN_TIMEOUT 1 /* 1ms */
+
+static void watchdog_expired(void *pw)
+{
+    struct SpaprWatchdog *w = pw;

s/struct//

+    CPUState *cs;
+
+    trace_spapr_watchdog_expired(w->num, w->action);
+    switch (w->action) {
+    case WDT_HARD_POWER_OFF:
+        qemu_system_vmstop_request(RUN_STATE_SHUTDOWN);
+        break;
+    case WDT_HARD_RESTART:
+        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
+        break;
+    case WDT_DUMP_RESTART:
+        CPU_FOREACH(cs) {
+            async_run_on_cpu(cs, spapr_do_system_reset_on_cpu, 
RUN_ON_CPU_NULL);
+        }
+        break;
+    }
+}
+
+static target_ulong watchdog_stop(unsigned watchdogNumber,
+                                  struct SpaprWatchdog *w)
+{
+    target_ulong ret = H_NOOP;
+
+    if (timer_pending(&w->timer)) {
+        timer_del(&w->timer);
+        ret = H_SUCCESS;
+    }
+    trace_spapr_watchdog_stop(watchdogNumber, ret);
+
+    return ret;
+}
+
+static target_ulong h_watchdog(PowerPCCPU *cpu,
+                               SpaprMachineState *spapr,
+                               target_ulong opcode, target_ulong *args)
+{
+    target_ulong flags = args[0];
+    target_ulong watchdogNumber = args[1];
+    target_ulong timeoutInMs = args[2];
+    unsigned operation = flags & PSERIES_WDTF_OP(~0);
+    unsigned timeoutAction = flags & PSERIES_WDTF_ACTION(~0);
+    struct SpaprWatchdog *w;
+
+    if (flags & PSERIES_WDTF_RESERVED) {
+        return H_PARAMETER;
+    }
+
+    switch (operation) {
+    case PSERIES_WDTF_OP_START:
+        if (watchdogNumber > ARRAY_SIZE(spapr->wds)) {
+            return H_P2;
+        }
+        if (timeoutInMs <= WDT_MIN_TIMEOUT) {
+            return H_P3;
+        }
+
+        w = &spapr->wds[watchdogNumber - 1];

So first index is 1 in PAPR ...


Yes, I thought I commented on this somewhere but lost in rebases.



+        switch (timeoutAction) {
+        case PSERIES_WDTF_ACTION_HARD_POWER_OFF:
+            w->action = WDT_HARD_POWER_OFF;
+            break;
+        case PSERIES_WDTF_ACTION_HARD_RESTART:
+            w->action = WDT_HARD_RESTART;
+            break;
+        case PSERIES_WDTF_ACTION_DUMP_RESTART:
+            w->action = WDT_DUMP_RESTART;
+            break;
+        default:
+            return H_PARAMETER;
+        }
+        timer_mod(&w->timer,
+                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + timeoutInMs);
+        trace_spapr_watchdog_start(flags, watchdogNumber, timeoutInMs);
+        break;
+    case PSERIES_WDTF_OP_STOP:
+        if (watchdogNumber == (uint64_t) ~0) {

May be add a define for this special value. It's better for readability.

Will do.


+            int i;
+
+            for (i = 1; i <= ARRAY_SIZE(spapr->wds); ++i) {
+                watchdog_stop(i, &spapr->wds[i - 1]);
+            }
+        } else if (watchdogNumber <= ARRAY_SIZE(spapr->wds)) {
+            watchdog_stop(watchdogNumber, &spapr->wds[watchdogNumber - 1]);
+        } else {
+            return H_P2;
+        }
+        break;
+    case PSERIES_WDTF_OP_QUERY:
+        args[0] = PSERIES_WDTQ_MIN_TIMEOUT(WDT_MIN_TIMEOUT) |
+            PSERIES_WDTQ_NUM(ARRAY_SIZE(spapr->wds));
+        trace_spapr_watchdog_query(args[0]);
+        break;
+    case PSERIES_WDTF_OP_QUERY_LPM:
+        if (watchdogNumber > ARRAY_SIZE(spapr->wds)) {
+            return H_P2;
+        }
+        args[0] = PSERIES_WDTQL_QUERY_NOT_STOPPED;
+        trace_spapr_watchdog_query_lpm(args[0]);
+        break;
+    default:
+        return H_PARAMETER;
+    }
+
+    return H_SUCCESS;
+}
+
+void spapr_watchdog_init(SpaprMachineState *spapr)

This could have a 'Error **errp' parameter.


I was repeating somewhat similar spapr_rtc_create(), and the caller - 
spapr_machine_init() - does not have *errp. Seems pointless as if it fails - 
something is horrendously broken.


Well, it's up to the caller to decide what to do in case of
errors. If in this case, it is "log an error and exit",
I would simply :

  spapr_watchdog_init(spapr, &error_fatal);

But it is not necessarily fatal to fail to initialize some
device.




+{
+    int i;
+
+    for (i = 0; i < ARRAY_SIZE(spapr->wds); ++i) {
+        char name[16];

g_autofree char *name = g_strdup_printf("wdt%d", i + 1);

+        SpaprWatchdog *w = &spapr->wds[i];
+
+        w->num = i + 1;

it should be a property.

This cannot change and used only for tracing, and the QOM name has the number 
as well. I am replacing it with

SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
unsigned num = w - spapr->wds;

and removing the @num from the struct.

Nice !

Thanks

C.



Thanks,


Thanks,

C.

+        snprintf(name, sizeof(name) - 1, "wdt%d", i + 1);
+        object_initialize_child_with_props(OBJECT(spapr), name, w,
+                                           sizeof(SpaprWatchdog),
+                                           TYPE_SPAPR_WDT,
+                                           &error_fatal, NULL);
+        qdev_realize(DEVICE(w), NULL, &error_fatal);
+    }
+}
+
+static bool watchdog_needed(void *opaque)
+{
+    SpaprWatchdog *w = opaque;
+
+    return timer_pending(&w->timer);
+}
+
+static const VMStateDescription vmstate_wdt = {
+    .name = "spapr_watchdog",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = watchdog_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8(action, SpaprWatchdog),
+        VMSTATE_TIMER(timer, SpaprWatchdog),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void spapr_wdt_realize(DeviceState *dev, Error **errp)
+{
+    SpaprWatchdog *w = SPAPR_WDT(dev);
+
+    timer_init_ms(&w->timer, QEMU_CLOCK_VIRTUAL, watchdog_expired, w);
+
+    object_property_add_uint64_ptr(OBJECT(dev), "expire",
+                                   (uint64_t *)&w->timer.expire_time,
+                                   OBJ_PROP_FLAG_READ);
+    object_property_add_uint8_ptr(OBJECT(dev), "action", &w->action,
+                                  OBJ_PROP_FLAG_READ);
+}
+
+static void spapr_wdt_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+
+    dc->realize = spapr_wdt_realize;
+    dc->vmsd = &vmstate_wdt;
+    dc->user_creatable = false;
+}
+
+static const TypeInfo spapr_wdt_info = {
+    .name          = TYPE_SPAPR_WDT,
+    .parent        = TYPE_DEVICE,
+    .instance_size = sizeof(SpaprWatchdog),
+    .class_init    = spapr_wdt_class_init,
+};
+
+static void spapr_watchdog_register_types(void)
+{
+    spapr_register_hypercall(H_WATCHDOG, h_watchdog);
+    type_register_static(&spapr_wdt_info);
+}
+
+type_init(spapr_watchdog_register_types)
diff --git a/hw/watchdog/meson.build b/hw/watchdog/meson.build
index 054c403dea7c..8974b5cf4c8a 100644
--- a/hw/watchdog/meson.build
+++ b/hw/watchdog/meson.build
@@ -6,3 +6,4 @@ softmmu_ss.add(when: 'CONFIG_WDT_DIAG288', if_true: 
files('wdt_diag288.c'))
  softmmu_ss.add(when: 'CONFIG_ASPEED_SOC', if_true: files('wdt_aspeed.c'))
  softmmu_ss.add(when: 'CONFIG_WDT_IMX2', if_true: files('wdt_imx2.c'))
  softmmu_ss.add(when: 'CONFIG_WDT_SBSA', if_true: files('sbsa_gwdt.c'))
+specific_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr_watchdog.c'))
diff --git a/hw/watchdog/trace-events b/hw/watchdog/trace-events
index e7523e22aaf2..89ccbcfdfd20 100644
--- a/hw/watchdog/trace-events
+++ b/hw/watchdog/trace-events
@@ -9,3 +9,10 @@ cmsdk_apb_watchdog_lock(uint32_t lock) "CMSDK APB watchdog: lock 
%" PRIu32
  # wdt-aspeed.c
  aspeed_wdt_read(uint64_t addr, uint32_t size) "@0x%" PRIx64 " size=%d"
  aspeed_wdt_write(uint64_t addr, uint32_t size, uint64_t data) "@0x%" PRIx64 " 
size=%d value=0x%"PRIx64
+
+# spapr_watchdog.c
+spapr_watchdog_start(uint64_t flags, uint64_t num, uint64_t timeout) "Flags 0x%" PRIx64 " num=%" 
PRId64 " %" PRIu64 "ms"
+spapr_watchdog_stop(uint64_t num, uint64_t ret) "num=%" PRIu64 " ret=%" PRId64
+spapr_watchdog_query(uint64_t caps) "caps=0x%" PRIx64
+spapr_watchdog_query_lpm(uint64_t caps) "caps=0x%" PRIx64
+spapr_watchdog_expired(uint64_t num, unsigned action) "num=%" PRIu64 " 
action=%u"





reply via email to

[Prev in Thread] Current Thread [Next in Thread]