2022-04-25 14:32:46

by Sebastian Ene

[permalink] [raw]
Subject: [PATCH v3 2/2] misc: Add a mechanism to detect stalls on guest vCPUs

This driver creates per-cpu hrtimers which are required to do the
periodic 'pet' operation. On a conventional watchdog-core driver, the
userspace is responsible for delivering the 'pet' events by writing to
the particular /dev/watchdogN node. In this case we require a strong
thread affinity to be able to account for lost time on a per vCPU.

This part of the driver is the 'frontend' which is reponsible for
delivering the periodic 'pet' events, configuring the virtual peripheral
and listening for cpu hotplug events. The other part of the driver
handles the peripheral emulation and this part accounts for lost time by
looking at the /proc/{}/task/{}/stat entries and is located here:
https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/3548817

Signed-off-by: Sebastian Ene <[email protected]>
---
drivers/misc/Kconfig | 12 +++
drivers/misc/Makefile | 1 +
drivers/misc/vm-wdt.c | 207 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 220 insertions(+)
create mode 100644 drivers/misc/vm-wdt.c

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 2b9572a6d114..71c173e3f064 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -493,6 +493,18 @@ config OPEN_DICE

If unsure, say N.

+config VM_WATCHDOG
+ tristate "Virtual Machine Watchdog"
+ select LOCKUP_DETECTOR
+ help
+ Detect CPU locks on the virtual machine. This driver relies on the
+ hrtimers which are CPU-binded to do the 'pet' operation. When a vCPU
+ has to do a 'pet', it exists the guest through MMIO write and the
+ backend driver takes into account the lost ticks for this particular
+ CPU.
+ To compile this driver as a module, choose M here: the
+ module will be called vm-wdt.
+
source "drivers/misc/c2port/Kconfig"
source "drivers/misc/eeprom/Kconfig"
source "drivers/misc/cb710/Kconfig"
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 2ec634354cf5..fa9d644da5db 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -59,3 +59,4 @@ obj-$(CONFIG_XILINX_SDFEC) += xilinx_sdfec.o
obj-$(CONFIG_HISI_HIKEY_USB) += hisi_hikey_usb.o
obj-$(CONFIG_UID_SYS_STATS) += uid_sys_stats.o
obj-$(CONFIG_OPEN_DICE) += open-dice.o
+obj-$(CONFIG_VM_WATCHDOG) += vm-wdt.o
\ No newline at end of file
diff --git a/drivers/misc/vm-wdt.c b/drivers/misc/vm-wdt.c
new file mode 100644
index 000000000000..0c4df2fefbb9
--- /dev/null
+++ b/drivers/misc/vm-wdt.c
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Virtual watchdog driver.
+// Copyright (C) Google, 2022
+
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/nmi.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/param.h>
+#include <linux/percpu.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#define DRV_NAME "vm_wdt"
+
+#define VMWDT_REG_STATUS (0x00)
+#define VMWDT_REG_LOAD_CNT (0x04)
+#define VMWDT_REG_CURRENT_CNT (0x08)
+#define VMWDT_REG_CLOCK_FREQ_HZ (0x0C)
+#define VMWDT_REG_LEN (0x10)
+
+#define VMWDT_DEFAULT_CLOCK_HZ (10)
+#define VMWDT_DEFAULT_TIMEOT_SEC (8)
+
+struct vm_wdt_s {
+ void __iomem *membase;
+ u32 clock_freq;
+ u32 expiration_sec;
+ u32 ping_timeout_ms;
+ struct hrtimer per_cpu_hrtimer;
+ struct platform_device *dev;
+};
+
+#define vmwdt_reg_write(wdt, reg, value) \
+ iowrite32((value), (wdt)->membase + (reg))
+#define vmwdt_reg_read(wdt, reg) \
+ io32read((wdt)->membase + (reg))
+
+static struct platform_device *virt_dev;
+
+static enum hrtimer_restart vmwdt_timer_fn(struct hrtimer *hrtimer)
+{
+ struct vm_wdt_s *cpu_wdt;
+ u32 ticks;
+
+ cpu_wdt = container_of(hrtimer, struct vm_wdt_s, per_cpu_hrtimer);
+ ticks = cpu_wdt->clock_freq * cpu_wdt->expiration_sec;
+ vmwdt_reg_write(cpu_wdt, VMWDT_REG_LOAD_CNT, ticks);
+ hrtimer_forward_now(hrtimer, ms_to_ktime(cpu_wdt->ping_timeout_ms));
+
+ return HRTIMER_RESTART;
+}
+
+static void vmwdt_start(void *arg)
+{
+ u32 ticks;
+ struct vm_wdt_s *cpu_wdt = arg;
+ struct hrtimer *hrtimer = &cpu_wdt->per_cpu_hrtimer;
+
+ vmwdt_reg_write(cpu_wdt, VMWDT_REG_CLOCK_FREQ_HZ,
+ cpu_wdt->clock_freq);
+
+ /* Compute the number of ticks required for the watchdog counter
+ * register based on the internal clock frequency and the watchdog
+ * timeout given from the device tree.
+ */
+ ticks = cpu_wdt->clock_freq * cpu_wdt->expiration_sec;
+ vmwdt_reg_write(cpu_wdt, VMWDT_REG_LOAD_CNT, ticks);
+
+ /* Enable the internal clock and start the watchdog */
+ vmwdt_reg_write(cpu_wdt, VMWDT_REG_STATUS, 1);
+
+ hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer->function = vmwdt_timer_fn;
+ hrtimer_start(hrtimer, ms_to_ktime(cpu_wdt->ping_timeout_ms),
+ HRTIMER_MODE_REL_PINNED);
+}
+
+static void vmwdt_stop(void *arg)
+{
+ struct vm_wdt_s *cpu_wdt = arg;
+ struct hrtimer *hrtimer = &cpu_wdt->per_cpu_hrtimer;
+
+ hrtimer_cancel(hrtimer);
+
+ /* Disable the watchdog */
+ vmwdt_reg_write(cpu_wdt, VMWDT_REG_STATUS, 0);
+}
+
+static int start_watchdog_on_cpu(unsigned int cpu)
+{
+ struct vm_wdt_s *vm_wdt = platform_get_drvdata(virt_dev);
+
+ vmwdt_start(this_cpu_ptr(vm_wdt));
+ return 0;
+}
+
+static int stop_watchdog_on_cpu(unsigned int cpu)
+{
+ struct vm_wdt_s *vm_wdt = platform_get_drvdata(virt_dev);
+
+ vmwdt_stop(this_cpu_ptr(vm_wdt));
+ return 0;
+}
+
+static int vmwdt_probe(struct platform_device *dev)
+{
+ int cpu, ret, err;
+ void __iomem *membase;
+ struct resource *r;
+ struct vm_wdt_s *vm_wdt;
+ u32 wdt_clock, wdt_timeout_sec = 0;
+
+ r = platform_get_resource(dev, IORESOURCE_MEM, 0);
+ if (r == NULL)
+ return -ENOENT;
+
+ vm_wdt = alloc_percpu(typeof(struct vm_wdt_s));
+ if (!vm_wdt)
+ return -ENOMEM;
+
+ membase = ioremap(r->start, resource_size(r));
+ if (!membase) {
+ ret = -ENXIO;
+ goto err_withmem;
+ }
+
+ virt_dev = dev;
+ platform_set_drvdata(dev, vm_wdt);
+ if (of_property_read_u32(dev->dev.of_node, "clock", &wdt_clock))
+ wdt_clock = VMWDT_DEFAULT_CLOCK_HZ;
+
+ if (of_property_read_u32(dev->dev.of_node, "timeout-sec",
+ &wdt_timeout_sec))
+ wdt_timeout_sec = VMWDT_DEFAULT_TIMEOT_SEC;
+
+ for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask) {
+ struct vm_wdt_s *cpu_wdt = per_cpu_ptr(vm_wdt, cpu);
+
+ cpu_wdt->membase = membase + cpu * VMWDT_REG_LEN;
+ cpu_wdt->clock_freq = wdt_clock;
+ cpu_wdt->expiration_sec = wdt_timeout_sec;
+ cpu_wdt->ping_timeout_ms = wdt_timeout_sec * MSEC_PER_SEC / 2;
+ smp_call_function_single(cpu, vmwdt_start, cpu_wdt, true);
+ }
+
+ err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+ "virt/watchdog:online",
+ start_watchdog_on_cpu,
+ stop_watchdog_on_cpu);
+ if (err < 0) {
+ pr_warn("could not be initialized");
+ ret = err;
+ goto err_withmem;
+ }
+
+ return 0;
+
+err_withmem:
+ free_percpu(vm_wdt);
+ return ret;
+}
+
+static int vmwdt_remove(struct platform_device *dev)
+{
+ int cpu;
+ struct vm_wdt_s *vm_wdt = platform_get_drvdata(dev);
+
+ for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask) {
+ struct vm_wdt_s *cpu_wdt = per_cpu_ptr(vm_wdt, cpu);
+
+ smp_call_function_single(cpu, vmwdt_stop, cpu_wdt, true);
+ }
+
+ free_percpu(vm_wdt);
+ return 0;
+}
+
+static const struct of_device_id vmwdt_of_match[] = {
+ { .compatible = "qemu,vm-watchdog", },
+ {}
+};
+
+MODULE_DEVICE_TABLE(of, vm_watchdog_of_match);
+
+static struct platform_driver vmwdt_driver = {
+ .probe = vmwdt_probe,
+ .remove = vmwdt_remove,
+ .driver = {
+ .name = DRV_NAME,
+ .of_match_table = vmwdt_of_match,
+ },
+};
+
+module_platform_driver(vmwdt_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Sebastian Ene <[email protected]>");
+MODULE_DESCRIPTION("Virtual watchdog driver");
--
2.36.0.rc2.479.g8af0fa9b8e-goog


2022-04-25 18:38:02

by Greg Kroah-Hartman

[permalink] [raw]
Subject: Re: [PATCH v3 2/2] misc: Add a mechanism to detect stalls on guest vCPUs

On Mon, Apr 25, 2022 at 01:42:07PM +0000, Sebastian Ene wrote:
> This driver creates per-cpu hrtimers which are required to do the
> periodic 'pet' operation. On a conventional watchdog-core driver, the
> userspace is responsible for delivering the 'pet' events by writing to
> the particular /dev/watchdogN node. In this case we require a strong
> thread affinity to be able to account for lost time on a per vCPU.
>
> This part of the driver is the 'frontend' which is reponsible for
> delivering the periodic 'pet' events, configuring the virtual peripheral
> and listening for cpu hotplug events. The other part of the driver
> handles the peripheral emulation and this part accounts for lost time by
> looking at the /proc/{}/task/{}/stat entries and is located here:
> https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/3548817
>
> Signed-off-by: Sebastian Ene <[email protected]>
> ---
> drivers/misc/Kconfig | 12 +++
> drivers/misc/Makefile | 1 +
> drivers/misc/vm-wdt.c | 207 ++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 220 insertions(+)
> create mode 100644 drivers/misc/vm-wdt.c
>
> diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
> index 2b9572a6d114..71c173e3f064 100644
> --- a/drivers/misc/Kconfig
> +++ b/drivers/misc/Kconfig
> @@ -493,6 +493,18 @@ config OPEN_DICE
>
> If unsure, say N.
>
> +config VM_WATCHDOG
> + tristate "Virtual Machine Watchdog"
> + select LOCKUP_DETECTOR
> + help
> + Detect CPU locks on the virtual machine. This driver relies on the
> + hrtimers which are CPU-binded to do the 'pet' operation. When a vCPU
> + has to do a 'pet', it exists the guest through MMIO write and the
> + backend driver takes into account the lost ticks for this particular
> + CPU.
> + To compile this driver as a module, choose M here: the
> + module will be called vm-wdt.
> +
> source "drivers/misc/c2port/Kconfig"
> source "drivers/misc/eeprom/Kconfig"
> source "drivers/misc/cb710/Kconfig"
> diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
> index 2ec634354cf5..fa9d644da5db 100644
> --- a/drivers/misc/Makefile
> +++ b/drivers/misc/Makefile
> @@ -59,3 +59,4 @@ obj-$(CONFIG_XILINX_SDFEC) += xilinx_sdfec.o
> obj-$(CONFIG_HISI_HIKEY_USB) += hisi_hikey_usb.o
> obj-$(CONFIG_UID_SYS_STATS) += uid_sys_stats.o
> obj-$(CONFIG_OPEN_DICE) += open-dice.o
> +obj-$(CONFIG_VM_WATCHDOG) += vm-wdt.o

We have no limit on names, why not "vm-watchdog"?

> \ No newline at end of file
> diff --git a/drivers/misc/vm-wdt.c b/drivers/misc/vm-wdt.c
> new file mode 100644
> index 000000000000..0c4df2fefbb9
> --- /dev/null
> +++ b/drivers/misc/vm-wdt.c
> @@ -0,0 +1,207 @@
> +// SPDX-License-Identifier: GPL-2.0
> +//
> +// Virtual watchdog driver.
> +// Copyright (C) Google, 2022

I will need a watchdog maintainer to agree that this is the way to do
this as I really really do not understand why you can not use that
subsystem here.

> +
> +#include <linux/cpu.h>
> +#include <linux/init.h>
> +#include <linux/io.h>
> +#include <linux/kernel.h>
> +
> +#include <linux/device.h>
> +#include <linux/interrupt.h>
> +#include <linux/module.h>
> +#include <linux/nmi.h>
> +#include <linux/of.h>
> +#include <linux/of_device.h>
> +#include <linux/param.h>
> +#include <linux/percpu.h>
> +#include <linux/platform_device.h>
> +#include <linux/slab.h>
> +
> +#define DRV_NAME "vm_wdt"

KBUILD_MODNAME please

> +
> +#define VMWDT_REG_STATUS (0x00)
> +#define VMWDT_REG_LOAD_CNT (0x04)
> +#define VMWDT_REG_CURRENT_CNT (0x08)
> +#define VMWDT_REG_CLOCK_FREQ_HZ (0x0C)
> +#define VMWDT_REG_LEN (0x10)
> +
> +#define VMWDT_DEFAULT_CLOCK_HZ (10)
> +#define VMWDT_DEFAULT_TIMEOT_SEC (8)
> +
> +struct vm_wdt_s {
> + void __iomem *membase;
> + u32 clock_freq;
> + u32 expiration_sec;
> + u32 ping_timeout_ms;
> + struct hrtimer per_cpu_hrtimer;
> + struct platform_device *dev;
> +};
> +
> +#define vmwdt_reg_write(wdt, reg, value) \
> + iowrite32((value), (wdt)->membase + (reg))
> +#define vmwdt_reg_read(wdt, reg) \
> + io32read((wdt)->membase + (reg))
> +
> +static struct platform_device *virt_dev;

Only one device in the system? Please no, use the correct apis and you
will not have any limits like this.

> +
> +static enum hrtimer_restart vmwdt_timer_fn(struct hrtimer *hrtimer)
> +{
> + struct vm_wdt_s *cpu_wdt;
> + u32 ticks;
> +
> + cpu_wdt = container_of(hrtimer, struct vm_wdt_s, per_cpu_hrtimer);
> + ticks = cpu_wdt->clock_freq * cpu_wdt->expiration_sec;
> + vmwdt_reg_write(cpu_wdt, VMWDT_REG_LOAD_CNT, ticks);
> + hrtimer_forward_now(hrtimer, ms_to_ktime(cpu_wdt->ping_timeout_ms));
> +
> + return HRTIMER_RESTART;
> +}
> +
> +static void vmwdt_start(void *arg)
> +{
> + u32 ticks;
> + struct vm_wdt_s *cpu_wdt = arg;
> + struct hrtimer *hrtimer = &cpu_wdt->per_cpu_hrtimer;
> +
> + vmwdt_reg_write(cpu_wdt, VMWDT_REG_CLOCK_FREQ_HZ,
> + cpu_wdt->clock_freq);
> +
> + /* Compute the number of ticks required for the watchdog counter
> + * register based on the internal clock frequency and the watchdog
> + * timeout given from the device tree.
> + */
> + ticks = cpu_wdt->clock_freq * cpu_wdt->expiration_sec;
> + vmwdt_reg_write(cpu_wdt, VMWDT_REG_LOAD_CNT, ticks);
> +
> + /* Enable the internal clock and start the watchdog */
> + vmwdt_reg_write(cpu_wdt, VMWDT_REG_STATUS, 1);
> +
> + hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> + hrtimer->function = vmwdt_timer_fn;
> + hrtimer_start(hrtimer, ms_to_ktime(cpu_wdt->ping_timeout_ms),
> + HRTIMER_MODE_REL_PINNED);
> +}
> +
> +static void vmwdt_stop(void *arg)
> +{
> + struct vm_wdt_s *cpu_wdt = arg;
> + struct hrtimer *hrtimer = &cpu_wdt->per_cpu_hrtimer;
> +
> + hrtimer_cancel(hrtimer);
> +
> + /* Disable the watchdog */
> + vmwdt_reg_write(cpu_wdt, VMWDT_REG_STATUS, 0);
> +}
> +
> +static int start_watchdog_on_cpu(unsigned int cpu)
> +{
> + struct vm_wdt_s *vm_wdt = platform_get_drvdata(virt_dev);
> +
> + vmwdt_start(this_cpu_ptr(vm_wdt));
> + return 0;
> +}
> +
> +static int stop_watchdog_on_cpu(unsigned int cpu)
> +{
> + struct vm_wdt_s *vm_wdt = platform_get_drvdata(virt_dev);
> +
> + vmwdt_stop(this_cpu_ptr(vm_wdt));
> + return 0;
> +}
> +
> +static int vmwdt_probe(struct platform_device *dev)
> +{
> + int cpu, ret, err;
> + void __iomem *membase;
> + struct resource *r;
> + struct vm_wdt_s *vm_wdt;
> + u32 wdt_clock, wdt_timeout_sec = 0;
> +
> + r = platform_get_resource(dev, IORESOURCE_MEM, 0);
> + if (r == NULL)
> + return -ENOENT;
> +
> + vm_wdt = alloc_percpu(typeof(struct vm_wdt_s));
> + if (!vm_wdt)
> + return -ENOMEM;
> +
> + membase = ioremap(r->start, resource_size(r));
> + if (!membase) {
> + ret = -ENXIO;
> + goto err_withmem;
> + }
> +
> + virt_dev = dev;
> + platform_set_drvdata(dev, vm_wdt);
> + if (of_property_read_u32(dev->dev.of_node, "clock", &wdt_clock))
> + wdt_clock = VMWDT_DEFAULT_CLOCK_HZ;
> +
> + if (of_property_read_u32(dev->dev.of_node, "timeout-sec",
> + &wdt_timeout_sec))
> + wdt_timeout_sec = VMWDT_DEFAULT_TIMEOT_SEC;
> +
> + for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask) {
> + struct vm_wdt_s *cpu_wdt = per_cpu_ptr(vm_wdt, cpu);
> +
> + cpu_wdt->membase = membase + cpu * VMWDT_REG_LEN;
> + cpu_wdt->clock_freq = wdt_clock;
> + cpu_wdt->expiration_sec = wdt_timeout_sec;
> + cpu_wdt->ping_timeout_ms = wdt_timeout_sec * MSEC_PER_SEC / 2;
> + smp_call_function_single(cpu, vmwdt_start, cpu_wdt, true);
> + }
> +
> + err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
> + "virt/watchdog:online",
> + start_watchdog_on_cpu,
> + stop_watchdog_on_cpu);
> + if (err < 0) {
> + pr_warn("could not be initialized");

drivers should never use pr_* calls. dev_warn() please.

thanks,

greg k-h

2022-04-26 00:51:22

by Randy Dunlap

[permalink] [raw]
Subject: Re: [PATCH v3 2/2] misc: Add a mechanism to detect stalls on guest vCPUs



On 4/25/22 06:42, Sebastian Ene wrote:
> This driver creates per-cpu hrtimers which are required to do the
> periodic 'pet' operation. On a conventional watchdog-core driver, the
> userspace is responsible for delivering the 'pet' events by writing to
> the particular /dev/watchdogN node. In this case we require a strong
> thread affinity to be able to account for lost time on a per vCPU.
>
> This part of the driver is the 'frontend' which is reponsible for
> delivering the periodic 'pet' events, configuring the virtual peripheral
> and listening for cpu hotplug events. The other part of the driver
> handles the peripheral emulation and this part accounts for lost time by
> looking at the /proc/{}/task/{}/stat entries and is located here:
> https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/3548817
>
> Signed-off-by: Sebastian Ene <[email protected]>
> ---
> drivers/misc/Kconfig | 12 +++
> drivers/misc/Makefile | 1 +
> drivers/misc/vm-wdt.c | 207 ++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 220 insertions(+)
> create mode 100644 drivers/misc/vm-wdt.c
>
> diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
> index 2b9572a6d114..71c173e3f064 100644
> --- a/drivers/misc/Kconfig
> +++ b/drivers/misc/Kconfig
> @@ -493,6 +493,18 @@ config OPEN_DICE
>
> If unsure, say N.
>
> +config VM_WATCHDOG
> + tristate "Virtual Machine Watchdog"
> + select LOCKUP_DETECTOR
> + help
> + Detect CPU locks on the virtual machine. This driver relies on the
> + hrtimers which are CPU-binded to do the 'pet' operation. When a vCPU
> + has to do a 'pet', it exists the guest through MMIO write and the

exits ?
I dunno, but it's confusing.

> + backend driver takes into account the lost ticks for this particular
> + CPU.
> + To compile this driver as a module, choose M here: the
> + module will be called vm-wdt.



--
~Randy

2022-04-28 15:40:28

by Sebastian Ene

[permalink] [raw]
Subject: Re: [PATCH v3 2/2] misc: Add a mechanism to detect stalls on guest vCPUs

On Mon, Apr 25, 2022 at 09:02:03AM -0700, Randy Dunlap wrote:
>
>
> On 4/25/22 06:42, Sebastian Ene wrote:
> > This driver creates per-cpu hrtimers which are required to do the
> > periodic 'pet' operation. On a conventional watchdog-core driver, the
> > userspace is responsible for delivering the 'pet' events by writing to
> > the particular /dev/watchdogN node. In this case we require a strong
> > thread affinity to be able to account for lost time on a per vCPU.
> >
> > This part of the driver is the 'frontend' which is reponsible for
> > delivering the periodic 'pet' events, configuring the virtual peripheral
> > and listening for cpu hotplug events. The other part of the driver
> > handles the peripheral emulation and this part accounts for lost time by
> > looking at the /proc/{}/task/{}/stat entries and is located here:
> > https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/3548817
> >
> > Signed-off-by: Sebastian Ene <[email protected]>
> > ---
> > drivers/misc/Kconfig | 12 +++
> > drivers/misc/Makefile | 1 +
> > drivers/misc/vm-wdt.c | 207 ++++++++++++++++++++++++++++++++++++++++++
> > 3 files changed, 220 insertions(+)
> > create mode 100644 drivers/misc/vm-wdt.c
> >
> > diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
> > index 2b9572a6d114..71c173e3f064 100644
> > --- a/drivers/misc/Kconfig
> > +++ b/drivers/misc/Kconfig
> > @@ -493,6 +493,18 @@ config OPEN_DICE
> >
> > If unsure, say N.
> >
> > +config VM_WATCHDOG
> > + tristate "Virtual Machine Watchdog"
> > + select LOCKUP_DETECTOR
> > + help
> > + Detect CPU locks on the virtual machine. This driver relies on the
> > + hrtimers which are CPU-binded to do the 'pet' operation. When a vCPU
> > + has to do a 'pet', it exists the guest through MMIO write and the

Hi,

>
> exits ?
> I dunno, but it's confusing.
>

Yes, I will correct this typo, thanks for the notice.

> > + backend driver takes into account the lost ticks for this particular
> > + CPU.
> > + To compile this driver as a module, choose M here: the
> > + module will be called vm-wdt.
>
>
>
> --
> ~Randy

Sebastian

2022-04-28 21:51:10

by Sebastian Ene

[permalink] [raw]
Subject: Re: [PATCH v3 2/2] misc: Add a mechanism to detect stalls on guest vCPUs

On Mon, Apr 25, 2022 at 04:15:54PM +0200, Greg Kroah-Hartman wrote:
> On Mon, Apr 25, 2022 at 01:42:07PM +0000, Sebastian Ene wrote:
> > This driver creates per-cpu hrtimers which are required to do the
> > periodic 'pet' operation. On a conventional watchdog-core driver, the
> > userspace is responsible for delivering the 'pet' events by writing to
> > the particular /dev/watchdogN node. In this case we require a strong
> > thread affinity to be able to account for lost time on a per vCPU.
> >
> > This part of the driver is the 'frontend' which is reponsible for
> > delivering the periodic 'pet' events, configuring the virtual peripheral
> > and listening for cpu hotplug events. The other part of the driver
> > handles the peripheral emulation and this part accounts for lost time by
> > looking at the /proc/{}/task/{}/stat entries and is located here:
> > https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/3548817
> >
> > Signed-off-by: Sebastian Ene <[email protected]>
> > ---
> > drivers/misc/Kconfig | 12 +++
> > drivers/misc/Makefile | 1 +
> > drivers/misc/vm-wdt.c | 207 ++++++++++++++++++++++++++++++++++++++++++
> > 3 files changed, 220 insertions(+)
> > create mode 100644 drivers/misc/vm-wdt.c
> >
> > diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
> > index 2b9572a6d114..71c173e3f064 100644
> > --- a/drivers/misc/Kconfig
> > +++ b/drivers/misc/Kconfig
> > @@ -493,6 +493,18 @@ config OPEN_DICE
> >
> > If unsure, say N.
> >
> > +config VM_WATCHDOG
> > + tristate "Virtual Machine Watchdog"
> > + select LOCKUP_DETECTOR
> > + help
> > + Detect CPU locks on the virtual machine. This driver relies on the
> > + hrtimers which are CPU-binded to do the 'pet' operation. When a vCPU
> > + has to do a 'pet', it exists the guest through MMIO write and the
> > + backend driver takes into account the lost ticks for this particular
> > + CPU.
> > + To compile this driver as a module, choose M here: the
> > + module will be called vm-wdt.
> > +
> > source "drivers/misc/c2port/Kconfig"
> > source "drivers/misc/eeprom/Kconfig"
> > source "drivers/misc/cb710/Kconfig"
> > diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
> > index 2ec634354cf5..fa9d644da5db 100644
> > --- a/drivers/misc/Makefile
> > +++ b/drivers/misc/Makefile
> > @@ -59,3 +59,4 @@ obj-$(CONFIG_XILINX_SDFEC) += xilinx_sdfec.o
> > obj-$(CONFIG_HISI_HIKEY_USB) += hisi_hikey_usb.o
> > obj-$(CONFIG_UID_SYS_STATS) += uid_sys_stats.o
> > obj-$(CONFIG_OPEN_DICE) += open-dice.o
> > +obj-$(CONFIG_VM_WATCHDOG) += vm-wdt.o

Hi,

>
> We have no limit on names, why not "vm-watchdog"?
>

I will update the name to vm-watchdog.

> > \ No newline at end of file
> > diff --git a/drivers/misc/vm-wdt.c b/drivers/misc/vm-wdt.c
> > new file mode 100644
> > index 000000000000..0c4df2fefbb9
> > --- /dev/null
> > +++ b/drivers/misc/vm-wdt.c
> > @@ -0,0 +1,207 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +//
> > +// Virtual watchdog driver.
> > +// Copyright (C) Google, 2022
>
> I will need a watchdog maintainer to agree that this is the way to do
> this as I really really do not understand why you can not use that
> subsystem here.
>

Sure, let me CC Guenter Roeck as he initially helped me with the
feedback on this.

> > +
> > +#include <linux/cpu.h>
> > +#include <linux/init.h>
> > +#include <linux/io.h>
> > +#include <linux/kernel.h>
> > +
> > +#include <linux/device.h>
> > +#include <linux/interrupt.h>
> > +#include <linux/module.h>
> > +#include <linux/nmi.h>
> > +#include <linux/of.h>
> > +#include <linux/of_device.h>
> > +#include <linux/param.h>
> > +#include <linux/percpu.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/slab.h>
> > +
> > +#define DRV_NAME "vm_wdt"
>
> KBUILD_MODNAME please
>

Fixed.

> > +
> > +#define VMWDT_REG_STATUS (0x00)
> > +#define VMWDT_REG_LOAD_CNT (0x04)
> > +#define VMWDT_REG_CURRENT_CNT (0x08)
> > +#define VMWDT_REG_CLOCK_FREQ_HZ (0x0C)
> > +#define VMWDT_REG_LEN (0x10)
> > +
> > +#define VMWDT_DEFAULT_CLOCK_HZ (10)
> > +#define VMWDT_DEFAULT_TIMEOT_SEC (8)
> > +
> > +struct vm_wdt_s {
> > + void __iomem *membase;
> > + u32 clock_freq;
> > + u32 expiration_sec;
> > + u32 ping_timeout_ms;
> > + struct hrtimer per_cpu_hrtimer;
> > + struct platform_device *dev;
> > +};
> > +
> > +#define vmwdt_reg_write(wdt, reg, value) \
> > + iowrite32((value), (wdt)->membase + (reg))
> > +#define vmwdt_reg_read(wdt, reg) \
> > + io32read((wdt)->membase + (reg))
> > +
> > +static struct platform_device *virt_dev;
>
> Only one device in the system? Please no, use the correct apis and you
> will not have any limits like this.
>

Can you please explain what do you mean by use the correct APIs ? I
don't think it will be possible or necessary to register multipple
instances of this.

> > +
> > +static enum hrtimer_restart vmwdt_timer_fn(struct hrtimer *hrtimer)
> > +{
> > + struct vm_wdt_s *cpu_wdt;
> > + u32 ticks;
> > +
> > + cpu_wdt = container_of(hrtimer, struct vm_wdt_s, per_cpu_hrtimer);
> > + ticks = cpu_wdt->clock_freq * cpu_wdt->expiration_sec;
> > + vmwdt_reg_write(cpu_wdt, VMWDT_REG_LOAD_CNT, ticks);
> > + hrtimer_forward_now(hrtimer, ms_to_ktime(cpu_wdt->ping_timeout_ms));
> > +
> > + return HRTIMER_RESTART;
> > +}
> > +
> > +static void vmwdt_start(void *arg)
> > +{
> > + u32 ticks;
> > + struct vm_wdt_s *cpu_wdt = arg;
> > + struct hrtimer *hrtimer = &cpu_wdt->per_cpu_hrtimer;
> > +
> > + vmwdt_reg_write(cpu_wdt, VMWDT_REG_CLOCK_FREQ_HZ,
> > + cpu_wdt->clock_freq);
> > +
> > + /* Compute the number of ticks required for the watchdog counter
> > + * register based on the internal clock frequency and the watchdog
> > + * timeout given from the device tree.
> > + */
> > + ticks = cpu_wdt->clock_freq * cpu_wdt->expiration_sec;
> > + vmwdt_reg_write(cpu_wdt, VMWDT_REG_LOAD_CNT, ticks);
> > +
> > + /* Enable the internal clock and start the watchdog */
> > + vmwdt_reg_write(cpu_wdt, VMWDT_REG_STATUS, 1);
> > +
> > + hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> > + hrtimer->function = vmwdt_timer_fn;
> > + hrtimer_start(hrtimer, ms_to_ktime(cpu_wdt->ping_timeout_ms),
> > + HRTIMER_MODE_REL_PINNED);
> > +}
> > +
> > +static void vmwdt_stop(void *arg)
> > +{
> > + struct vm_wdt_s *cpu_wdt = arg;
> > + struct hrtimer *hrtimer = &cpu_wdt->per_cpu_hrtimer;
> > +
> > + hrtimer_cancel(hrtimer);
> > +
> > + /* Disable the watchdog */
> > + vmwdt_reg_write(cpu_wdt, VMWDT_REG_STATUS, 0);
> > +}
> > +
> > +static int start_watchdog_on_cpu(unsigned int cpu)
> > +{
> > + struct vm_wdt_s *vm_wdt = platform_get_drvdata(virt_dev);
> > +
> > + vmwdt_start(this_cpu_ptr(vm_wdt));
> > + return 0;
> > +}
> > +
> > +static int stop_watchdog_on_cpu(unsigned int cpu)
> > +{
> > + struct vm_wdt_s *vm_wdt = platform_get_drvdata(virt_dev);
> > +
> > + vmwdt_stop(this_cpu_ptr(vm_wdt));
> > + return 0;
> > +}
> > +
> > +static int vmwdt_probe(struct platform_device *dev)
> > +{
> > + int cpu, ret, err;
> > + void __iomem *membase;
> > + struct resource *r;
> > + struct vm_wdt_s *vm_wdt;
> > + u32 wdt_clock, wdt_timeout_sec = 0;
> > +
> > + r = platform_get_resource(dev, IORESOURCE_MEM, 0);
> > + if (r == NULL)
> > + return -ENOENT;
> > +
> > + vm_wdt = alloc_percpu(typeof(struct vm_wdt_s));
> > + if (!vm_wdt)
> > + return -ENOMEM;
> > +
> > + membase = ioremap(r->start, resource_size(r));
> > + if (!membase) {
> > + ret = -ENXIO;
> > + goto err_withmem;
> > + }
> > +
> > + virt_dev = dev;
> > + platform_set_drvdata(dev, vm_wdt);
> > + if (of_property_read_u32(dev->dev.of_node, "clock", &wdt_clock))
> > + wdt_clock = VMWDT_DEFAULT_CLOCK_HZ;
> > +
> > + if (of_property_read_u32(dev->dev.of_node, "timeout-sec",
> > + &wdt_timeout_sec))
> > + wdt_timeout_sec = VMWDT_DEFAULT_TIMEOT_SEC;
> > +
> > + for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask) {
> > + struct vm_wdt_s *cpu_wdt = per_cpu_ptr(vm_wdt, cpu);
> > +
> > + cpu_wdt->membase = membase + cpu * VMWDT_REG_LEN;
> > + cpu_wdt->clock_freq = wdt_clock;
> > + cpu_wdt->expiration_sec = wdt_timeout_sec;
> > + cpu_wdt->ping_timeout_ms = wdt_timeout_sec * MSEC_PER_SEC / 2;
> > + smp_call_function_single(cpu, vmwdt_start, cpu_wdt, true);
> > + }
> > +
> > + err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
> > + "virt/watchdog:online",
> > + start_watchdog_on_cpu,
> > + stop_watchdog_on_cpu);
> > + if (err < 0) {
> > + pr_warn("could not be initialized");
>
> drivers should never use pr_* calls. dev_warn() please.
>

Removed this call,

> thanks,
>
> greg k-h

Thanks,
Sebastian