Hello,
This is a small update of the previously introduced vcpu stall detector
which adds an interrupt to the virtual device to notify the guest VM in
case it stalls. This lets the guest VM handle the reboot and
panic in case the stall detector expires.
Changelog from v1:
* 1/2 : collected the Ack from Conor Dooley, thank you Conor !
* 2/2 : applied the feedback received from Conor and used
platform_get_irq_optional(). Removed the error messages during
probe.
Thanks,
Sebastian Ene (2):
dt-bindings: vcpu_stall_detector: Add a PPI interrupt to the virtual
device
misc: Register a PPI for the vcpu stall detection virtual device
.../misc/qemu,vcpu-stall-detector.yaml | 6 ++++
drivers/misc/vcpu_stall_detector.c | 31 +++++++++++++++++--
2 files changed, 35 insertions(+), 2 deletions(-)
--
2.45.2.505.gda0bf45e8d-goog
The vcpu stall detector allows the host to monitor the availability of a
guest VM. Introduce a PPI interrupt which can be injected from the host
into the virtual gic to let the guest reboot itself.
Signed-off-by: Sebastian Ene <[email protected]>
Acked-by: Conor Dooley <[email protected]>
---
.../devicetree/bindings/misc/qemu,vcpu-stall-detector.yaml | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/Documentation/devicetree/bindings/misc/qemu,vcpu-stall-detector.yaml b/Documentation/devicetree/bindings/misc/qemu,vcpu-stall-detector.yaml
index 1aebeb696ee0..e12d80be00cd 100644
--- a/Documentation/devicetree/bindings/misc/qemu,vcpu-stall-detector.yaml
+++ b/Documentation/devicetree/bindings/misc/qemu,vcpu-stall-detector.yaml
@@ -29,6 +29,9 @@ properties:
Defaults to 10 if unset.
default: 10
+ interrupts:
+ maxItems: 1
+
timeout-sec:
description: |
The stall detector expiration timeout measured in seconds.
@@ -43,9 +46,12 @@ additionalProperties: false
examples:
- |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
vmwdt@9030000 {
compatible = "qemu,vcpu-stall-detector";
reg = <0x9030000 0x10000>;
clock-frequency = <10>;
timeout-sec = <8>;
+ interrupts = <GIC_PPI 15 IRQ_TYPE_EDGE_RISING>;
};
--
2.45.2.505.gda0bf45e8d-goog
Request a PPI for each vCPU during probe which will be used by the host
to communicate a stall detected event on the vCPU. When the host raises
this interrupt from the virtual machine monitor, the guest is expected to
handle the interrupt and panic.
Signed-off-by: Sebastian Ene <[email protected]>
---
drivers/misc/vcpu_stall_detector.c | 31 ++++++++++++++++++++++++++++--
1 file changed, 29 insertions(+), 2 deletions(-)
diff --git a/drivers/misc/vcpu_stall_detector.c b/drivers/misc/vcpu_stall_detector.c
index e2015c87f03f..17808e71cc30 100644
--- a/drivers/misc/vcpu_stall_detector.c
+++ b/drivers/misc/vcpu_stall_detector.c
@@ -32,6 +32,7 @@
struct vcpu_stall_detect_config {
u32 clock_freq_hz;
u32 stall_timeout_sec;
+ int ppi_irq;
void __iomem *membase;
struct platform_device *dev;
@@ -77,6 +78,12 @@ vcpu_stall_detect_timer_fn(struct hrtimer *hrtimer)
return HRTIMER_RESTART;
}
+static irqreturn_t vcpu_stall_detector_irq(int irq, void *dev)
+{
+ panic("vCPU stall detector");
+ return IRQ_HANDLED;
+}
+
static int start_stall_detector_cpu(unsigned int cpu)
{
u32 ticks, ping_timeout_ms;
@@ -132,7 +139,7 @@ static int stop_stall_detector_cpu(unsigned int cpu)
static int vcpu_stall_detect_probe(struct platform_device *pdev)
{
- int ret;
+ int ret, irq;
struct resource *r;
void __iomem *membase;
u32 clock_freq_hz = VCPU_STALL_DEFAULT_CLOCK_HZ;
@@ -169,9 +176,22 @@ static int vcpu_stall_detect_probe(struct platform_device *pdev)
vcpu_stall_config = (struct vcpu_stall_detect_config) {
.membase = membase,
.clock_freq_hz = clock_freq_hz,
- .stall_timeout_sec = stall_timeout_sec
+ .stall_timeout_sec = stall_timeout_sec,
+ .ppi_irq = -1,
};
+ irq = platform_get_irq_optional(pdev, 0);
+ if (irq > 0 && irq_is_percpu_devid(irq)) {
+ ret = request_percpu_irq(irq,
+ vcpu_stall_detector_irq,
+ "vcpu_stall_detector",
+ vcpu_stall_detectors);
+ if (ret)
+ goto err;
+
+ vcpu_stall_config.ppi_irq = irq;
+ }
+
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
"virt/vcpu_stall_detector:online",
start_stall_detector_cpu,
@@ -184,6 +204,9 @@ static int vcpu_stall_detect_probe(struct platform_device *pdev)
vcpu_stall_config.hp_online = ret;
return 0;
err:
+ if (vcpu_stall_config.ppi_irq > 0)
+ free_percpu_irq(vcpu_stall_config.ppi_irq,
+ vcpu_stall_detectors);
return ret;
}
@@ -193,6 +216,10 @@ static void vcpu_stall_detect_remove(struct platform_device *pdev)
cpuhp_remove_state(vcpu_stall_config.hp_online);
+ if (vcpu_stall_config.ppi_irq > 0)
+ free_percpu_irq(vcpu_stall_config.ppi_irq,
+ vcpu_stall_detectors);
+
for_each_possible_cpu(cpu)
stop_stall_detector_cpu(cpu);
}
--
2.45.2.505.gda0bf45e8d-goog
Hi Sebastian,
kernel test robot noticed the following build errors:
[auto build test ERROR on robh/for-next]
[also build test ERROR on soc/for-next char-misc/char-misc-testing char-misc/char-misc-next char-misc/char-misc-linus linus/master v6.10-rc3 next-20240613]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Sebastian-Ene/dt-bindings-vcpu_stall_detector-Add-a-PPI-interrupt-to-the-virtual-device/20240611-190759
base: https://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git for-next
patch link: https://lore.kernel.org/r/20240611110136.2003137-4-sebastianene%40google.com
patch subject: [PATCH v2 2/2] misc: Register a PPI for the vcpu stall detection virtual device
config: s390-randconfig-r112-20240613 (https://download.01.org/0day-ci/archive/20240613/[email protected]/config)
compiler: clang version 15.0.7 (https://github.com/llvm/llvm-project 8dfdcc7b7bf66834a761bd8de445840ef68e4d1a)
reproduce: (https://download.01.org/0day-ci/archive/20240613/[email protected]/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/
All errors (new ones prefixed by >>):
In file included from drivers/misc/vcpu_stall_detector.c:8:
In file included from include/linux/io.h:14:
In file included from arch/s390/include/asm/io.h:93:
include/asm-generic/io.h:548:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
val = __raw_readb(PCI_IOBASE + addr);
~~~~~~~~~~ ^
include/asm-generic/io.h:561:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
~~~~~~~~~~ ^
include/uapi/linux/byteorder/big_endian.h:37:59: note: expanded from macro '__le16_to_cpu'
#define __le16_to_cpu(x) __swab16((__force __u16)(__le16)(x))
^
include/uapi/linux/swab.h:102:54: note: expanded from macro '__swab16'
#define __swab16(x) (__u16)__builtin_bswap16((__u16)(x))
^
In file included from drivers/misc/vcpu_stall_detector.c:8:
In file included from include/linux/io.h:14:
In file included from arch/s390/include/asm/io.h:93:
include/asm-generic/io.h:574:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
~~~~~~~~~~ ^
include/uapi/linux/byteorder/big_endian.h:35:59: note: expanded from macro '__le32_to_cpu'
#define __le32_to_cpu(x) __swab32((__force __u32)(__le32)(x))
^
include/uapi/linux/swab.h:115:54: note: expanded from macro '__swab32'
#define __swab32(x) (__u32)__builtin_bswap32((__u32)(x))
^
In file included from drivers/misc/vcpu_stall_detector.c:8:
In file included from include/linux/io.h:14:
In file included from arch/s390/include/asm/io.h:93:
include/asm-generic/io.h:585:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
__raw_writeb(value, PCI_IOBASE + addr);
~~~~~~~~~~ ^
include/asm-generic/io.h:595:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
__raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
~~~~~~~~~~ ^
include/asm-generic/io.h:605:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
__raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
~~~~~~~~~~ ^
include/asm-generic/io.h:693:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
readsb(PCI_IOBASE + addr, buffer, count);
~~~~~~~~~~ ^
include/asm-generic/io.h:701:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
readsw(PCI_IOBASE + addr, buffer, count);
~~~~~~~~~~ ^
include/asm-generic/io.h:709:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
readsl(PCI_IOBASE + addr, buffer, count);
~~~~~~~~~~ ^
include/asm-generic/io.h:718:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
writesb(PCI_IOBASE + addr, buffer, count);
~~~~~~~~~~ ^
include/asm-generic/io.h:727:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
writesw(PCI_IOBASE + addr, buffer, count);
~~~~~~~~~~ ^
include/asm-generic/io.h:736:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
writesl(PCI_IOBASE + addr, buffer, count);
~~~~~~~~~~ ^
>> drivers/misc/vcpu_stall_detector.c:184:17: error: call to undeclared function 'irq_is_percpu_devid'; ISO C99 and later do not support implicit function declarations [-Werror,-Wimplicit-function-declaration]
if (irq > 0 && irq_is_percpu_devid(irq)) {
^
12 warnings and 1 error generated.
vim +/irq_is_percpu_devid +184 drivers/misc/vcpu_stall_detector.c
139
140 static int vcpu_stall_detect_probe(struct platform_device *pdev)
141 {
142 int ret, irq;
143 struct resource *r;
144 void __iomem *membase;
145 u32 clock_freq_hz = VCPU_STALL_DEFAULT_CLOCK_HZ;
146 u32 stall_timeout_sec = VCPU_STALL_DEFAULT_TIMEOUT_SEC;
147 struct device_node *np = pdev->dev.of_node;
148
149 vcpu_stall_detectors = devm_alloc_percpu(&pdev->dev,
150 typeof(struct vcpu_stall_priv));
151 if (!vcpu_stall_detectors)
152 return -ENOMEM;
153
154 membase = devm_platform_get_and_ioremap_resource(pdev, 0, &r);
155 if (IS_ERR(membase)) {
156 dev_err(&pdev->dev, "Failed to get memory resource\n");
157 return PTR_ERR(membase);
158 }
159
160 if (!of_property_read_u32(np, "clock-frequency", &clock_freq_hz)) {
161 if (!(clock_freq_hz > 0 &&
162 clock_freq_hz < VCPU_STALL_MAX_CLOCK_HZ)) {
163 dev_warn(&pdev->dev, "clk out of range\n");
164 clock_freq_hz = VCPU_STALL_DEFAULT_CLOCK_HZ;
165 }
166 }
167
168 if (!of_property_read_u32(np, "timeout-sec", &stall_timeout_sec)) {
169 if (!(stall_timeout_sec > 0 &&
170 stall_timeout_sec < VCPU_STALL_MAX_TIMEOUT_SEC)) {
171 dev_warn(&pdev->dev, "stall timeout out of range\n");
172 stall_timeout_sec = VCPU_STALL_DEFAULT_TIMEOUT_SEC;
173 }
174 }
175
176 vcpu_stall_config = (struct vcpu_stall_detect_config) {
177 .membase = membase,
178 .clock_freq_hz = clock_freq_hz,
179 .stall_timeout_sec = stall_timeout_sec,
180 .ppi_irq = -1,
181 };
182
183 irq = platform_get_irq_optional(pdev, 0);
> 184 if (irq > 0 && irq_is_percpu_devid(irq)) {
185 ret = request_percpu_irq(irq,
186 vcpu_stall_detector_irq,
187 "vcpu_stall_detector",
188 vcpu_stall_detectors);
189 if (ret)
190 goto err;
191
192 vcpu_stall_config.ppi_irq = irq;
193 }
194
195 ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
196 "virt/vcpu_stall_detector:online",
197 start_stall_detector_cpu,
198 stop_stall_detector_cpu);
199 if (ret < 0) {
200 dev_err(&pdev->dev, "failed to install cpu hotplug");
201 goto err;
202 }
203
204 vcpu_stall_config.hp_online = ret;
205 return 0;
206 err:
207 if (vcpu_stall_config.ppi_irq > 0)
208 free_percpu_irq(vcpu_stall_config.ppi_irq,
209 vcpu_stall_detectors);
210 return ret;
211 }
212
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki