Hello,I have found a memory leak bug in kvm module by syzkaller.It was found in linux-5.4 but it also could be reproduced in the latest linux version.
Here is the bug info:
ioctl$KVM_CREATE_PIT2(r1, 0x4040ae77, &(0x7f0000000040))
ioctl$KVM_CREATE_IRQCHIP(r1, 0xae60)
ioctl$KVM_CREATE_IRQCHIP(r1, 0xae60)
ioctl$KVM_SET_PIT2(r1, 0x4070aea0, &(0x7f0000000080))
BUG: memory leak
unreferenced object 0xffff888112a54880 (size 64):
comm "syz-executor.2", pid 5258, jiffies 4297861402 (age 14.129s)
hex dump (first 32 bytes):
38 c7 67 15 00 c9 ff ff 38 c7 67 15 00 c9 ff ff 8.g.....8.g.....
e0 c7 e1 83 ff ff ff ff 00 30 67 15 00 c9 ff ff .........0g.....
backtrace:
[<0000000006995a8a>] kmalloc include/linux/slab.h:556 [inline]
[<0000000006995a8a>] kzalloc include/linux/slab.h:690 [inline]
[<0000000006995a8a>] kvm_vm_ioctl_register_coalesced_mmio+0x8e/0x3d0 arch/x86/kvm/../../../virt/kvm/coalesced_mmio.c:150
[<00000000022550c2>] kvm_vm_ioctl+0x47d/0x1600 arch/x86/kvm/../../../virt/kvm/kvm_main.c:3323
[<000000008a75102f>] vfs_ioctl fs/ioctl.c:46 [inline]
[<000000008a75102f>] file_ioctl fs/ioctl.c:509 [inline]
[<000000008a75102f>] do_vfs_ioctl+0xbab/0x1160 fs/ioctl.c:696
[<0000000080e3f669>] ksys_ioctl+0x76/0xa0 fs/ioctl.c:713
[<0000000059ef4888>] __do_sys_ioctl fs/ioctl.c:720 [inline]
[<0000000059ef4888>] __se_sys_ioctl fs/ioctl.c:718 [inline]
[<0000000059ef4888>] __x64_sys_ioctl+0x6f/0xb0 fs/ioctl.c:718
[<000000006444fa05>] do_syscall_64+0x9f/0x4e0 arch/x86/entry/common.c:290
[<000000009a4ed50b>] entry_SYSCALL_64_after_hwframe+0x49/0xbe
BUG: leak checking failed
And here is the reproduce code by syzkaller,the bug can be reproduced by open kmemleak config:
// autogenerated by syzkaller (https://github.com/google/syzkaller)
#define _GNU_SOURCE
#include <dirent.h>
#include <endian.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/prctl.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
static void sleep_ms(uint64_t ms)
{
usleep(ms * 1000);
}
static uint64_t current_time_ms(void)
{
struct timespec ts;
if (clock_gettime(CLOCK_MONOTONIC, &ts))
exit(1);
return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000;
}
static bool write_file(const char* file, const char* what, ...)
{
char buf[1024];
va_list args;
va_start(args, what);
vsnprintf(buf, sizeof(buf), what, args);
va_end(args);
buf[sizeof(buf) - 1] = 0;
int len = strlen(buf);
int fd = open(file, O_WRONLY | O_CLOEXEC);
if (fd == -1)
return false;
if (write(fd, buf, len) != len) {
int err = errno;
close(fd);
errno = err;
return false;
}
close(fd);
return true;
}
static int inject_fault(int nth)
{
int fd;
fd = open("/proc/thread-self/fail-nth", O_RDWR);
if (fd == -1)
exit(1);
char buf[16];
sprintf(buf, "%d", nth + 1);
if (write(fd, buf, strlen(buf)) != (ssize_t)strlen(buf))
exit(1);
return fd;
}
static void kill_and_wait(int pid, int* status)
{
kill(-pid, SIGKILL);
kill(pid, SIGKILL);
for (int i = 0; i < 100; i++) {
if (waitpid(-1, status, WNOHANG | __WALL) == pid)
return;
usleep(1000);
}
DIR* dir = opendir("/sys/fs/fuse/connections");
if (dir) {
for (;;) {
struct dirent* ent = readdir(dir);
if (!ent)
break;
if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
continue;
char abort[300];
snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort",
ent->d_name);
int fd = open(abort, O_WRONLY);
if (fd == -1) {
continue;
}
if (write(fd, abort, 1) < 0) {
}
close(fd);
}
closedir(dir);
} else {
}
while (waitpid(-1, status, __WALL) != pid) {
}
}
static void setup_test()
{
prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
setpgrp();
write_file("/proc/self/oom_score_adj", "1000");
}
static void setup_fault()
{
static struct {
const char* file;
const char* val;
bool fatal;
} files[] = {
{"/sys/kernel/debug/failslab/ignore-gfp-wait", "N", true},
{"/sys/kernel/debug/fail_futex/ignore-private", "N", false},
{"/sys/kernel/debug/fail_page_alloc/ignore-gfp-highmem", "N", false},
{"/sys/kernel/debug/fail_page_alloc/ignore-gfp-wait", "N", false},
{"/sys/kernel/debug/fail_page_alloc/min-order", "0", false},
};
unsigned i;
for (i = 0; i < sizeof(files) / sizeof(files[0]); i++) {
if (!write_file(files[i].file, files[i].val)) {
if (files[i].fatal)
exit(1);
}
}
}
#define KMEMLEAK_FILE "/sys/kernel/debug/kmemleak"
static void setup_leak()
{
if (!write_file(KMEMLEAK_FILE, "scan"))
exit(1);
sleep(5);
if (!write_file(KMEMLEAK_FILE, "scan"))
exit(1);
if (!write_file(KMEMLEAK_FILE, "clear"))
exit(1);
}
static void check_leaks(void)
{
int fd = open(KMEMLEAK_FILE, O_RDWR);
if (fd == -1)
exit(1);
uint64_t start = current_time_ms();
if (write(fd, "scan", 4) != 4)
exit(1);
sleep(1);
while (current_time_ms() - start < 4 * 1000)
sleep(1);
if (write(fd, "scan", 4) != 4)
exit(1);
static char buf[128 << 10];
ssize_t n = read(fd, buf, sizeof(buf) - 1);
if (n < 0)
exit(1);
int nleaks = 0;
if (n != 0) {
sleep(1);
if (write(fd, "scan", 4) != 4)
exit(1);
if (lseek(fd, 0, SEEK_SET) < 0)
exit(1);
n = read(fd, buf, sizeof(buf) - 1);
if (n < 0)
exit(1);
buf[n] = 0;
char* pos = buf;
char* end = buf + n;
while (pos < end) {
char* next = strstr(pos + 1, "unreferenced object");
if (!next)
next = end;
char prev = *next;
*next = 0;
fprintf(stderr, "BUG: memory leak\n%s\n", pos);
*next = prev;
pos = next;
nleaks++;
}
}
if (write(fd, "clear", 5) != 5)
exit(1);
close(fd);
if (nleaks)
exit(1);
}
static void execute_one(void);
#define WAIT_FLAGS __WALL
static void loop(void)
{
int iter = 0;
for (;; iter++) {
int pid = fork();
if (pid < 0)
exit(1);
if (pid == 0) {
setup_test();
execute_one();
exit(0);
}
int status = 0;
uint64_t start = current_time_ms();
for (;;) {
if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid)
break;
sleep_ms(1);
if (current_time_ms() - start < 5000) {
continue;
}
kill_and_wait(pid, &status);
break;
}
check_leaks();
}
}
uint64_t r[2] = {0xffffffffffffffff, 0xffffffffffffffff};
void execute_one(void)
{
intptr_t res = 0;
memcpy((void*)0x20000080, "/dev/kvm\000", 9);
res = syscall(__NR_openat, 0xffffffffffffff9cul, 0x20000080ul, 0ul, 0ul);
if (res != -1)
r[0] = res;
res = syscall(__NR_ioctl, r[0], 0xae01, 0ul);
if (res != -1)
r[1] = res;
*(uint64_t*)0x20000100 = 0;
*(uint32_t*)0x20000108 = 0;
*(uint32_t*)0x2000010c = 0;
syscall(__NR_ioctl, r[1], 0x4010ae67, 0x20000100ul);
*(uint64_t*)0x20000000 = 0;
*(uint32_t*)0x20000008 = 0x101000;
*(uint32_t*)0x2000000c = 0;
syscall(__NR_ioctl, r[1], 0x4010ae67, 0x20000000ul);
*(uint64_t*)0x200000c0 = 0;
*(uint32_t*)0x200000c8 = 0x10000;
*(uint32_t*)0x200000cc = 0;
inject_fault(0);
syscall(__NR_ioctl, r[1], 0x4010ae68, 0x200000c0ul);
}
int main(void)
{
syscall(__NR_mmap, 0x1ffff000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul);
syscall(__NR_mmap, 0x20000000ul, 0x1000000ul, 7ul, 0x32ul, -1, 0ul);
syscall(__NR_mmap, 0x21000000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul);
setup_leak();
setup_fault();
loop();
return 0;
}
Here is the memory leak function
int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
struct kvm_coalesced_mmio_zone *zone)
{
int ret;
struct kvm_coalesced_mmio_dev *dev;
if (zone->pio != 1 && zone->pio != 0)
return -EINVAL;
dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev),//Here is the memory leak alloc
GFP_KERNEL_ACCOUNT);
if (!dev)
return -ENOMEM;
kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops);
dev->kvm = kvm;
dev->zone = *zone;
mutex_lock(&kvm->slots_lock);
ret = kvm_io_bus_register_dev(kvm,
zone->pio ? KVM_PIO_BUS : KVM_MMIO_BUS,
zone->addr, zone->size, &dev->dev);
if (ret < 0)
goto out_free_dev;
list_add_tail(&dev->list, &kvm->coalesced_zones);
mutex_unlock(&kvm->slots_lock);
return 0;
out_free_dev:
mutex_unlock(&kvm->slots_lock);
kfree(dev);
return ret;
}
On Mon, Dec 12, 2022, 柳菁峰 wrote:
> Hello,I have found a memory leak bug in kvm module by syzkaller.It was found
> in linux-5.4 but it also could be reproduced in the latest linux version.
Ah, I assume by "linux-5.4" you mean "stable v5.4.x kernels that contain commit
7d1bc32d6477 ("KVM: Stop looking for coalesced MMIO zones if the bus is destroyed")",
because without that fix I can't see any bug that would affect both 5.4 and the
upstream kernel.
If my assumption is correct, then I'm 99% certain the issue is that the target
device isn't destroyed if allocating the new bus fails. I haven't had luck with
the automatic fault injection, but was able to confirm a leak with this hack.
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 13e88297f999..22d9ab1b5c25 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -5424,7 +5424,7 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
struct kvm_io_device *dev)
{
int i, j;
- struct kvm_io_bus *new_bus, *bus;
+ struct kvm_io_bus *new_bus = NULL, *bus;
lockdep_assert_held(&kvm->slots_lock);
@@ -5441,6 +5441,7 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
if (i == bus->dev_count)
return 0;
+ if (!IS_ENABLED(CONFIG_X86_64))
new_bus = kmalloc(struct_size(bus, range, bus->dev_count - 1),
GFP_KERNEL_ACCOUNT);
if (new_bus) {
The fix is to destroy the target device before bailing. I'll send a proper patch
either way, but it would be nice to get confirmation that this is the same bug
that you hit with "linux-5.4".
Thanks!
---
virt/kvm/coalesced_mmio.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 0be80c213f7f..5ef88f5a0864 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -187,15 +187,17 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
r = kvm_io_bus_unregister_dev(kvm,
zone->pio ? KVM_PIO_BUS : KVM_MMIO_BUS, &dev->dev);
+ kvm_iodevice_destructor(&dev->dev);
+
/*
* On failure, unregister destroys all devices on the
* bus _except_ the target device, i.e. coalesced_zones
- * has been modified. No need to restart the walk as
- * there aren't any zones left.
+ * has been modified. Bail after destroying the target
+ * device, there's no need to restart the walk as there
+ * aren't any zones left.
*/
if (r)
break;
- kvm_iodevice_destructor(&dev->dev);
}
}
base-commit: 0f30b25edea48433eb32448990557364436818e6
--
I had tried to retest with your patch,and I think the memory leak was resolved by it.
Thanks!
-----邮件原件-----
发件人: Sean Christopherson [mailto:[email protected]]
发送时间: 2022年12月16日 2:17
收件人: 柳菁峰 <[email protected]>
抄送: [email protected]; [email protected]; [email protected]; [email protected]
主题: Re: Found a memory leak in kvm module
On Mon, Dec 12, 2022, 柳菁峰 wrote:
> Hello,I have found a memory leak bug in kvm module by syzkaller.It was
> found in linux-5.4 but it also could be reproduced in the latest linux version.
Ah, I assume by "linux-5.4" you mean "stable v5.4.x kernels that contain commit
7d1bc32d6477 ("KVM: Stop looking for coalesced MMIO zones if the bus is destroyed")", because without that fix I can't see any bug that would affect both 5.4 and the upstream kernel.
If my assumption is correct, then I'm 99% certain the issue is that the target device isn't destroyed if allocating the new bus fails. I haven't had luck with the automatic fault injection, but was able to confirm a leak with this hack.
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 13e88297f999..22d9ab1b5c25 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -5424,7 +5424,7 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
struct kvm_io_device *dev) {
int i, j;
- struct kvm_io_bus *new_bus, *bus;
+ struct kvm_io_bus *new_bus = NULL, *bus;
lockdep_assert_held(&kvm->slots_lock);
@@ -5441,6 +5441,7 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
if (i == bus->dev_count)
return 0;
+ if (!IS_ENABLED(CONFIG_X86_64))
new_bus = kmalloc(struct_size(bus, range, bus->dev_count - 1),
GFP_KERNEL_ACCOUNT);
if (new_bus) {
The fix is to destroy the target device before bailing. I'll send a proper patch either way, but it would be nice to get confirmation that this is the same bug that you hit with "linux-5.4".
Thanks!
---
virt/kvm/coalesced_mmio.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index 0be80c213f7f..5ef88f5a0864 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -187,15 +187,17 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
r = kvm_io_bus_unregister_dev(kvm,
zone->pio ? KVM_PIO_BUS : KVM_MMIO_BUS, &dev->dev);
+ kvm_iodevice_destructor(&dev->dev);
+
/*
* On failure, unregister destroys all devices on the
* bus _except_ the target device, i.e. coalesced_zones
- * has been modified. No need to restart the walk as
- * there aren't any zones left.
+ * has been modified. Bail after destroying the target
+ * device, there's no need to restart the walk as there
+ * aren't any zones left.
*/
if (r)
break;
- kvm_iodevice_destructor(&dev->dev);
}
}
base-commit: 0f30b25edea48433eb32448990557364436818e6
--