2022-12-12 03:27:54

by 柳菁峰

[permalink] [raw]
Subject: Found a memory leak in kvm module

Hello,I have found a memory leak bug in kvm module by syzkaller.It was found in linux-5.4 but it also could be reproduced in the latest linux version.

Here is the bug info:

ioctl$KVM_CREATE_PIT2(r1, 0x4040ae77, &(0x7f0000000040))
ioctl$KVM_CREATE_IRQCHIP(r1, 0xae60)
ioctl$KVM_CREATE_IRQCHIP(r1, 0xae60)
ioctl$KVM_SET_PIT2(r1, 0x4070aea0, &(0x7f0000000080))
BUG: memory leak
unreferenced object 0xffff888112a54880 (size 64):
  comm "syz-executor.2", pid 5258, jiffies 4297861402 (age 14.129s)
  hex dump (first 32 bytes):
    38 c7 67 15 00 c9 ff ff 38 c7 67 15 00 c9 ff ff  8.g.....8.g.....
    e0 c7 e1 83 ff ff ff ff 00 30 67 15 00 c9 ff ff  .........0g.....
  backtrace:
    [<0000000006995a8a>] kmalloc include/linux/slab.h:556 [inline]
    [<0000000006995a8a>] kzalloc include/linux/slab.h:690 [inline]
    [<0000000006995a8a>] kvm_vm_ioctl_register_coalesced_mmio+0x8e/0x3d0 arch/x86/kvm/../../../virt/kvm/coalesced_mmio.c:150
    [<00000000022550c2>] kvm_vm_ioctl+0x47d/0x1600 arch/x86/kvm/../../../virt/kvm/kvm_main.c:3323
    [<000000008a75102f>] vfs_ioctl fs/ioctl.c:46 [inline]
    [<000000008a75102f>] file_ioctl fs/ioctl.c:509 [inline]
    [<000000008a75102f>] do_vfs_ioctl+0xbab/0x1160 fs/ioctl.c:696
    [<0000000080e3f669>] ksys_ioctl+0x76/0xa0 fs/ioctl.c:713
    [<0000000059ef4888>] __do_sys_ioctl fs/ioctl.c:720 [inline]
    [<0000000059ef4888>] __se_sys_ioctl fs/ioctl.c:718 [inline]
    [<0000000059ef4888>] __x64_sys_ioctl+0x6f/0xb0 fs/ioctl.c:718
    [<000000006444fa05>] do_syscall_64+0x9f/0x4e0 arch/x86/entry/common.c:290
    [<000000009a4ed50b>] entry_SYSCALL_64_after_hwframe+0x49/0xbe

BUG: leak checking failed



And here is the reproduce code by syzkaller,the bug can be reproduced by open kmemleak config:

// autogenerated by syzkaller (https://github.com/google/syzkaller)

#define _GNU_SOURCE

#include <dirent.h>
#include <endian.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/prctl.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>

static void sleep_ms(uint64_t ms)
{
  usleep(ms * 1000);
}

static uint64_t current_time_ms(void)
{
  struct timespec ts;
  if (clock_gettime(CLOCK_MONOTONIC, &ts))
    exit(1);
  return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000;
}

static bool write_file(const char* file, const char* what, ...)
{
  char buf[1024];
  va_list args;
  va_start(args, what);
  vsnprintf(buf, sizeof(buf), what, args);
  va_end(args);
  buf[sizeof(buf) - 1] = 0;
  int len = strlen(buf);
  int fd = open(file, O_WRONLY | O_CLOEXEC);
  if (fd == -1)
    return false;
  if (write(fd, buf, len) != len) {
    int err = errno;
    close(fd);
    errno = err;
    return false;
  }
  close(fd);
  return true;
}

static int inject_fault(int nth)
{
  int fd;
  fd = open("/proc/thread-self/fail-nth", O_RDWR);
  if (fd == -1)
    exit(1);
  char buf[16];
  sprintf(buf, "%d", nth + 1);
  if (write(fd, buf, strlen(buf)) != (ssize_t)strlen(buf))
    exit(1);
  return fd;
}

static void kill_and_wait(int pid, int* status)
{
  kill(-pid, SIGKILL);
  kill(pid, SIGKILL);
  for (int i = 0; i < 100; i++) {
    if (waitpid(-1, status, WNOHANG | __WALL) == pid)
      return;
    usleep(1000);
  }
  DIR* dir = opendir("/sys/fs/fuse/connections");
  if (dir) {
    for (;;) {
      struct dirent* ent = readdir(dir);
      if (!ent)
        break;
      if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
        continue;
      char abort[300];
      snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort",
               ent->d_name);
      int fd = open(abort, O_WRONLY);
      if (fd == -1) {
        continue;
      }
      if (write(fd, abort, 1) < 0) {
      }
      close(fd);
    }
    closedir(dir);
  } else {
  }
  while (waitpid(-1, status, __WALL) != pid) {
  }
}

static void setup_test()
{
  prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
  setpgrp();
  write_file("/proc/self/oom_score_adj", "1000");
}

static void setup_fault()
{
  static struct {
    const char* file;
    const char* val;
    bool fatal;
  } files[] = {
      {"/sys/kernel/debug/failslab/ignore-gfp-wait", "N", true},
      {"/sys/kernel/debug/fail_futex/ignore-private", "N", false},
      {"/sys/kernel/debug/fail_page_alloc/ignore-gfp-highmem", "N", false},
      {"/sys/kernel/debug/fail_page_alloc/ignore-gfp-wait", "N", false},
      {"/sys/kernel/debug/fail_page_alloc/min-order", "0", false},
  };
  unsigned i;
  for (i = 0; i < sizeof(files) / sizeof(files[0]); i++) {
    if (!write_file(files[i].file, files[i].val)) {
      if (files[i].fatal)
        exit(1);
    }
  }
}

#define KMEMLEAK_FILE "/sys/kernel/debug/kmemleak"

static void setup_leak()
{
  if (!write_file(KMEMLEAK_FILE, "scan"))
    exit(1);
  sleep(5);
  if (!write_file(KMEMLEAK_FILE, "scan"))
    exit(1);
  if (!write_file(KMEMLEAK_FILE, "clear"))
    exit(1);
}

static void check_leaks(void)
{
  int fd = open(KMEMLEAK_FILE, O_RDWR);
  if (fd == -1)
    exit(1);
  uint64_t start = current_time_ms();
  if (write(fd, "scan", 4) != 4)
    exit(1);
  sleep(1);
  while (current_time_ms() - start < 4 * 1000)
    sleep(1);
  if (write(fd, "scan", 4) != 4)
    exit(1);
  static char buf[128 << 10];
  ssize_t n = read(fd, buf, sizeof(buf) - 1);
  if (n < 0)
    exit(1);
  int nleaks = 0;
  if (n != 0) {
    sleep(1);
    if (write(fd, "scan", 4) != 4)
      exit(1);
    if (lseek(fd, 0, SEEK_SET) < 0)
      exit(1);
    n = read(fd, buf, sizeof(buf) - 1);
    if (n < 0)
      exit(1);
    buf[n] = 0;
    char* pos = buf;
    char* end = buf + n;
    while (pos < end) {
      char* next = strstr(pos + 1, "unreferenced object");
      if (!next)
        next = end;
      char prev = *next;
      *next = 0;
      fprintf(stderr, "BUG: memory leak\n%s\n", pos);
      *next = prev;
      pos = next;
      nleaks++;
    }
  }
  if (write(fd, "clear", 5) != 5)
    exit(1);
  close(fd);
  if (nleaks)
    exit(1);
}

static void execute_one(void);

#define WAIT_FLAGS __WALL

static void loop(void)
{
  int iter = 0;
  for (;; iter++) {
    int pid = fork();
    if (pid < 0)
      exit(1);
    if (pid == 0) {
      setup_test();
      execute_one();
      exit(0);
    }
    int status = 0;
    uint64_t start = current_time_ms();
    for (;;) {
      if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid)
        break;
      sleep_ms(1);
      if (current_time_ms() - start < 5000) {
        continue;
      }
      kill_and_wait(pid, &status);
      break;
    }
    check_leaks();
  }
}

uint64_t r[2] = {0xffffffffffffffff, 0xffffffffffffffff};

void execute_one(void)
{
  intptr_t res = 0;
  memcpy((void*)0x20000080, "/dev/kvm\000", 9);
  res = syscall(__NR_openat, 0xffffffffffffff9cul, 0x20000080ul, 0ul, 0ul);
  if (res != -1)
    r[0] = res;
  res = syscall(__NR_ioctl, r[0], 0xae01, 0ul);
  if (res != -1)
    r[1] = res;
  *(uint64_t*)0x20000100 = 0;
  *(uint32_t*)0x20000108 = 0;
  *(uint32_t*)0x2000010c = 0;
  syscall(__NR_ioctl, r[1], 0x4010ae67, 0x20000100ul);
  *(uint64_t*)0x20000000 = 0;
  *(uint32_t*)0x20000008 = 0x101000;
  *(uint32_t*)0x2000000c = 0;
  syscall(__NR_ioctl, r[1], 0x4010ae67, 0x20000000ul);
  *(uint64_t*)0x200000c0 = 0;
  *(uint32_t*)0x200000c8 = 0x10000;
  *(uint32_t*)0x200000cc = 0;
  inject_fault(0);
  syscall(__NR_ioctl, r[1], 0x4010ae68, 0x200000c0ul);
}
int main(void)
{
  syscall(__NR_mmap, 0x1ffff000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul);
  syscall(__NR_mmap, 0x20000000ul, 0x1000000ul, 7ul, 0x32ul, -1, 0ul);
  syscall(__NR_mmap, 0x21000000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul);
  setup_leak();
  setup_fault();
  loop();
  return 0;
}



Here is the memory leak function

int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
                     struct kvm_coalesced_mmio_zone *zone)
{
    int ret;
    struct kvm_coalesced_mmio_dev *dev;

    if (zone->pio != 1 && zone->pio != 0)
        return -EINVAL;

    dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev),//Here is the memory leak alloc
              GFP_KERNEL_ACCOUNT);
    if (!dev)
        return -ENOMEM;

    kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops);
    dev->kvm = kvm;
    dev->zone = *zone;

    mutex_lock(&kvm->slots_lock);
    ret = kvm_io_bus_register_dev(kvm,
                zone->pio ? KVM_PIO_BUS : KVM_MMIO_BUS,
                zone->addr, zone->size, &dev->dev);
    if (ret < 0)
        goto out_free_dev;
    list_add_tail(&dev->list, &kvm->coalesced_zones);
    mutex_unlock(&kvm->slots_lock);

    return 0;

out_free_dev:
    mutex_unlock(&kvm->slots_lock);
    kfree(dev);

    return ret;
}



2022-12-15 18:22:02

by Sean Christopherson

[permalink] [raw]
Subject: Re: Found a memory leak in kvm module

On Mon, Dec 12, 2022, 柳菁峰 wrote:
> Hello,I have found a memory leak bug in kvm module by syzkaller.It was found
> in linux-5.4 but it also could be reproduced in the latest linux version.

Ah, I assume by "linux-5.4" you mean "stable v5.4.x kernels that contain commit
7d1bc32d6477 ("KVM: Stop looking for coalesced MMIO zones if the bus is destroyed")",
because without that fix I can't see any bug that would affect both 5.4 and the
upstream kernel.

If my assumption is correct, then I'm 99% certain the issue is that the target
device isn't destroyed if allocating the new bus fails. I haven't had luck with
the automatic fault injection, but was able to confirm a leak with this hack.

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 13e88297f999..22d9ab1b5c25 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -5424,7 +5424,7 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
struct kvm_io_device *dev)
{
int i, j;
- struct kvm_io_bus *new_bus, *bus;
+ struct kvm_io_bus *new_bus = NULL, *bus;

lockdep_assert_held(&kvm->slots_lock);

@@ -5441,6 +5441,7 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
if (i == bus->dev_count)
return 0;

+ if (!IS_ENABLED(CONFIG_X86_64))
new_bus = kmalloc(struct_size(bus, range, bus->dev_count - 1),
GFP_KERNEL_ACCOUNT);
if (new_bus) {


The fix is to destroy the target device before bailing. I'll send a proper patch
either way, but it would be nice to get confirmation that this is the same bug
that you hit with "linux-5.4".

Thanks!

---
virt/kvm/coalesced_mmio.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 0be80c213f7f..5ef88f5a0864 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -187,15 +187,17 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
r = kvm_io_bus_unregister_dev(kvm,
zone->pio ? KVM_PIO_BUS : KVM_MMIO_BUS, &dev->dev);

+ kvm_iodevice_destructor(&dev->dev);
+
/*
* On failure, unregister destroys all devices on the
* bus _except_ the target device, i.e. coalesced_zones
- * has been modified. No need to restart the walk as
- * there aren't any zones left.
+ * has been modified. Bail after destroying the target
+ * device, there's no need to restart the walk as there
+ * aren't any zones left.
*/
if (r)
break;
- kvm_iodevice_destructor(&dev->dev);
}
}


base-commit: 0f30b25edea48433eb32448990557364436818e6
--

2022-12-19 07:53:58

by 柳菁峰

[permalink] [raw]
Subject: 答复: Found a memory leak in kvm module

I had tried to retest with your patch,and I think the memory leak was resolved by it.
Thanks!

-----邮件原件-----
发件人: Sean Christopherson [mailto:[email protected]]
发送时间: 2022年12月16日 2:17
收件人: 柳菁峰 <[email protected]>
抄送: [email protected]; [email protected]; [email protected]; [email protected]
主题: Re: Found a memory leak in kvm module

On Mon, Dec 12, 2022, 柳菁峰 wrote:
> Hello,I have found a memory leak bug in kvm module by syzkaller.It was
> found in linux-5.4 but it also could be reproduced in the latest linux version.

Ah, I assume by "linux-5.4" you mean "stable v5.4.x kernels that contain commit
7d1bc32d6477 ("KVM: Stop looking for coalesced MMIO zones if the bus is destroyed")", because without that fix I can't see any bug that would affect both 5.4 and the upstream kernel.

If my assumption is correct, then I'm 99% certain the issue is that the target device isn't destroyed if allocating the new bus fails. I haven't had luck with the automatic fault injection, but was able to confirm a leak with this hack.

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 13e88297f999..22d9ab1b5c25 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -5424,7 +5424,7 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
struct kvm_io_device *dev) {
int i, j;
- struct kvm_io_bus *new_bus, *bus;
+ struct kvm_io_bus *new_bus = NULL, *bus;

lockdep_assert_held(&kvm->slots_lock);

@@ -5441,6 +5441,7 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
if (i == bus->dev_count)
return 0;

+ if (!IS_ENABLED(CONFIG_X86_64))
new_bus = kmalloc(struct_size(bus, range, bus->dev_count - 1),
GFP_KERNEL_ACCOUNT);
if (new_bus) {


The fix is to destroy the target device before bailing. I'll send a proper patch either way, but it would be nice to get confirmation that this is the same bug that you hit with "linux-5.4".

Thanks!

---
virt/kvm/coalesced_mmio.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index 0be80c213f7f..5ef88f5a0864 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -187,15 +187,17 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
r = kvm_io_bus_unregister_dev(kvm,
zone->pio ? KVM_PIO_BUS : KVM_MMIO_BUS, &dev->dev);

+ kvm_iodevice_destructor(&dev->dev);
+
/*
* On failure, unregister destroys all devices on the
* bus _except_ the target device, i.e. coalesced_zones
- * has been modified. No need to restart the walk as
- * there aren't any zones left.
+ * has been modified. Bail after destroying the target
+ * device, there's no need to restart the walk as there
+ * aren't any zones left.
*/
if (r)
break;
- kvm_iodevice_destructor(&dev->dev);
}
}


base-commit: 0f30b25edea48433eb32448990557364436818e6
--