2021-01-22 03:48:27

by Alexey Kardashevskiy

Subject: BUG: MAX_LOCKDEP_KEYS too low!

Hi!

Syzkaller found this bug and produced a reproducer (below). I googled for a
similar bug and found one from 2019 which was fixed, so this appears to be
new.

The repro takes about half a minute to produce the message; "grep
lock-classes /proc/lockdep_stats" reports 8177 of 8192 afterwards, while
before running the repro it is 702. It is a POWER8 box.

The offender is htab->lockdep_key. If I run the repro at a slow rate, no
problem appears; traces show lockdep_unregister_key() is called and the
leak is quite slow.

Is this a known issue? Any hints on how to debug this further? I'd give
it a try since I have an easy reproducer. Thanks,



root@le-dbg:~# egrep "BD.*htab->lockdep_key" /proc/lockdep | wc -l
7449
root@le-dbg:~# egrep "BD.*htab->lockdep_key" /proc/lockdep | tail -n 3
(____ptrval____) FD: 1 BD: 1 ....: &htab->lockdep_key#9531
(____ptrval____) FD: 1 BD: 1 ....: &htab->lockdep_key#9532
(____ptrval____) FD: 1 BD: 1 ....: &htab->lockdep_key#9533
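
To watch the leak rate while the repro runs, a simple loop over
/proc/lockdep_stats is enough:

while sleep 1; do grep lock-classes /proc/lockdep_stats; done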


// autogenerated by syzkaller (https://github.com/google/syzkaller)

#define __unix__ 1
#define __gnu_linux__ 1
#define __linux__ 1

#define _GNU_SOURCE

#include <dirent.h>
#include <endian.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/prctl.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>

static unsigned long long procid;

static void sleep_ms(uint64_t ms)
{
usleep(ms * 1000);
}

static uint64_t current_time_ms(void)
{
struct timespec ts;
if (clock_gettime(CLOCK_MONOTONIC, &ts))
exit(1);
return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000;
}

static bool write_file(const char* file, const char* what, ...)
{
char buf[1024];
va_list args;
va_start(args, what);
vsnprintf(buf, sizeof(buf), what, args);
va_end(args);
buf[sizeof(buf) - 1] = 0;
int len = strlen(buf);
int fd = open(file, O_WRONLY | O_CLOEXEC);
if (fd == -1)
return false;
if (write(fd, buf, len) != len) {
int err = errno;
close(fd);
errno = err;
return false;
}
close(fd);
return true;
}

static void kill_and_wait(int pid, int* status)
{
kill(-pid, SIGKILL);
kill(pid, SIGKILL);
for (int i = 0; i < 100; i++) {
if (waitpid(-1, status, WNOHANG | __WALL) == pid)
return;
usleep(1000);
}
/* Last resort: abort stuck fuse connections that may block zombie reaping. */
DIR* dir = opendir("/sys/fs/fuse/connections");
if (dir) {
for (;;) {
struct dirent* ent = readdir(dir);
if (!ent)
break;
if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
continue;
char abort[300];
snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort",
ent->d_name);
int fd = open(abort, O_WRONLY);
if (fd == -1) {
continue;
}
if (write(fd, abort, 1) < 0) {
}
close(fd);
}
closedir(dir);
} else {
}
while (waitpid(-1, status, __WALL) != pid) {
}
}

static void setup_test()
{
prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
setpgrp();
write_file("/proc/self/oom_score_adj", "1000");
}

static void execute_one(void);

#define WAIT_FLAGS __WALL

static void loop(void)
{
int iter = 0;
for (;; iter++) {
int pid = fork();
if (pid < 0)
exit(1);
if (pid == 0) {
setup_test();
execute_one();
exit(0);
}
int status = 0;
uint64_t start = current_time_ms();
for (;;) {
if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid)
break;
sleep_ms(1);
if (current_time_ms() - start < 5000) {
continue;
}
kill_and_wait(pid, &status);
break;
}
}
}

/* Fallback syscall numbers for powerpc (the report is from a POWER8 box). */
#ifndef __NR_bpf
#define __NR_bpf 361
#endif
#ifndef __NR_mmap
#define __NR_mmap 90
#endif

uint64_t r[1] = {0xffffffffffffffff};

static void execute_one(void)
{
intptr_t res = 0;
/* union bpf_attr for BPF_MAP_CREATE: */
*(uint32_t*)0x20000280 = 9; /* map_type = BPF_MAP_TYPE_LRU_HASH */
*(uint32_t*)0x20000284 = 1; /* key_size */
*(uint32_t*)0x20000288 = 6; /* value_size */
*(uint32_t*)0x2000028c = 5; /* max_entries */
*(uint32_t*)0x20000290 = 0; /* map_flags */
*(uint32_t*)0x20000294 = -1; /* inner_map_fd */
*(uint32_t*)0x20000298 = 0; /* numa_node */
/* map_name[16] = "" */
*(uint8_t*)0x2000029c = 0;
*(uint8_t*)0x2000029d = 0;
*(uint8_t*)0x2000029e = 0;
*(uint8_t*)0x2000029f = 0;
*(uint8_t*)0x200002a0 = 0;
*(uint8_t*)0x200002a1 = 0;
*(uint8_t*)0x200002a2 = 0;
*(uint8_t*)0x200002a3 = 0;
*(uint8_t*)0x200002a4 = 0;
*(uint8_t*)0x200002a5 = 0;
*(uint8_t*)0x200002a6 = 0;
*(uint8_t*)0x200002a7 = 0;
*(uint8_t*)0x200002a8 = 0;
*(uint8_t*)0x200002a9 = 0;
*(uint8_t*)0x200002aa = 0;
*(uint8_t*)0x200002ab = 0;
*(uint32_t*)0x200002ac = 0; /* map_ifindex */
*(uint32_t*)0x200002b0 = -1; /* btf_fd */
*(uint32_t*)0x200002b4 = 0;
*(uint32_t*)0x200002b8 = 0;
*(uint32_t*)0x200002bc = 0;
res = syscall(__NR_bpf, 0ul /* BPF_MAP_CREATE */, 0x20000280ul, 0x40ul);
if (res != -1)
r[0] = res;
/* union bpf_attr for BPF_MAP_UPDATE_BATCH on the new map: */
*(uint64_t*)0x20000100 = 0; /* in_batch */
*(uint64_t*)0x20000108 = 0; /* out_batch */
*(uint64_t*)0x20000110 = 0x200002c0; /* keys */
*(uint64_t*)0x20000118 = 0x20000000; /* values */
*(uint32_t*)0x20000120 = 0x1000; /* count = 4096 */
*(uint32_t*)0x20000124 = r[0]; /* map_fd */
*(uint64_t*)0x20000128 = 0; /* elem_flags */
*(uint64_t*)0x20000130 = 0; /* flags */
syscall(__NR_bpf, 0x1aul /* BPF_MAP_UPDATE_BATCH */, 0x20000100ul, 0x38ul);

}
int main(void)
{
/* 0x32 = MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS; prot 7 = rwx */
syscall(__NR_mmap, 0x1fff0000ul, 0x10000ul, 0ul, 0x32ul, -1, 0ul);
syscall(__NR_mmap, 0x20000000ul, 0x1000000ul, 7ul, 0x32ul, -1, 0ul);
syscall(__NR_mmap, 0x21000000ul, 0x10000ul, 0ul, 0x32ul, -1, 0ul);
for (procid = 0; procid < 16; procid++) {
if (fork() == 0) {
loop();
}
}
sleep(1000000);
return 0;
}




--
Alexey


2021-01-22 09:41:09

by Dmitry Vyukov

Subject: Re: BUG: MAX_LOCKDEP_KEYS too low!

On Fri, Jan 22, 2021 at 4:43 AM Alexey Kardashevskiy <[email protected]> wrote:
>
> Hi!
>
> Syzkaller found this bug and produced a reproducer (below). I googled for a
> similar bug and found one from 2019 which was fixed, so this appears to be
> new.
>
> The repro takes about half a minute to produce the message; "grep
> lock-classes /proc/lockdep_stats" reports 8177 of 8192 afterwards, while
> before running the repro it is 702. It is a POWER8 box.
>
> The offender is htab->lockdep_key. If I run the repro at a slow rate, no
> problem appears; traces show lockdep_unregister_key() is called and the
> leak is quite slow.
>
> Is this a known issue? Any hints on how to debug this further? I'd give
> it a try since I have an easy reproducer. Thanks,

+netdev as it discusses net namespaces as well

Hi Alexey,

The reproducer only does 2 bpf syscalls, so something is slowly leaking in
bpf. My first suspect would be one of the work items below. Since the
workqueue is async, it may drain so slowly that a backlog builds up only
when tasks are spawned fast. I don't know if there is a procfs/debugfs
introspection file to monitor workqueue lengths to verify this hypothesis.
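
For what it's worth, the workqueue state can at least be snapshotted on
demand, assuming CONFIG_MAGIC_SYSRQ=y (show_workqueue_state() runs as part
of SysRq-t):

echo t > /proc/sysrq-trigger    # dumps tasks plus busy workqueues to dmesg
dmesg | grep -A 4 'Showing busy workqueues'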

$ grep INIT_WORK kernel/bpf/*.c
kernel/bpf/arraymap.c: INIT_WORK(&aux->work, prog_array_map_clear_deferred);
kernel/bpf/cgroup.c: INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release);
kernel/bpf/core.c: INIT_WORK(&aux->work, bpf_prog_free_deferred);
kernel/bpf/cpumap.c: INIT_WORK(&old_rcpu->kthread_stop_wq, cpu_map_kthread_stop);
kernel/bpf/syscall.c: INIT_WORK(&map->work, bpf_map_free_deferred);
kernel/bpf/syscall.c: INIT_WORK(&link->work, bpf_link_put_deferred);

However, if it's indeed one of the workqueues, I am not sure how it
should be fixed.
We are having a similar (even worse) problem with the async destruction
of network namespaces. These are way slower and take lots of mutexes.
I suspect that lots of hangs on net mutexes on syzbot dashboard are
related to that.

Unbounded async queueing is never a good idea. The classical solution
to this is to make the queue bounded and put back pressure on
producers. In this case it would limit the speed at which new
processes are created and make resource consumption (including # of
lockdep entries) bounded.
The restriction probably needs to be per-callback type, at least for
global workqueues.
However, I suspect that lots of callers of schedule_work() can't block
(that being the reason for moving the work to the background in the first
place?). So the back pressure may need to be applied at a different
point, which makes things a bit more complicated.
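
To illustrate the bounded-queue idea, here is a minimal userspace sketch
(all names are invented for the example; it shows the general pattern, not
a proposed kernel patch). The producer blocks on a counting semaphore once
QUEUE_CAP items are outstanding, so it can never outrun the consumer:

/* Bounded deferred-free queue with producer back pressure (userspace sketch).
 * Build: cc -pthread -o backpressure backpressure.c */
#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>
#include <unistd.h>

#define QUEUE_CAP 8                /* bound on outstanding deferred items */

static int queue[QUEUE_CAP];
static int head, tail;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static sem_t free_slots;           /* producers sleep when this reaches 0 */
static sem_t used_slots;           /* the worker sleeps when this reaches 0 */

/* Producer side: sem_wait() sleeping here is the back pressure. */
static void defer_free(int obj)
{
	sem_wait(&free_slots);
	pthread_mutex_lock(&lock);
	queue[tail] = obj;
	tail = (tail + 1) % QUEUE_CAP;
	pthread_mutex_unlock(&lock);
	sem_post(&used_slots);
}

/* Consumer side: frees one item at a time, waking one blocked producer. */
static void *worker(void *arg)
{
	(void)arg;
	for (;;) {
		int obj;

		sem_wait(&used_slots);
		pthread_mutex_lock(&lock);
		obj = queue[head];
		head = (head + 1) % QUEUE_CAP;
		pthread_mutex_unlock(&lock);
		usleep(10000);         /* stand-in for a slow map_free() */
		printf("freed %d\n", obj);
		sem_post(&free_slots);
	}
	return NULL;
}

int main(void)
{
	pthread_t t;
	int i;

	sem_init(&free_slots, 0, QUEUE_CAP);
	sem_init(&used_slots, 0, 0);
	pthread_create(&t, NULL, worker, NULL);
	for (i = 0; i < 100; i++)
		defer_free(i);         /* throttled to the consumer's speed */
	sleep(2);
	return 0;
}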


> [... full syzkaller reproducer snipped; quoted verbatim from the message above ...]
>
> --
> Alexey

2021-01-22 22:58:39

by Alexey Kardashevskiy

Subject: Re: BUG: MAX_LOCKDEP_KEYS too low!



On 23/01/2021 02:30, Tetsuo Handa wrote:
> On 2021/01/22 22:28, Tetsuo Handa wrote:
>> On 2021/01/22 21:10, Dmitry Vyukov wrote:
>>> On Fri, Jan 22, 2021 at 1:03 PM Alexey Kardashevskiy <[email protected]> wrote:
>>>>
>>>>
>>>>
>>>> On 22/01/2021 21:30, Tetsuo Handa wrote:
>>>>> On 2021/01/22 18:16, Dmitry Vyukov wrote:
>>>>>> The reproducer only does 2 bpf syscalls, so something is slowly leaking in
>>>>>> bpf. My first suspect would be one of the work items below. Since the
>>>>>> workqueue is async, it may drain so slowly that a backlog builds up only
>>>>>> when tasks are spawned fast. I don't know if there is a procfs/debugfs
>>>>>> introspection file to monitor workqueue lengths to verify this hypothesis.
>>>>>
>>>>> If you can reproduce locally, you can call show_workqueue_state()
>>>>> (part of SysRq-t) when hitting the limit.
>>>>>
>>>>> --- a/kernel/locking/lockdep.c
>>>>> +++ b/kernel/locking/lockdep.c
>>>>> @@ -1277,6 +1277,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
>>>>>
>>>>> print_lockdep_off("BUG: MAX_LOCKDEP_KEYS too low!");
>>>>> dump_stack();
>>>>> + show_workqueue_state();
>>>>> return NULL;
>>>>> }
>>>>> nr_lock_classes++;
>>>>>
>>>>
>>>>
>>>>
>>>> Here is the result:
>>>> https://pastebin.com/rPn0Cytu
>>>
>>> Do you mind posting this publicly?
>>> Yes, it seems that bpf_map_free_deferred is the problem (11138
>>> outstanding callbacks).
>>>
>>
>> Wow. Horribly stuck queue. I guess BPF wants to try WQ created by
>>
>> alloc_workqueue("bpf_free_wq", WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 0);
>>
>> rather than system_wq . You can add Tejun Heo <[email protected]> for WQ.
>>
>> Anyway, please post your result to ML.



https://pastebin.com/JfrmzguK is with the patch below applied. There seems
to be less output. Interestingly, just as I was about to hit "send", the OOM
killer kicked in and tried killing a bunch of "maxlockdep" processes (my
test case):

[  891.037315] [  31007]     0 31007      281        5    49152        0          1000 maxlockdep
[  891.037540] [  31009]     0 31009      281        5    49152        0          1000 maxlockdep
[  891.037760] [  31012]     0 31012      281        5    49152        0          1000 maxlockdep
[  891.037980] [  31013]     0 31013      281        5    47104        0             0 maxlockdep
[  891.038210] [  31014]     0 31014      281        5    49152        0          1000 maxlockdep
[  891.038429] [  31018]     0 31018      281        5    47104        0             0 maxlockdep
[  891.038652] [  31019]     0 31019      281        5    49152        0          1000 maxlockdep
[  891.038874] [  31020]     0 31020      281        5    49152        0          1000 maxlockdep
[  891.039095] [  31021]     0 31021      281        5    49152        0          1000 maxlockdep
[  891.039317] [  31022]     0 31022      281        5    47104        0             0 maxlockdep




And (re)adding LKML and Tejun as suggested. Thanks,


>>
>
> Does this patch (which is only compile tested) reduce the number of pending
> works when hitting "BUG: MAX_LOCKDEP_KEYS too low!" ?
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 07cb5d15e743..c6c6902090f0 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -41,6 +41,7 @@ struct bpf_local_storage_map;
> struct kobject;
> struct mem_cgroup;
>
> +extern struct workqueue_struct *bpf_free_wq;
> extern struct idr btf_idr;
> extern spinlock_t btf_idr_lock;
> extern struct kobject *btf_kobj;
> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> index 1f8453343bf2..8b1cf6aab089 100644
> --- a/kernel/bpf/arraymap.c
> +++ b/kernel/bpf/arraymap.c
> @@ -994,7 +994,7 @@ static void prog_array_map_clear(struct bpf_map *map)
> struct bpf_array_aux *aux = container_of(map, struct bpf_array,
> map)->aux;
> bpf_map_inc(map);
> - schedule_work(&aux->work);
> + queue_work(bpf_free_wq, &aux->work);
> }
>
> static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
> diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
> index 96555a8a2c54..f272844163df 100644
> --- a/kernel/bpf/cgroup.c
> +++ b/kernel/bpf/cgroup.c
> @@ -160,7 +160,7 @@ static void cgroup_bpf_release_fn(struct percpu_ref *ref)
> struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt);
>
> INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release);
> - queue_work(system_wq, &cgrp->bpf.release_work);
> + queue_work(bpf_free_wq, &cgrp->bpf.release_work);
> }
>
> /* Get underlying bpf_prog of bpf_prog_list entry, regardless if it's through
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 261f8692d0d2..9d76c0d77687 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -34,6 +34,15 @@
> #include <linux/log2.h>
> #include <asm/unaligned.h>
>
> +struct workqueue_struct *bpf_free_wq;
> +
> +static int __init bpf_free_wq_init(void)
> +{
> + bpf_free_wq = alloc_workqueue("bpf_free_wq", WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 0);
> + return 0;
> +}
> +subsys_initcall(bpf_free_wq_init);
> +
> /* Registers */
> #define BPF_R0 regs[BPF_REG_0]
> #define BPF_R1 regs[BPF_REG_1]
> @@ -2152,7 +2161,7 @@ void bpf_prog_free(struct bpf_prog *fp)
> if (aux->dst_prog)
> bpf_prog_put(aux->dst_prog);
> INIT_WORK(&aux->work, bpf_prog_free_deferred);
> - schedule_work(&aux->work);
> + queue_work(bpf_free_wq, &aux->work);
> }
> EXPORT_SYMBOL_GPL(bpf_prog_free);
>
> diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
> index 747313698178..6507cc8263fc 100644
> --- a/kernel/bpf/cpumap.c
> +++ b/kernel/bpf/cpumap.c
> @@ -515,7 +515,7 @@ static void __cpu_map_entry_replace(struct bpf_cpu_map *cmap,
> if (old_rcpu) {
> call_rcu(&old_rcpu->rcu, __cpu_map_entry_free);
> INIT_WORK(&old_rcpu->kthread_stop_wq, cpu_map_kthread_stop);
> - schedule_work(&old_rcpu->kthread_stop_wq);
> + queue_work(bpf_free_wq, &old_rcpu->kthread_stop_wq);
> }
> }
>
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index e5999d86c76e..084b903b4ee6 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -477,7 +477,7 @@ static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
> bpf_map_free_id(map, do_idr_lock);
> btf_put(map->btf);
> INIT_WORK(&map->work, bpf_map_free_deferred);
> - schedule_work(&map->work);
> + queue_work(bpf_free_wq, &map->work);
> }
> }
>
> @@ -2343,7 +2343,7 @@ void bpf_link_put(struct bpf_link *link)
>
> if (in_atomic()) {
> INIT_WORK(&link->work, bpf_link_put_deferred);
> - schedule_work(&link->work);
> + queue_work(bpf_free_wq, &link->work);
> } else {
> bpf_link_free(link);
> }
>

--
Alexey

2021-01-23 00:44:02

by Tetsuo Handa

Subject: BPF: unbounded bpf_map_free_deferred problem

Hello, BPF developers.

Alexey Kardashevskiy is reporting that system_wq gets stuck due to flooding by
unbounded bpf_map_free_deferred work. Using a WQ_MEM_RECLAIM | WQ_HIGHPRI |
WQ_UNBOUND workqueue did not solve the problem. Is it possible that a refcount
leak somewhere is preventing bpf_map_free_deferred() from completing? Please see
https://lkml.kernel.org/r/CACT4Y+Z+kwPM=WUzJ-e359PWeLLqmF0w4Yxp1spzZ=+J0ekrag@mail.gmail.com .

On 2021/01/23 7:53, Alexey Kardashevskiy wrote:
> [... full quote of the previous message, including the OOM report and the
> proposed bpf_free_wq patch, snipped ...]

2021-01-23 03:31:04

by Cong Wang

Subject: Re: BPF: unbounded bpf_map_free_deferred problem

On Fri, Jan 22, 2021 at 4:42 PM Tetsuo Handa
<[email protected]> wrote:
>
> Hello, BPF developers.
>
> Alexey Kardashevskiy is reporting that system_wq gets stuck due to flooding by
> unbounded bpf_map_free_deferred work. Using a WQ_MEM_RECLAIM | WQ_HIGHPRI |
> WQ_UNBOUND workqueue did not solve the problem. Is it possible that a refcount
> leak somewhere is preventing bpf_map_free_deferred() from completing? Please see
> https://lkml.kernel.org/r/CACT4Y+Z+kwPM=WUzJ-e359PWeLLqmF0w4Yxp1spzZ=+J0ekrag@mail.gmail.com .
>

Which map does the reproducer create? And where exactly do those works
block?

Different map->ops->map_free() implementations wait for different reasons;
for example, htab_map_free() waits for in-flight htab_elem_free_rcu()
callbacks. I can't immediately see how they could wait for each other, if
that is what you meant above.

Thanks.

2021-01-23 06:39:09

by Alexey Kardashevskiy

Subject: Re: BUG: MAX_LOCKDEP_KEYS too low!



On 23/01/2021 17:01, Hillf Danton wrote:
> On Sat, 23 Jan 2021 09:53:42 +1100 Alexey Kardashevskiy wrote:
>> [... deep quote trimmed ...]
>>>>> Do you mind posting this publicly?
>>>>> Yes, it seems that bpf_map_free_deferred is the problem (11138
>>>>> outstanding callbacks).
>
> We need to set up a local queue for releasing bpf maps if 10,000 pending
> works means flooding.
>
>>>> [... quote trimmed ...]
>>
>> https://pastebin.com/JfrmzguK is with the patch below applied. There seems
>> to be less output. Interestingly, just as I was about to hit "send", the OOM
>> killer kicked in and tried killing a bunch of "maxlockdep" processes (my
>> test case):
>>
>> [ 891.037315] [ 31007] 0 31007 281 5 49152 0
>> 1000 maxlockdep
>> [ 891.037540] [ 31009] 0 31009 281 5 49152 0
>> 1000 maxlockdep
>> [ 891.037760] [ 31012] 0 31012 281 5 49152 0
>> 1000 maxlockdep
>> [ 891.037980] [ 31013] 0 31013 281 5 47104 0
>> 0 maxlockdep
>> [ 891.038210] [ 31014] 0 31014 281 5 49152 0
>> 1000 maxlockdep
>> [ 891.038429] [ 31018] 0 31018 281 5 47104 0
>> 0 maxlockdep
>> [ 891.038652] [ 31019] 0 31019 281 5 49152 0
>> 1000 maxlockdep
>> [ 891.038874] [ 31020] 0 31020 281 5 49152 0
>> 1000 maxlockdep
>> [ 891.039095] [ 31021] 0 31021 281 5 49152 0
>> 1000 maxlockdep
>> [ 891.039317] [ 31022] 0 31022 281 5 47104 0
>> 0 maxlockdep
>>
>
> A local queue, a list head plus a spin lock, helps to collapse all pending
> map->work entities into a single work item, cutting the risk of work
> flooding the WQ.
>
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -448,16 +448,40 @@ static void bpf_map_release_memcg(struct
> }
> #endif
>
> -/* called from workqueue */
> +static int worker_idle = 1;
> +static LIST_HEAD(bpf_map_work_list);
> +static DEFINE_SPINLOCK(bpf_map_work_lock);
> +
> static void bpf_map_free_deferred(struct work_struct *work)
> {
> - struct bpf_map *map = container_of(work, struct bpf_map, work);
> + struct bpf_map *map;
> +
> + worker_idle = 0;
> +again:
> + map = NULL;
> + spin_lock_irq(&bpf_map_work_lock);
> +
> + if (!list_empty(&bpf_map_work_list)) {
> + map = list_first_entry(&bpf_map_work_list, struct bpf_map,
> + work_list);
> + list_del_init(&map->work_list);
> + } else
> + worker_idle = 1;
> +
> + spin_unlock_irq(&bpf_map_work_lock);
> +
> + if (!map)
> + return;
>
> security_bpf_map_free(map);
> bpf_map_release_memcg(map);
> /* implementation dependent freeing */
> map->ops->map_free(map);
> +
> + cond_resched();
> + goto again;
> }
> +static DECLARE_WORK(bpf_map_release_work, bpf_map_free_deferred);
>
> static void bpf_map_put_uref(struct bpf_map *map)
> {
> @@ -473,11 +497,20 @@ static void bpf_map_put_uref(struct bpf_
> static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
> {
> if (atomic64_dec_and_test(&map->refcnt)) {
> + unsigned long flags;
> + int idle;
> +
> /* bpf_map_free_id() must be called first */
> bpf_map_free_id(map, do_idr_lock);
> btf_put(map->btf);
> - INIT_WORK(&map->work, bpf_map_free_deferred);
> - schedule_work(&map->work);
> +
> + spin_lock_irqsave(&bpf_map_work_lock, flags);
> + idle = worker_idle;
> + list_add(&map->work_list, &bpf_map_work_list);
> + spin_unlock_irqrestore(&bpf_map_work_lock, flags);
> +
> + if (idle)
> + queue_work(system_unbound_wq, &bpf_map_release_work);
> }
> }
>
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -149,7 +149,7 @@ struct bpf_map {
> */
> atomic64_t refcnt ____cacheline_aligned;
> atomic64_t usercnt;
> - struct work_struct work;
> + struct list_head work_list;
> struct mutex freeze_mutex;
> u64 writecnt; /* writable mmap cnt; protected by freeze_mutex */
> };
> --
>

This behaves quite differently but still produces the message (I have
show_workqueue_state() right after the bug message):


[ 85.803991] BUG: MAX_LOCKDEP_KEYS too low!
[ 85.804338] turning off the locking correctness validator.
[ 85.804474] Showing busy workqueues and worker pools:
[ 85.804620] workqueue events_unbound: flags=0x2
[ 85.804764] pwq 16: cpus=0-7 flags=0x4 nice=0 active=1/512 refcnt=3
[ 85.804965] in-flight: 81:bpf_map_free_deferred
[ 85.805229] workqueue events_power_efficient: flags=0x80
[ 85.805357] pwq 4: cpus=2 node=0 flags=0x0 nice=0 active=1/256 refcnt=2
[ 85.805558] in-flight: 57:gc_worker
[ 85.805877] pool 4: cpus=2 node=0 flags=0x0 nice=0 hung=0s workers=3 idle: 82 24
[ 85.806147] pool 16: cpus=0-7 flags=0x4 nice=0 hung=69s workers=3 idle: 7 251
^C[ 100.129747] maxlockdep (5104) used greatest stack depth: 8032 bytes left

root@le-dbg:~# grep "lock-classes" /proc/lockdep_stats
lock-classes: 8192 [max: 8192]





--
Alexey

2021-01-23 10:34:28

by Tetsuo Handa

Subject: Re: BUG: MAX_LOCKDEP_KEYS too low!

On 2021/01/23 15:35, Alexey Kardashevskiy wrote:
> This behaves quite differently but still produces the message (I have
> show_workqueue_state() right after the bug message):
>
>
> [   85.803991] BUG: MAX_LOCKDEP_KEYS too low!
> [   85.804338] turning off the locking correctness validator.
> [   85.804474] Showing busy workqueues and worker pools:
> [   85.804620] workqueue events_unbound: flags=0x2
> [   85.804764]   pwq 16: cpus=0-7 flags=0x4 nice=0 active=1/512 refcnt=3
> [   85.804965]     in-flight: 81:bpf_map_free_deferred
> [   85.805229] workqueue events_power_efficient: flags=0x80
> [   85.805357]   pwq 4: cpus=2 node=0 flags=0x0 nice=0 active=1/256 refcnt=2
> [   85.805558]     in-flight: 57:gc_worker
> [   85.805877] pool 4: cpus=2 node=0 flags=0x0 nice=0 hung=0s workers=3 idle: 82 24
> [   85.806147] pool 16: cpus=0-7 flags=0x4 nice=0 hung=69s workers=3 idle: 7 251
> ^C[  100.129747] maxlockdep (5104) used greatest stack depth: 8032 bytes left
>
> root@le-dbg:~# grep "lock-classes" /proc/lockdep_stats
>  lock-classes:                         8192 [max: 8192]
>

Right. Hillf's patch can reduce the number of active workqueue worker threads,
because only one worker thread can call bpf_map_free_deferred() (which is nice
because it avoids bloat of the active= and refcnt= fields). But Hillf's patch
does not fix the cause of the "BUG: MAX_LOCKDEP_KEYS too low!" message.

As Dmitry mentioned, the bpf syscall allows producing work items faster than
bpf_map_free_deferred() can consume them. (And a similar problem is observed
for network namespaces.) Unless there is a bug that prevents
bpf_map_free_deferred() from completing, the classical solution is to put
pressure on the producers (i.e. slow down the bpf syscall side) in a way that
the consumers (i.e. __bpf_map_put()) will not schedule thousands of backlog
"struct bpf_map" works.

2021-01-23 11:36:42

by Alexey Kardashevskiy

Subject: Re: BUG: MAX_LOCKDEP_KEYS too low!



On 23/01/2021 21:29, Tetsuo Handa wrote:
>> [... quote of the dmesg output above trimmed ...]
>
> Right. Hillf's patch can reduce the number of active workqueue worker threads,
> because only one worker thread can call bpf_map_free_deferred() (which is nice
> because it avoids bloat of the active= and refcnt= fields). But Hillf's patch
> does not fix the cause of the "BUG: MAX_LOCKDEP_KEYS too low!" message.
>
> As Dmitry mentioned, the bpf syscall allows producing work items faster than
> bpf_map_free_deferred() can consume them. (And a similar problem is observed
> for network namespaces.) Unless there is a bug that prevents
> bpf_map_free_deferred() from completing, the classical solution is to put
> pressure on the producers (i.e. slow down the bpf syscall side) in a way that
> the consumers (i.e. __bpf_map_put()) will not schedule thousands of backlog
> "struct bpf_map" works.


Shouldn't the first 8192 in "grep lock-classes /proc/lockdep_stats" decrease
over time (it does not)? Or is it permanent once the limit has been hit?




--
Alexey

2021-01-23 13:16:29

by Tetsuo Handa

Subject: Re: BUG: MAX_LOCKDEP_KEYS too low!

On 2021/01/23 20:26, Alexey Kardashevskiy wrote:
> Shouldn't the first 8192 in "grep lock-classes /proc/lockdep_stats" decrease
> over time (it does not)? Or is it permanent once the limit has been hit?

Since lockdep_unregister_key() becomes a no-op (graph_lock() returns 0 because
debug_locks was changed from 1 to 0 by __debug_locks_off(), called via
debug_locks_off() from debug_locks_off_graph_unlock()), the lock-classes value
in /proc/lockdep_stats will not decrease after "BUG: MAX_LOCKDEP_KEYS too low!"
is printed.
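
A simplified paraphrase of that logic (condensed from kernel/locking/lockdep.c
of this era; not verbatim source):

static int graph_lock(void)
{
	lockdep_lock();
	/* Once the validator is off, refuse to touch the graph. */
	if (!debug_locks) {
		lockdep_unlock();
		return 0;
	}
	return 1;
}

void lockdep_unregister_key(struct lock_class_key *key)
{
	/* ... */
	if (!graph_lock())
		return; /* permanent no-op once debug_locks is 0, so
			 * nr_lock_classes never shrinks again */
	/* normal path: zap the classes registered under this key,
	 * decrementing the lock-classes count */
	/* ... */
}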