2021-03-16 19:50:55

by Oliver Glitta

[permalink] [raw]
Subject: [PATCH 1/2] selftests: add a kselftest for SLUB debugging functionality

From: Oliver Glitta <[email protected]>

SLUB has resiliency_test() function which is hidden behind #ifdef
SLUB_RESILIENCY_TEST that is not part of Kconfig, so nobody
runs it. Kselftest should proper replacement for it.

Try changing byte in redzone after allocation and changing
pointer to next free node, first byte, 50th byte and redzone
byte. Check if validation finds errors.

There are several differences from the original resiliency test:
Tests create own caches with known state instead of corrupting
shared kmalloc caches.

The corruption of freepointer uses correct offset, the original
resiliency test got broken with freepointer changes.

Scratch changing random byte test, because it does not have
meaning in this form where we need deterministic results.

Add new option CONFIG_TEST_SLUB in Kconfig.

Add parameter to function validate_slab_cache() to return
number of errors in cache.

Signed-off-by: Oliver Glitta <[email protected]>
---
lib/Kconfig.debug | 4 +
lib/Makefile | 1 +
lib/test_slub.c | 125 +++++++++++++++++++++++++++
mm/slab.h | 1 +
mm/slub.c | 34 +++++---
tools/testing/selftests/lib/Makefile | 2 +-
tools/testing/selftests/lib/config | 1 +
tools/testing/selftests/lib/slub.sh | 3 +
8 files changed, 159 insertions(+), 12 deletions(-)
create mode 100644 lib/test_slub.c
create mode 100755 tools/testing/selftests/lib/slub.sh

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2779c29d9981..2d56092abbc4 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2123,6 +2123,10 @@ config TEST_KSTRTOX
config TEST_PRINTF
tristate "Test printf() family of functions at runtime"

+config TEST_SLUB
+ tristate "Test SLUB cache errors at runtime"
+ depends on SLUB_DEBUG
+
config TEST_BITMAP
tristate "Test bitmap_*() family of functions at runtime"
help
diff --git a/lib/Makefile b/lib/Makefile
index b5307d3eec1a..b6603803b1c4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -83,6 +83,7 @@ obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o
obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o
obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
obj-$(CONFIG_TEST_PRINTF) += test_printf.o
+obj-$(CONFIG_TEST_SLUB) += test_slub.o
obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
obj-$(CONFIG_TEST_STRSCPY) += test_strscpy.o
obj-$(CONFIG_TEST_UUID) += test_uuid.o
diff --git a/lib/test_slub.c b/lib/test_slub.c
new file mode 100644
index 000000000000..0075d9b44251
--- /dev/null
+++ b/lib/test_slub.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test cases for slub facility.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include "../mm/slab.h"
+
+#include "../tools/testing/selftests/kselftest_module.h"
+
+
+KSTM_MODULE_GLOBALS();
+
+
+static void __init validate_result(struct kmem_cache *s, int expected_errors)
+{
+ int errors = 0;
+
+ validate_slab_cache(s, &errors);
+ KSTM_CHECK_ZERO(errors - expected_errors);
+}
+
+static void __init test_clobber_zone(void)
+{
+ struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_alloc", 64, 0,
+ SLAB_RED_ZONE, NULL);
+ u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
+
+ p[64] = 0x12;
+ pr_err("1. kmem_cache: Clobber Redzone 0x12->0x%p\n", p + 64);
+
+ validate_result(s, 1);
+ kmem_cache_free(s, p);
+ kmem_cache_destroy(s);
+}
+
+static void __init test_next_pointer(void)
+{
+ struct kmem_cache *s = kmem_cache_create("TestSlub_next_ptr_free", 64, 0,
+ SLAB_RED_ZONE, NULL);
+ u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
+
+ kmem_cache_free(s, p);
+ p[s->offset] = 0x12;
+ pr_err("1. kmem_cache: Clobber next pointer 0x34 -> -0x%p\n", p);
+
+ validate_result(s, 1);
+ kmem_cache_destroy(s);
+}
+
+static void __init test_first_word(void)
+{
+ struct kmem_cache *s = kmem_cache_create("TestSlub_1th_word_free", 64, 0,
+ SLAB_POISON, NULL);
+ u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
+
+ kmem_cache_free(s, p);
+ *p = 0x78;
+ pr_err("2. kmem_cache: Clobber first word 0x78->0x%p\n", p);
+
+ validate_result(s, 1);
+ kmem_cache_destroy(s);
+}
+
+static void __init test_clobber_50th_byte(void)
+{
+ struct kmem_cache *s = kmem_cache_create("TestSlub_50th_word_free", 64, 0,
+ SLAB_POISON, NULL);
+ u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
+
+ kmem_cache_free(s, p);
+ p[50] = 0x9a;
+ pr_err("3. kmem_cache: Clobber 50th byte 0x9a->0x%p\n", p);
+
+ validate_result(s, 1);
+ kmem_cache_destroy(s);
+}
+
+static void __init test_clobber_redzone_free(void)
+{
+ struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_free", 64, 0,
+ SLAB_RED_ZONE, NULL);
+ u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
+
+ kmem_cache_free(s, p);
+ p[64] = 0xab;
+ pr_err("4. kmem_cache: Clobber redzone 0xab->0x%p\n", p);
+
+ validate_result(s, 1);
+ kmem_cache_destroy(s);
+}
+
+static void __init resiliency_test(void)
+{
+
+ BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
+
+ pr_err("SLUB resiliency testing\n");
+ pr_err("-----------------------\n");
+ pr_err("A. Corruption after allocation\n");
+
+ test_clobber_zone();
+
+ pr_err("\nB. Corruption after free\n");
+
+ test_next_pointer();
+ test_first_word();
+ test_clobber_50th_byte();
+ test_clobber_redzone_free();
+}
+
+
+static void __init selftest(void)
+{
+ resiliency_test();
+}
+
+
+KSTM_MODULE_LOADERS(test_slub);
+MODULE_LICENSE("GPL");
diff --git a/mm/slab.h b/mm/slab.h
index 076582f58f68..5fc18d506b3b 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -215,6 +215,7 @@ DECLARE_STATIC_KEY_TRUE(slub_debug_enabled);
DECLARE_STATIC_KEY_FALSE(slub_debug_enabled);
#endif
extern void print_tracking(struct kmem_cache *s, void *object);
+long validate_slab_cache(struct kmem_cache *s, int *errors);
#else
static inline void print_tracking(struct kmem_cache *s, void *object)
{
diff --git a/mm/slub.c b/mm/slub.c
index e26c274b4657..c00e2b263e03 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4612,7 +4612,8 @@ static int count_total(struct page *page)
#endif

#ifdef CONFIG_SLUB_DEBUG
-static void validate_slab(struct kmem_cache *s, struct page *page)
+static void validate_slab(struct kmem_cache *s, struct page *page,
+ int *errors)
{
void *p;
void *addr = page_address(page);
@@ -4620,8 +4621,10 @@ static void validate_slab(struct kmem_cache *s, struct page *page)

slab_lock(page);

- if (!check_slab(s, page) || !on_freelist(s, page, NULL))
+ if (!check_slab(s, page) || !on_freelist(s, page, NULL)) {
+ *errors += 1;
goto unlock;
+ }

/* Now we know that a valid freelist exists */
map = get_map(s, page);
@@ -4629,8 +4632,10 @@ static void validate_slab(struct kmem_cache *s, struct page *page)
u8 val = test_bit(__obj_to_index(s, addr, p), map) ?
SLUB_RED_INACTIVE : SLUB_RED_ACTIVE;

- if (!check_object(s, page, p, val))
+ if (!check_object(s, page, p, val)) {
+ *errors += 1;
break;
+ }
}
put_map(map);
unlock:
@@ -4638,7 +4643,7 @@ static void validate_slab(struct kmem_cache *s, struct page *page)
}

static int validate_slab_node(struct kmem_cache *s,
- struct kmem_cache_node *n)
+ struct kmem_cache_node *n, int *errors)
{
unsigned long count = 0;
struct page *page;
@@ -4647,30 +4652,34 @@ static int validate_slab_node(struct kmem_cache *s,
spin_lock_irqsave(&n->list_lock, flags);

list_for_each_entry(page, &n->partial, slab_list) {
- validate_slab(s, page);
+ validate_slab(s, page, errors);
count++;
}
- if (count != n->nr_partial)
+ if (count != n->nr_partial) {
pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
s->name, count, n->nr_partial);
+ *errors += 1;
+ }

if (!(s->flags & SLAB_STORE_USER))
goto out;

list_for_each_entry(page, &n->full, slab_list) {
- validate_slab(s, page);
+ validate_slab(s, page, errors);
count++;
}
- if (count != atomic_long_read(&n->nr_slabs))
+ if (count != atomic_long_read(&n->nr_slabs)) {
pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
s->name, count, atomic_long_read(&n->nr_slabs));
+ *errors += 1;
+ }

out:
spin_unlock_irqrestore(&n->list_lock, flags);
return count;
}

-static long validate_slab_cache(struct kmem_cache *s)
+long validate_slab_cache(struct kmem_cache *s, int *errors)
{
int node;
unsigned long count = 0;
@@ -4678,10 +4687,12 @@ static long validate_slab_cache(struct kmem_cache *s)

flush_all(s);
for_each_kmem_cache_node(s, node, n)
- count += validate_slab_node(s, n);
+ count += validate_slab_node(s, n, errors);

return count;
}
+EXPORT_SYMBOL(validate_slab_cache);
+
/*
* Generate lists of code addresses where slabcache objects are allocated
* and freed.
@@ -5336,9 +5347,10 @@ static ssize_t validate_store(struct kmem_cache *s,
const char *buf, size_t length)
{
int ret = -EINVAL;
+ int errors = 0;

if (buf[0] == '1') {
- ret = validate_slab_cache(s);
+ ret = validate_slab_cache(s, &errors);
if (ret >= 0)
ret = length;
}
diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile
index a105f094676e..f168313b7949 100644
--- a/tools/testing/selftests/lib/Makefile
+++ b/tools/testing/selftests/lib/Makefile
@@ -4,6 +4,6 @@
# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
all:

-TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh strscpy.sh
+TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh strscpy.sh slub.sh

include ../lib.mk
diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config
index b80ee3f6e265..4190863032e7 100644
--- a/tools/testing/selftests/lib/config
+++ b/tools/testing/selftests/lib/config
@@ -3,3 +3,4 @@ CONFIG_TEST_BITMAP=m
CONFIG_PRIME_NUMBERS=m
CONFIG_TEST_STRSCPY=m
CONFIG_TEST_BITOPS=m
+CONFIG_TEST_SLUB=m
\ No newline at end of file
diff --git a/tools/testing/selftests/lib/slub.sh b/tools/testing/selftests/lib/slub.sh
new file mode 100755
index 000000000000..8b5757702910
--- /dev/null
+++ b/tools/testing/selftests/lib/slub.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+$(dirname $0)/../kselftest/module.sh "slub" test_slub
--
2.17.1


2021-03-17 08:43:10

by Oliver Sang

[permalink] [raw]
Subject: [selftests] e48d82b67a: BUG_TestSlub_RZ_alloc(Not_tainted):Redzone_overwritten



Greeting,

FYI, we noticed the following commit (built with gcc-9):

commit: e48d82b67a2b760eedf7b95ca15f41267496386c ("[PATCH 1/2] selftests: add a kselftest for SLUB debugging functionality")
url: https://github.com/0day-ci/linux/commits/glittao-gmail-com/selftests-add-a-kselftest-for-SLUB-debugging-functionality/20210316-204257
base: https://git.kernel.org/cgit/linux/kernel/git/shuah/linux-kselftest.git next

in testcase: trinity
version: trinity-static-i386-x86_64-f93256fb_2019-08-28
with following parameters:

group: group-04

test-description: Trinity is a linux system call fuzz tester.
test-url: http://codemonkey.org.uk/projects/trinity/


on test machine: qemu-system-i386 -enable-kvm -cpu SandyBridge -smp 2 -m 8G

caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):


+---------------------------------------------------------------------------------------------------------------+-----------+------------+
| | v5.12-rc2 | e48d82b67a |
+---------------------------------------------------------------------------------------------------------------+-----------+------------+
| BUG_TestSlub_RZ_alloc(Not_tainted):Redzone_overwritten | 0 | 69 |
| INFO:0x(ptrval)-0x(ptrval)@offset=#.First_byte#instead_of | 0 | 69 |
| INFO:Allocated_in_resiliency_test_age=#cpu=#pid= | 0 | 69 |
| INFO:Slab0x(ptrval)objects=#used=#fp=0x(ptrval)flags= | 0 | 69 |
| INFO:Object0x(ptrval)@offset=#fp=0x(ptrval) | 0 | 69 |
| BUG_TestSlub_next_ptr_free(Tainted:G_B):Freechain_corrupt | 0 | 69 |
| INFO:Freed_in_resiliency_test_age=#cpu=#pid= | 0 | 69 |
| BUG_TestSlub_next_ptr_free(Tainted:G_B):Wrong_object_count.Counter_is#but_counted_were | 0 | 69 |
| BUG_TestSlub_next_ptr_free(Tainted:G_B):Redzone_overwritten | 0 | 69 |
| BUG_TestSlub_next_ptr_free(Tainted:G_B):Objects_remaining_in_TestSlub_next_ptr_free_on__kmem_cache_shutdown() | 0 | 69 |
| INFO:Object0x(ptrval)@offset= | 0 | 69 |
| BUG_TestSlub_1th_word_free(Tainted:G_B):Poison_overwritten | 0 | 69 |
| BUG_TestSlub_50th_word_free(Tainted:G_B):Poison_overwritten | 0 | 69 |
| BUG_TestSlub_RZ_free(Tainted:G_B):Redzone_overwritten | 0 | 69 |
+---------------------------------------------------------------------------------------------------------------+-----------+------------+


If you fix the issue, kindly add following tag
Reported-by: kernel test robot <[email protected]>



[ 22.154049] random: get_random_u32 called from __kmem_cache_create+0x23/0x3e0 with crng_init=0
[ 22.154070] random: get_random_u32 called from cache_random_seq_create+0x7c/0x140 with crng_init=0
[ 22.154167] random: get_random_u32 called from allocate_slab+0x155/0x5e0 with crng_init=0
[ 22.154690] test_slub: 1. kmem_cache: Clobber Redzone 0x12->0x(ptrval)
[ 22.164499] =============================================================================
[ 22.166629] BUG TestSlub_RZ_alloc (Not tainted): Redzone overwritten
[ 22.168179] -----------------------------------------------------------------------------
[ 22.168179]
[ 22.168372] Disabling lock debugging due to kernel taint
[ 22.168372] INFO: 0x(ptrval)-0x(ptrval) @offset=1064. First byte 0x12 instead of 0xcc
[ 22.168372] INFO: Allocated in resiliency_test+0x47/0x1be age=3 cpu=0 pid=1
[ 22.168372] __slab_alloc+0x57/0x80
[ 22.168372] kmem_cache_alloc (kbuild/src/consumer/mm/slub.c:2871 kbuild/src/consumer/mm/slub.c:2915 kbuild/src/consumer/mm/slub.c:2920)
[ 22.168372] resiliency_test (kbuild/src/consumer/lib/test_slub.c:34 kbuild/src/consumer/lib/test_slub.c:107)
[ 22.168372] test_slub_init (kbuild/src/consumer/lib/test_slub.c:124)
[ 22.168372] do_one_initcall (kbuild/src/consumer/init/main.c:1226)
[ 22.168372] kernel_init_freeable (kbuild/src/consumer/init/main.c:1298 kbuild/src/consumer/init/main.c:1315 kbuild/src/consumer/init/main.c:1335 kbuild/src/consumer/init/main.c:1537)
[ 22.168372] kernel_init (kbuild/src/consumer/init/main.c:1426)
[ 22.168372] ret_from_fork (kbuild/src/consumer/arch/x86/entry/entry_32.S:856)
[ 22.168372] INFO: Slab 0x(ptrval) objects=16 used=1 fp=0x(ptrval) flags=0x40000201
[ 22.168372] INFO: Object 0x(ptrval) @offset=1000 fp=0x(ptrval)
[ 22.168372]
[ 22.168372] Redzone (ptrval): cc cc cc cc cc cc cc cc ........
[ 22.168372] Object (ptrval): 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
[ 22.168372] Object (ptrval): 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b a5 kkkkkkkkkkkkkkk.
[ 22.168372] Redzone (ptrval): 12 cc cc cc ....
[ 22.168372] Padding (ptrval): 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ
[ 22.168372] CPU: 0 PID: 1 Comm: swapper/0 Tainted: G B 5.12.0-rc2-00001-ge48d82b67a2b #1
[ 22.168372] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014
[ 22.168372] Call Trace:
[ 22.168372] dump_stack (kbuild/src/consumer/lib/dump_stack.c:122)
[ 22.168372] print_trailer (kbuild/src/consumer/mm/slub.c:737)
[ 22.168372] check_bytes_and_report.cold (kbuild/src/consumer/mm/slub.c:807)
[ 22.168372] check_object (kbuild/src/consumer/mm/slub.c:914)
[ 22.168372] validate_slab (kbuild/src/consumer/mm/slub.c:4635)


To reproduce:

# build kernel
cd linux
cp config-5.12.0-rc2-00001-ge48d82b67a2b .config
make HOSTCC=gcc-9 CC=gcc-9 ARCH=i386 olddefconfig prepare modules_prepare bzImage

git clone https://github.com/intel/lkp-tests.git
cd lkp-tests
bin/lkp qemu -k <bzImage> job-script # job-script is attached in this email



---
0DAY/LKP+ Test Infrastructure Open Source Technology Center
https://lists.01.org/hyperkitty/list/[email protected] Intel Corporation

Thanks,
Oliver Sang


Attachments:
(No filename) (6.76 kB)
config-5.12.0-rc2-00001-ge48d82b67a2b (273.26 kB)
job-script (4.08 kB)
dmesg.xz (18.29 kB)
Download all attachments

2021-03-17 11:28:45

by Vlastimil Babka

[permalink] [raw]
Subject: Re: [PATCH 1/2] selftests: add a kselftest for SLUB debugging functionality

On 3/16/21 1:41 PM, [email protected] wrote:
> From: Oliver Glitta <[email protected]>
>
> SLUB has resiliency_test() function which is hidden behind #ifdef
> SLUB_RESILIENCY_TEST that is not part of Kconfig, so nobody
> runs it. Kselftest should proper replacement for it.
>
> Try changing byte in redzone after allocation and changing
> pointer to next free node, first byte, 50th byte and redzone
> byte. Check if validation finds errors.
>
> There are several differences from the original resiliency test:
> Tests create own caches with known state instead of corrupting
> shared kmalloc caches.
>
> The corruption of freepointer uses correct offset, the original
> resiliency test got broken with freepointer changes.
>
> Scratch changing random byte test, because it does not have
> meaning in this form where we need deterministic results.
>
> Add new option CONFIG_TEST_SLUB in Kconfig.
>
> Add parameter to function validate_slab_cache() to return
> number of errors in cache.
>
> Signed-off-by: Oliver Glitta <[email protected]>

Acked-by: Vlastimil Babka <[email protected]>

Disclaimer: this is done as part of Oliver's university project that I'm advising.

2021-03-17 11:31:26

by Vlastimil Babka

[permalink] [raw]
Subject: Re: [selftests] e48d82b67a: BUG_TestSlub_RZ_alloc(Not_tainted):Redzone_overwritten

On 3/17/21 9:36 AM, kernel test robot wrote:
>
>
> Greeting,
>
> FYI, we noticed the following commit (built with gcc-9):
>
> commit: e48d82b67a2b760eedf7b95ca15f41267496386c ("[PATCH 1/2] selftests: add a kselftest for SLUB debugging functionality")
> url: https://github.com/0day-ci/linux/commits/glittao-gmail-com/selftests-add-a-kselftest-for-SLUB-debugging-functionality/20210316-204257
> base: https://git.kernel.org/cgit/linux/kernel/git/shuah/linux-kselftest.git next
>
> in testcase: trinity
> version: trinity-static-i386-x86_64-f93256fb_2019-08-28
> with following parameters:
>
> group: group-04
>
> test-description: Trinity is a linux system call fuzz tester.
> test-url: http://codemonkey.org.uk/projects/trinity/
>
>
> on test machine: qemu-system-i386 -enable-kvm -cpu SandyBridge -smp 2 -m 8G
>
> caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
>
>
> +---------------------------------------------------------------------------------------------------------------+-----------+------------+
> | | v5.12-rc2 | e48d82b67a |
> +---------------------------------------------------------------------------------------------------------------+-----------+------------+
> | BUG_TestSlub_RZ_alloc(Not_tainted):Redzone_overwritten | 0 | 69 |
> | INFO:0x(ptrval)-0x(ptrval)@offset=#.First_byte#instead_of | 0 | 69 |
> | INFO:Allocated_in_resiliency_test_age=#cpu=#pid= | 0 | 69 |
> | INFO:Slab0x(ptrval)objects=#used=#fp=0x(ptrval)flags= | 0 | 69 |
> | INFO:Object0x(ptrval)@offset=#fp=0x(ptrval) | 0 | 69 |
> | BUG_TestSlub_next_ptr_free(Tainted:G_B):Freechain_corrupt | 0 | 69 |
> | INFO:Freed_in_resiliency_test_age=#cpu=#pid= | 0 | 69 |
> | BUG_TestSlub_next_ptr_free(Tainted:G_B):Wrong_object_count.Counter_is#but_counted_were | 0 | 69 |
> | BUG_TestSlub_next_ptr_free(Tainted:G_B):Redzone_overwritten | 0 | 69 |
> | BUG_TestSlub_next_ptr_free(Tainted:G_B):Objects_remaining_in_TestSlub_next_ptr_free_on__kmem_cache_shutdown() | 0 | 69 |
> | INFO:Object0x(ptrval)@offset= | 0 | 69 |
> | BUG_TestSlub_1th_word_free(Tainted:G_B):Poison_overwritten | 0 | 69 |
> | BUG_TestSlub_50th_word_free(Tainted:G_B):Poison_overwritten | 0 | 69 |
> | BUG_TestSlub_RZ_free(Tainted:G_B):Redzone_overwritten | 0 | 69 |
> +---------------------------------------------------------------------------------------------------------------+-----------+------------+
>
>
> If you fix the issue, kindly add following tag
> Reported-by: kernel test robot <[email protected]>
>
>
>
> [ 22.154049] random: get_random_u32 called from __kmem_cache_create+0x23/0x3e0 with crng_init=0
> [ 22.154070] random: get_random_u32 called from cache_random_seq_create+0x7c/0x140 with crng_init=0
> [ 22.154167] random: get_random_u32 called from allocate_slab+0x155/0x5e0 with crng_init=0
> [ 22.154690] test_slub: 1. kmem_cache: Clobber Redzone 0x12->0x(ptrval)
> [ 22.164499] =============================================================================
> [ 22.166629] BUG TestSlub_RZ_alloc (Not tainted): Redzone overwritten
> [ 22.168179] -----------------------------------------------------------------------------
> [ 22.168179]
> [ 22.168372] Disabling lock debugging due to kernel taint
> [ 22.168372] INFO: 0x(ptrval)-0x(ptrval) @offset=1064. First byte 0x12 instead of 0xcc
> [ 22.168372] INFO: Allocated in resiliency_test+0x47/0x1be age=3 cpu=0 pid=1
> [ 22.168372] __slab_alloc+0x57/0x80
> [ 22.168372] kmem_cache_alloc (kbuild/src/consumer/mm/slub.c:2871 kbuild/src/consumer/mm/slub.c:2915 kbuild/src/consumer/mm/slub.c:2920)
> [ 22.168372] resiliency_test (kbuild/src/consumer/lib/test_slub.c:34 kbuild/src/consumer/lib/test_slub.c:107)
> [ 22.168372] test_slub_init (kbuild/src/consumer/lib/test_slub.c:124)
> [ 22.168372] do_one_initcall (kbuild/src/consumer/init/main.c:1226)
> [ 22.168372] kernel_init_freeable (kbuild/src/consumer/init/main.c:1298 kbuild/src/consumer/init/main.c:1315 kbuild/src/consumer/init/main.c:1335 kbuild/src/consumer/init/main.c:1537)
> [ 22.168372] kernel_init (kbuild/src/consumer/init/main.c:1426)
> [ 22.168372] ret_from_fork (kbuild/src/consumer/arch/x86/entry/entry_32.S:856)
> [ 22.168372] INFO: Slab 0x(ptrval) objects=16 used=1 fp=0x(ptrval) flags=0x40000201
> [ 22.168372] INFO: Object 0x(ptrval) @offset=1000 fp=0x(ptrval)
> [ 22.168372]
> [ 22.168372] Redzone (ptrval): cc cc cc cc cc cc cc cc ........
> [ 22.168372] Object (ptrval): 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
> [ 22.168372] Object (ptrval): 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b a5 kkkkkkkkkkkkkkk.
> [ 22.168372] Redzone (ptrval): 12 cc cc cc ....
> [ 22.168372] Padding (ptrval): 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ
> [ 22.168372] CPU: 0 PID: 1 Comm: swapper/0 Tainted: G B 5.12.0-rc2-00001-ge48d82b67a2b #1
> [ 22.168372] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014
> [ 22.168372] Call Trace:
> [ 22.168372] dump_stack (kbuild/src/consumer/lib/dump_stack.c:122)
> [ 22.168372] print_trailer (kbuild/src/consumer/mm/slub.c:737)
> [ 22.168372] check_bytes_and_report.cold (kbuild/src/consumer/mm/slub.c:807)
> [ 22.168372] check_object (kbuild/src/consumer/mm/slub.c:914)
> [ 22.168372] validate_slab (kbuild/src/consumer/mm/slub.c:4635)

Hm but in this case the output means the tested functionality (slub debugging)
is working as intended. So what can we do? Indicate/teach somehow to the bot
that this is OK? Does kselftest have some support for this? Or silence the
validation output for testing purposes? (I would prefer not to)

Thanks,
Vlastimil

2021-03-17 18:55:02

by David Rientjes

[permalink] [raw]
Subject: Re: [selftests] e48d82b67a: BUG_TestSlub_RZ_alloc(Not_tainted):Redzone_overwritten

On Wed, 17 Mar 2021, Vlastimil Babka wrote:

> > Greeting,
> >
> > FYI, we noticed the following commit (built with gcc-9):
> >
> > commit: e48d82b67a2b760eedf7b95ca15f41267496386c ("[PATCH 1/2] selftests: add a kselftest for SLUB debugging functionality")
> > url: https://github.com/0day-ci/linux/commits/glittao-gmail-com/selftests-add-a-kselftest-for-SLUB-debugging-functionality/20210316-204257
> > base: https://git.kernel.org/cgit/linux/kernel/git/shuah/linux-kselftest.git next
> >
> > in testcase: trinity
> > version: trinity-static-i386-x86_64-f93256fb_2019-08-28
> > with following parameters:
> >
> > group: group-04
> >
> > test-description: Trinity is a linux system call fuzz tester.
> > test-url: http://codemonkey.org.uk/projects/trinity/
> >
> >
> > on test machine: qemu-system-i386 -enable-kvm -cpu SandyBridge -smp 2 -m 8G
> >
> > caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
> >
> >
> > +---------------------------------------------------------------------------------------------------------------+-----------+------------+
> > | | v5.12-rc2 | e48d82b67a |
> > +---------------------------------------------------------------------------------------------------------------+-----------+------------+
> > | BUG_TestSlub_RZ_alloc(Not_tainted):Redzone_overwritten | 0 | 69 |
> > | INFO:0x(ptrval)-0x(ptrval)@offset=#.First_byte#instead_of | 0 | 69 |
> > | INFO:Allocated_in_resiliency_test_age=#cpu=#pid= | 0 | 69 |
> > | INFO:Slab0x(ptrval)objects=#used=#fp=0x(ptrval)flags= | 0 | 69 |
> > | INFO:Object0x(ptrval)@offset=#fp=0x(ptrval) | 0 | 69 |
> > | BUG_TestSlub_next_ptr_free(Tainted:G_B):Freechain_corrupt | 0 | 69 |
> > | INFO:Freed_in_resiliency_test_age=#cpu=#pid= | 0 | 69 |
> > | BUG_TestSlub_next_ptr_free(Tainted:G_B):Wrong_object_count.Counter_is#but_counted_were | 0 | 69 |
> > | BUG_TestSlub_next_ptr_free(Tainted:G_B):Redzone_overwritten | 0 | 69 |
> > | BUG_TestSlub_next_ptr_free(Tainted:G_B):Objects_remaining_in_TestSlub_next_ptr_free_on__kmem_cache_shutdown() | 0 | 69 |
> > | INFO:Object0x(ptrval)@offset= | 0 | 69 |
> > | BUG_TestSlub_1th_word_free(Tainted:G_B):Poison_overwritten | 0 | 69 |
> > | BUG_TestSlub_50th_word_free(Tainted:G_B):Poison_overwritten | 0 | 69 |
> > | BUG_TestSlub_RZ_free(Tainted:G_B):Redzone_overwritten | 0 | 69 |
> > +---------------------------------------------------------------------------------------------------------------+-----------+------------+
> >
> >
> > If you fix the issue, kindly add following tag
> > Reported-by: kernel test robot <[email protected]>
> >
> >
> >
> > [ 22.154049] random: get_random_u32 called from __kmem_cache_create+0x23/0x3e0 with crng_init=0
> > [ 22.154070] random: get_random_u32 called from cache_random_seq_create+0x7c/0x140 with crng_init=0
> > [ 22.154167] random: get_random_u32 called from allocate_slab+0x155/0x5e0 with crng_init=0
> > [ 22.154690] test_slub: 1. kmem_cache: Clobber Redzone 0x12->0x(ptrval)
> > [ 22.164499] =============================================================================
> > [ 22.166629] BUG TestSlub_RZ_alloc (Not tainted): Redzone overwritten
> > [ 22.168179] -----------------------------------------------------------------------------
> > [ 22.168179]
> > [ 22.168372] Disabling lock debugging due to kernel taint
> > [ 22.168372] INFO: 0x(ptrval)-0x(ptrval) @offset=1064. First byte 0x12 instead of 0xcc
> > [ 22.168372] INFO: Allocated in resiliency_test+0x47/0x1be age=3 cpu=0 pid=1
> > [ 22.168372] __slab_alloc+0x57/0x80
> > [ 22.168372] kmem_cache_alloc (kbuild/src/consumer/mm/slub.c:2871 kbuild/src/consumer/mm/slub.c:2915 kbuild/src/consumer/mm/slub.c:2920)
> > [ 22.168372] resiliency_test (kbuild/src/consumer/lib/test_slub.c:34 kbuild/src/consumer/lib/test_slub.c:107)
> > [ 22.168372] test_slub_init (kbuild/src/consumer/lib/test_slub.c:124)
> > [ 22.168372] do_one_initcall (kbuild/src/consumer/init/main.c:1226)
> > [ 22.168372] kernel_init_freeable (kbuild/src/consumer/init/main.c:1298 kbuild/src/consumer/init/main.c:1315 kbuild/src/consumer/init/main.c:1335 kbuild/src/consumer/init/main.c:1537)
> > [ 22.168372] kernel_init (kbuild/src/consumer/init/main.c:1426)
> > [ 22.168372] ret_from_fork (kbuild/src/consumer/arch/x86/entry/entry_32.S:856)
> > [ 22.168372] INFO: Slab 0x(ptrval) objects=16 used=1 fp=0x(ptrval) flags=0x40000201
> > [ 22.168372] INFO: Object 0x(ptrval) @offset=1000 fp=0x(ptrval)
> > [ 22.168372]
> > [ 22.168372] Redzone (ptrval): cc cc cc cc cc cc cc cc ........
> > [ 22.168372] Object (ptrval): 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
> > [ 22.168372] Object (ptrval): 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b a5 kkkkkkkkkkkkkkk.
> > [ 22.168372] Redzone (ptrval): 12 cc cc cc ....
> > [ 22.168372] Padding (ptrval): 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ
> > [ 22.168372] CPU: 0 PID: 1 Comm: swapper/0 Tainted: G B 5.12.0-rc2-00001-ge48d82b67a2b #1
> > [ 22.168372] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014
> > [ 22.168372] Call Trace:
> > [ 22.168372] dump_stack (kbuild/src/consumer/lib/dump_stack.c:122)
> > [ 22.168372] print_trailer (kbuild/src/consumer/mm/slub.c:737)
> > [ 22.168372] check_bytes_and_report.cold (kbuild/src/consumer/mm/slub.c:807)
> > [ 22.168372] check_object (kbuild/src/consumer/mm/slub.c:914)
> > [ 22.168372] validate_slab (kbuild/src/consumer/mm/slub.c:4635)
>
> Hm but in this case the output means the tested functionality (slub debugging)
> is working as intended. So what can we do? Indicate/teach somehow to the bot
> that this is OK? Does kselftest have some support for this? Or silence the
> validation output for testing purposes? (I would prefer not to)
>

Unless you're familiar with everything that CONFIG_TEST_SLUB does, maybe
this could be inferred as an actual issue that the test has uncovered that
is unexpected?

I don't have a good way of silencing the check_bytes_and_report() output
other than a big hammer: implement {disable,enable}_slub_warnings() that
the resiliency test could call into before triggering these checks.

2021-03-17 18:55:48

by David Rientjes

[permalink] [raw]
Subject: Re: [PATCH 1/2] selftests: add a kselftest for SLUB debugging functionality

On Tue, 16 Mar 2021, [email protected] wrote:

> From: Oliver Glitta <[email protected]>
>
> SLUB has resiliency_test() function which is hidden behind #ifdef
> SLUB_RESILIENCY_TEST that is not part of Kconfig, so nobody
> runs it. Kselftest should proper replacement for it.
>
> Try changing byte in redzone after allocation and changing
> pointer to next free node, first byte, 50th byte and redzone
> byte. Check if validation finds errors.
>
> There are several differences from the original resiliency test:
> Tests create own caches with known state instead of corrupting
> shared kmalloc caches.
>
> The corruption of freepointer uses correct offset, the original
> resiliency test got broken with freepointer changes.
>
> Scratch changing random byte test, because it does not have
> meaning in this form where we need deterministic results.
>
> Add new option CONFIG_TEST_SLUB in Kconfig.
>
> Add parameter to function validate_slab_cache() to return
> number of errors in cache.
>
> Signed-off-by: Oliver Glitta <[email protected]>

Acked-by: David Rientjes <[email protected]>

2021-03-18 11:49:58

by Marco Elver

[permalink] [raw]
Subject: Re: [PATCH 1/2] selftests: add a kselftest for SLUB debugging functionality

On Tue, Mar 16, 2021 at 01:41PM +0100, [email protected] wrote:
> From: Oliver Glitta <[email protected]>
>
> SLUB has resiliency_test() function which is hidden behind #ifdef
> SLUB_RESILIENCY_TEST that is not part of Kconfig, so nobody
> runs it. Kselftest should proper replacement for it.
>
> Try changing byte in redzone after allocation and changing
> pointer to next free node, first byte, 50th byte and redzone
> byte. Check if validation finds errors.
>
> There are several differences from the original resiliency test:
> Tests create own caches with known state instead of corrupting
> shared kmalloc caches.
>
> The corruption of freepointer uses correct offset, the original
> resiliency test got broken with freepointer changes.
>
> Scratch changing random byte test, because it does not have
> meaning in this form where we need deterministic results.
>
> Add new option CONFIG_TEST_SLUB in Kconfig.
>
> Add parameter to function validate_slab_cache() to return
> number of errors in cache.
>
> Signed-off-by: Oliver Glitta <[email protected]>

No objection per-se, but have you considered a KUnit-based test instead?

There is no user space portion required to run this test, and a pure
in-kernel KUnit test would be cleaner. Various boiler-plate below,
including pr_err()s, the kselftest script etc. would simply not be
necessary.

This is only a suggestion, but just want to make sure you've considered
the option and weighed its pros/cons.

Thanks,
-- Marco

> ---
> lib/Kconfig.debug | 4 +
> lib/Makefile | 1 +
> lib/test_slub.c | 125 +++++++++++++++++++++++++++
> mm/slab.h | 1 +
> mm/slub.c | 34 +++++---
> tools/testing/selftests/lib/Makefile | 2 +-
> tools/testing/selftests/lib/config | 1 +
> tools/testing/selftests/lib/slub.sh | 3 +
> 8 files changed, 159 insertions(+), 12 deletions(-)
> create mode 100644 lib/test_slub.c
> create mode 100755 tools/testing/selftests/lib/slub.sh
>
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 2779c29d9981..2d56092abbc4 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -2123,6 +2123,10 @@ config TEST_KSTRTOX
> config TEST_PRINTF
> tristate "Test printf() family of functions at runtime"
>
> +config TEST_SLUB
> + tristate "Test SLUB cache errors at runtime"
> + depends on SLUB_DEBUG
> +
> config TEST_BITMAP
> tristate "Test bitmap_*() family of functions at runtime"
> help
> diff --git a/lib/Makefile b/lib/Makefile
> index b5307d3eec1a..b6603803b1c4 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -83,6 +83,7 @@ obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o
> obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o
> obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
> obj-$(CONFIG_TEST_PRINTF) += test_printf.o
> +obj-$(CONFIG_TEST_SLUB) += test_slub.o
> obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
> obj-$(CONFIG_TEST_STRSCPY) += test_strscpy.o
> obj-$(CONFIG_TEST_UUID) += test_uuid.o
> diff --git a/lib/test_slub.c b/lib/test_slub.c
> new file mode 100644
> index 000000000000..0075d9b44251
> --- /dev/null
> +++ b/lib/test_slub.c
> @@ -0,0 +1,125 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Test cases for slub facility.
> + */
> +
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> +#include <linux/mm.h>
> +#include <linux/slab.h>
> +#include <linux/module.h>
> +#include <linux/kernel.h>
> +#include "../mm/slab.h"
> +
> +#include "../tools/testing/selftests/kselftest_module.h"
> +
> +
> +KSTM_MODULE_GLOBALS();
> +
> +
> +static void __init validate_result(struct kmem_cache *s, int expected_errors)
> +{
> + int errors = 0;
> +
> + validate_slab_cache(s, &errors);
> + KSTM_CHECK_ZERO(errors - expected_errors);
> +}
> +
> +static void __init test_clobber_zone(void)
> +{
> + struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_alloc", 64, 0,
> + SLAB_RED_ZONE, NULL);
> + u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
> +
> + p[64] = 0x12;
> + pr_err("1. kmem_cache: Clobber Redzone 0x12->0x%p\n", p + 64);
> +
> + validate_result(s, 1);
> + kmem_cache_free(s, p);
> + kmem_cache_destroy(s);
> +}
> +
> +static void __init test_next_pointer(void)
> +{
> + struct kmem_cache *s = kmem_cache_create("TestSlub_next_ptr_free", 64, 0,
> + SLAB_RED_ZONE, NULL);
> + u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
> +
> + kmem_cache_free(s, p);
> + p[s->offset] = 0x12;
> + pr_err("1. kmem_cache: Clobber next pointer 0x34 -> -0x%p\n", p);
> +
> + validate_result(s, 1);
> + kmem_cache_destroy(s);
> +}
> +
> +static void __init test_first_word(void)
> +{
> + struct kmem_cache *s = kmem_cache_create("TestSlub_1th_word_free", 64, 0,
> + SLAB_POISON, NULL);
> + u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
> +
> + kmem_cache_free(s, p);
> + *p = 0x78;
> + pr_err("2. kmem_cache: Clobber first word 0x78->0x%p\n", p);
> +
> + validate_result(s, 1);
> + kmem_cache_destroy(s);
> +}
> +
> +static void __init test_clobber_50th_byte(void)
> +{
> + struct kmem_cache *s = kmem_cache_create("TestSlub_50th_word_free", 64, 0,
> + SLAB_POISON, NULL);
> + u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
> +
> + kmem_cache_free(s, p);
> + p[50] = 0x9a;
> + pr_err("3. kmem_cache: Clobber 50th byte 0x9a->0x%p\n", p);
> +
> + validate_result(s, 1);
> + kmem_cache_destroy(s);
> +}
> +
> +static void __init test_clobber_redzone_free(void)
> +{
> + struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_free", 64, 0,
> + SLAB_RED_ZONE, NULL);
> + u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
> +
> + kmem_cache_free(s, p);
> + p[64] = 0xab;
> + pr_err("4. kmem_cache: Clobber redzone 0xab->0x%p\n", p);
> +
> + validate_result(s, 1);
> + kmem_cache_destroy(s);
> +}
> +
> +static void __init resiliency_test(void)
> +{
> +
> + BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
> +
> + pr_err("SLUB resiliency testing\n");
> + pr_err("-----------------------\n");
> + pr_err("A. Corruption after allocation\n");
> +
> + test_clobber_zone();
> +
> + pr_err("\nB. Corruption after free\n");
> +
> + test_next_pointer();
> + test_first_word();
> + test_clobber_50th_byte();
> + test_clobber_redzone_free();
> +}
> +
> +
> +static void __init selftest(void)
> +{
> + resiliency_test();
> +}
> +
> +
> +KSTM_MODULE_LOADERS(test_slub);
> +MODULE_LICENSE("GPL");
> diff --git a/mm/slab.h b/mm/slab.h
> index 076582f58f68..5fc18d506b3b 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -215,6 +215,7 @@ DECLARE_STATIC_KEY_TRUE(slub_debug_enabled);
> DECLARE_STATIC_KEY_FALSE(slub_debug_enabled);
> #endif
> extern void print_tracking(struct kmem_cache *s, void *object);
> +long validate_slab_cache(struct kmem_cache *s, int *errors);
> #else
> static inline void print_tracking(struct kmem_cache *s, void *object)
> {
> diff --git a/mm/slub.c b/mm/slub.c
> index e26c274b4657..c00e2b263e03 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -4612,7 +4612,8 @@ static int count_total(struct page *page)
> #endif
>
> #ifdef CONFIG_SLUB_DEBUG
> -static void validate_slab(struct kmem_cache *s, struct page *page)
> +static void validate_slab(struct kmem_cache *s, struct page *page,
> + int *errors)
> {
> void *p;
> void *addr = page_address(page);
> @@ -4620,8 +4621,10 @@ static void validate_slab(struct kmem_cache *s, struct page *page)
>
> slab_lock(page);
>
> - if (!check_slab(s, page) || !on_freelist(s, page, NULL))
> + if (!check_slab(s, page) || !on_freelist(s, page, NULL)) {
> + *errors += 1;
> goto unlock;
> + }
>
> /* Now we know that a valid freelist exists */
> map = get_map(s, page);
> @@ -4629,8 +4632,10 @@ static void validate_slab(struct kmem_cache *s, struct page *page)
> u8 val = test_bit(__obj_to_index(s, addr, p), map) ?
> SLUB_RED_INACTIVE : SLUB_RED_ACTIVE;
>
> - if (!check_object(s, page, p, val))
> + if (!check_object(s, page, p, val)) {
> + *errors += 1;
> break;
> + }
> }
> put_map(map);
> unlock:
> @@ -4638,7 +4643,7 @@ static void validate_slab(struct kmem_cache *s, struct page *page)
> }
>
> static int validate_slab_node(struct kmem_cache *s,
> - struct kmem_cache_node *n)
> + struct kmem_cache_node *n, int *errors)
> {
> unsigned long count = 0;
> struct page *page;
> @@ -4647,30 +4652,34 @@ static int validate_slab_node(struct kmem_cache *s,
> spin_lock_irqsave(&n->list_lock, flags);
>
> list_for_each_entry(page, &n->partial, slab_list) {
> - validate_slab(s, page);
> + validate_slab(s, page, errors);
> count++;
> }
> - if (count != n->nr_partial)
> + if (count != n->nr_partial) {
> pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
> s->name, count, n->nr_partial);
> + *errors += 1;
> + }
>
> if (!(s->flags & SLAB_STORE_USER))
> goto out;
>
> list_for_each_entry(page, &n->full, slab_list) {
> - validate_slab(s, page);
> + validate_slab(s, page, errors);
> count++;
> }
> - if (count != atomic_long_read(&n->nr_slabs))
> + if (count != atomic_long_read(&n->nr_slabs)) {
> pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
> s->name, count, atomic_long_read(&n->nr_slabs));
> + *errors += 1;
> + }
>
> out:
> spin_unlock_irqrestore(&n->list_lock, flags);
> return count;
> }
>
> -static long validate_slab_cache(struct kmem_cache *s)
> +long validate_slab_cache(struct kmem_cache *s, int *errors)
> {
> int node;
> unsigned long count = 0;
> @@ -4678,10 +4687,12 @@ static long validate_slab_cache(struct kmem_cache *s)
>
> flush_all(s);
> for_each_kmem_cache_node(s, node, n)
> - count += validate_slab_node(s, n);
> + count += validate_slab_node(s, n, errors);
>
> return count;
> }
> +EXPORT_SYMBOL(validate_slab_cache);
> +
> /*
> * Generate lists of code addresses where slabcache objects are allocated
> * and freed.
> @@ -5336,9 +5347,10 @@ static ssize_t validate_store(struct kmem_cache *s,
> const char *buf, size_t length)
> {
> int ret = -EINVAL;
> + int errors = 0;
>
> if (buf[0] == '1') {
> - ret = validate_slab_cache(s);
> + ret = validate_slab_cache(s, &errors);
> if (ret >= 0)
> ret = length;
> }
> diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile
> index a105f094676e..f168313b7949 100644
> --- a/tools/testing/selftests/lib/Makefile
> +++ b/tools/testing/selftests/lib/Makefile
> @@ -4,6 +4,6 @@
> # No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
> all:
>
> -TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh strscpy.sh
> +TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh strscpy.sh slub.sh
>
> include ../lib.mk
> diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config
> index b80ee3f6e265..4190863032e7 100644
> --- a/tools/testing/selftests/lib/config
> +++ b/tools/testing/selftests/lib/config
> @@ -3,3 +3,4 @@ CONFIG_TEST_BITMAP=m
> CONFIG_PRIME_NUMBERS=m
> CONFIG_TEST_STRSCPY=m
> CONFIG_TEST_BITOPS=m
> +CONFIG_TEST_SLUB=m
> \ No newline at end of file
> diff --git a/tools/testing/selftests/lib/slub.sh b/tools/testing/selftests/lib/slub.sh
> new file mode 100755
> index 000000000000..8b5757702910
> --- /dev/null
> +++ b/tools/testing/selftests/lib/slub.sh
> @@ -0,0 +1,3 @@
> +#!/bin/sh
> +# SPDX-License-Identifier: GPL-2.0+
> +$(dirname $0)/../kselftest/module.sh "slub" test_slub

2021-03-19 10:48:17

by Vlastimil Babka

[permalink] [raw]
Subject: Re: [PATCH 1/2] selftests: add a kselftest for SLUB debugging functionality

On 3/18/21 12:47 PM, Marco Elver wrote:
> On Tue, Mar 16, 2021 at 01:41PM +0100, [email protected] wrote:
>> From: Oliver Glitta <[email protected]>
>>
>> SLUB has resiliency_test() function which is hidden behind #ifdef
>> SLUB_RESILIENCY_TEST that is not part of Kconfig, so nobody
>> runs it. Kselftest should proper replacement for it.
>>
>> Try changing byte in redzone after allocation and changing
>> pointer to next free node, first byte, 50th byte and redzone
>> byte. Check if validation finds errors.
>>
>> There are several differences from the original resiliency test:
>> Tests create own caches with known state instead of corrupting
>> shared kmalloc caches.
>>
>> The corruption of freepointer uses correct offset, the original
>> resiliency test got broken with freepointer changes.
>>
>> Scratch changing random byte test, because it does not have
>> meaning in this form where we need deterministic results.
>>
>> Add new option CONFIG_TEST_SLUB in Kconfig.
>>
>> Add parameter to function validate_slab_cache() to return
>> number of errors in cache.
>>
>> Signed-off-by: Oliver Glitta <[email protected]>
>
> No objection per-se, but have you considered a KUnit-based test instead?

To be honest, we didn't realize about that option.

> There is no user space portion required to run this test, and a pure
> in-kernel KUnit test would be cleaner. Various boiler-plate below,
> including pr_err()s, the kselftest script etc. would simply not be
> necessary.
>
> This is only a suggestion, but just want to make sure you've considered
> the option and weighed its pros/cons.

Thanks for the suggestion. But I hope we would expand the tests later to e.g.
check the contents of various SLUB related sysfs files or even write to them,
and for that goal kselftest seems to be a better starting place?

Vlastimil

2021-03-19 11:21:58

by Marco Elver

[permalink] [raw]
Subject: Re: [PATCH 1/2] selftests: add a kselftest for SLUB debugging functionality

On Fri, 19 Mar 2021 at 11:46, Vlastimil Babka <[email protected]> wrote:
> On 3/18/21 12:47 PM, Marco Elver wrote:
> > On Tue, Mar 16, 2021 at 01:41PM +0100, [email protected] wrote:
> >> From: Oliver Glitta <[email protected]>
> >>
> >> SLUB has resiliency_test() function which is hidden behind #ifdef
> >> SLUB_RESILIENCY_TEST that is not part of Kconfig, so nobody
> >> runs it. Kselftest should proper replacement for it.
> >>
> >> Try changing byte in redzone after allocation and changing
> >> pointer to next free node, first byte, 50th byte and redzone
> >> byte. Check if validation finds errors.
> >>
> >> There are several differences from the original resiliency test:
> >> Tests create own caches with known state instead of corrupting
> >> shared kmalloc caches.
> >>
> >> The corruption of freepointer uses correct offset, the original
> >> resiliency test got broken with freepointer changes.
> >>
> >> Scratch changing random byte test, because it does not have
> >> meaning in this form where we need deterministic results.
> >>
> >> Add new option CONFIG_TEST_SLUB in Kconfig.
> >>
> >> Add parameter to function validate_slab_cache() to return
> >> number of errors in cache.
> >>
> >> Signed-off-by: Oliver Glitta <[email protected]>
> >
> > No objection per-se, but have you considered a KUnit-based test instead?
>
> To be honest, we didn't realize about that option.
>
> > There is no user space portion required to run this test, and a pure
> > in-kernel KUnit test would be cleaner. Various boiler-plate below,
> > including pr_err()s, the kselftest script etc. would simply not be
> > necessary.
> >
> > This is only a suggestion, but just want to make sure you've considered
> > the option and weighed its pros/cons.
>
> Thanks for the suggestion. But I hope we would expand the tests later to e.g.
> check the contents of various SLUB related sysfs files or even write to them,
> and for that goal kselftest seems to be a better starting place?

Not sure, but I would probably go about it this way:

A. Anything that is purely in-kernel and doesn't require a user space
component should be a KUnit test.

B. For any test that requires a user space component, it'd be a kselftest.

And I think the best design here would also clearly separate those 2
types of tests, and I wouldn't lump tests of type A into modules that
are also used for B. That way, running tests of type A also is a bit
easier, and if somebody wants to just quickly run those it's e.g. very
quick to do so with kunit-tool.

Thanks,
-- Marco

2021-03-22 05:44:51

by Oliver Sang

[permalink] [raw]
Subject: Re: [selftests] e48d82b67a: BUG_TestSlub_RZ_alloc(Not_tainted):Redzone_overwritten

Hi Vlastimil,

On Wed, Mar 17, 2021 at 12:29:40PM +0100, Vlastimil Babka wrote:
> On 3/17/21 9:36 AM, kernel test robot wrote:
> >
> >
> > Greeting,
> >
> > FYI, we noticed the following commit (built with gcc-9):
> >
> > commit: e48d82b67a2b760eedf7b95ca15f41267496386c ("[PATCH 1/2] selftests: add a kselftest for SLUB debugging functionality")
> > url: https://github.com/0day-ci/linux/commits/glittao-gmail-com/selftests-add-a-kselftest-for-SLUB-debugging-functionality/20210316-204257
> > base: https://git.kernel.org/cgit/linux/kernel/git/shuah/linux-kselftest.git next
> >
> > in testcase: trinity
> > version: trinity-static-i386-x86_64-f93256fb_2019-08-28
> > with following parameters:
> >
> > group: group-04
> >
> > test-description: Trinity is a linux system call fuzz tester.
> > test-url: http://codemonkey.org.uk/projects/trinity/
> >
> >
> > on test machine: qemu-system-i386 -enable-kvm -cpu SandyBridge -smp 2 -m 8G
> >
> > caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
> >
> >
> > +---------------------------------------------------------------------------------------------------------------+-----------+------------+
> > | | v5.12-rc2 | e48d82b67a |
> > +---------------------------------------------------------------------------------------------------------------+-----------+------------+
> > | BUG_TestSlub_RZ_alloc(Not_tainted):Redzone_overwritten | 0 | 69 |
> > | INFO:0x(ptrval)-0x(ptrval)@offset=#.First_byte#instead_of | 0 | 69 |
> > | INFO:Allocated_in_resiliency_test_age=#cpu=#pid= | 0 | 69 |
> > | INFO:Slab0x(ptrval)objects=#used=#fp=0x(ptrval)flags= | 0 | 69 |
> > | INFO:Object0x(ptrval)@offset=#fp=0x(ptrval) | 0 | 69 |
> > | BUG_TestSlub_next_ptr_free(Tainted:G_B):Freechain_corrupt | 0 | 69 |
> > | INFO:Freed_in_resiliency_test_age=#cpu=#pid= | 0 | 69 |
> > | BUG_TestSlub_next_ptr_free(Tainted:G_B):Wrong_object_count.Counter_is#but_counted_were | 0 | 69 |
> > | BUG_TestSlub_next_ptr_free(Tainted:G_B):Redzone_overwritten | 0 | 69 |
> > | BUG_TestSlub_next_ptr_free(Tainted:G_B):Objects_remaining_in_TestSlub_next_ptr_free_on__kmem_cache_shutdown() | 0 | 69 |
> > | INFO:Object0x(ptrval)@offset= | 0 | 69 |
> > | BUG_TestSlub_1th_word_free(Tainted:G_B):Poison_overwritten | 0 | 69 |
> > | BUG_TestSlub_50th_word_free(Tainted:G_B):Poison_overwritten | 0 | 69 |
> > | BUG_TestSlub_RZ_free(Tainted:G_B):Redzone_overwritten | 0 | 69 |
> > +---------------------------------------------------------------------------------------------------------------+-----------+------------+
> >
> >
> > If you fix the issue, kindly add following tag
> > Reported-by: kernel test robot <[email protected]>
> >
> >
> >
> > [ 22.154049] random: get_random_u32 called from __kmem_cache_create+0x23/0x3e0 with crng_init=0
> > [ 22.154070] random: get_random_u32 called from cache_random_seq_create+0x7c/0x140 with crng_init=0
> > [ 22.154167] random: get_random_u32 called from allocate_slab+0x155/0x5e0 with crng_init=0
> > [ 22.154690] test_slub: 1. kmem_cache: Clobber Redzone 0x12->0x(ptrval)
> > [ 22.164499] =============================================================================
> > [ 22.166629] BUG TestSlub_RZ_alloc (Not tainted): Redzone overwritten
> > [ 22.168179] -----------------------------------------------------------------------------
> > [ 22.168179]
> > [ 22.168372] Disabling lock debugging due to kernel taint
> > [ 22.168372] INFO: 0x(ptrval)-0x(ptrval) @offset=1064. First byte 0x12 instead of 0xcc
> > [ 22.168372] INFO: Allocated in resiliency_test+0x47/0x1be age=3 cpu=0 pid=1
> > [ 22.168372] __slab_alloc+0x57/0x80
> > [ 22.168372] kmem_cache_alloc (kbuild/src/consumer/mm/slub.c:2871 kbuild/src/consumer/mm/slub.c:2915 kbuild/src/consumer/mm/slub.c:2920)
> > [ 22.168372] resiliency_test (kbuild/src/consumer/lib/test_slub.c:34 kbuild/src/consumer/lib/test_slub.c:107)
> > [ 22.168372] test_slub_init (kbuild/src/consumer/lib/test_slub.c:124)
> > [ 22.168372] do_one_initcall (kbuild/src/consumer/init/main.c:1226)
> > [ 22.168372] kernel_init_freeable (kbuild/src/consumer/init/main.c:1298 kbuild/src/consumer/init/main.c:1315 kbuild/src/consumer/init/main.c:1335 kbuild/src/consumer/init/main.c:1537)
> > [ 22.168372] kernel_init (kbuild/src/consumer/init/main.c:1426)
> > [ 22.168372] ret_from_fork (kbuild/src/consumer/arch/x86/entry/entry_32.S:856)
> > [ 22.168372] INFO: Slab 0x(ptrval) objects=16 used=1 fp=0x(ptrval) flags=0x40000201
> > [ 22.168372] INFO: Object 0x(ptrval) @offset=1000 fp=0x(ptrval)
> > [ 22.168372]
> > [ 22.168372] Redzone (ptrval): cc cc cc cc cc cc cc cc ........
> > [ 22.168372] Object (ptrval): 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
> > [ 22.168372] Object (ptrval): 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b a5 kkkkkkkkkkkkkkk.
> > [ 22.168372] Redzone (ptrval): 12 cc cc cc ....
> > [ 22.168372] Padding (ptrval): 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ
> > [ 22.168372] CPU: 0 PID: 1 Comm: swapper/0 Tainted: G B 5.12.0-rc2-00001-ge48d82b67a2b #1
> > [ 22.168372] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014
> > [ 22.168372] Call Trace:
> > [ 22.168372] dump_stack (kbuild/src/consumer/lib/dump_stack.c:122)
> > [ 22.168372] print_trailer (kbuild/src/consumer/mm/slub.c:737)
> > [ 22.168372] check_bytes_and_report.cold (kbuild/src/consumer/mm/slub.c:807)
> > [ 22.168372] check_object (kbuild/src/consumer/mm/slub.c:914)
> > [ 22.168372] validate_slab (kbuild/src/consumer/mm/slub.c:4635)
>
> Hm but in this case the output means the tested functionality (slub debugging)
> is working as intended. So what can we do? Indicate/teach somehow to the bot
> that this is OK? Does kselftest have some support for this? Or silence the
> validation output for testing purposes? (I would prefer not to)

just FYI.

this report is based on trinity test, upon kconfig attached in orginal report.
we normally will report this kind of extra warnings.

>
> Thanks,
> Vlastimil

2021-03-23 00:06:50

by Vlastimil Babka

[permalink] [raw]
Subject: Re: [selftests] e48d82b67a: BUG_TestSlub_RZ_alloc(Not_tainted):Redzone_overwritten

On 3/17/21 7:53 PM, David Rientjes wrote:
> On Wed, 17 Mar 2021, Vlastimil Babka wrote:
>> >
>> > [ 22.154049] random: get_random_u32 called from __kmem_cache_create+0x23/0x3e0 with crng_init=0
>> > [ 22.154070] random: get_random_u32 called from cache_random_seq_create+0x7c/0x140 with crng_init=0
>> > [ 22.154167] random: get_random_u32 called from allocate_slab+0x155/0x5e0 with crng_init=0
>> > [ 22.154690] test_slub: 1. kmem_cache: Clobber Redzone 0x12->0x(ptrval)
>> > [ 22.164499] =============================================================================
>> > [ 22.166629] BUG TestSlub_RZ_alloc (Not tainted): Redzone overwritten
>> > [ 22.168179] -----------------------------------------------------------------------------
>> > [ 22.168179]
>> > [ 22.168372] Disabling lock debugging due to kernel taint
>> > [ 22.168372] INFO: 0x(ptrval)-0x(ptrval) @offset=1064. First byte 0x12 instead of 0xcc
>> > [ 22.168372] INFO: Allocated in resiliency_test+0x47/0x1be age=3 cpu=0 pid=1
>> > [ 22.168372] __slab_alloc+0x57/0x80
>> > [ 22.168372] kmem_cache_alloc (kbuild/src/consumer/mm/slub.c:2871 kbuild/src/consumer/mm/slub.c:2915 kbuild/src/consumer/mm/slub.c:2920)
>> > [ 22.168372] resiliency_test (kbuild/src/consumer/lib/test_slub.c:34 kbuild/src/consumer/lib/test_slub.c:107)
>> > [ 22.168372] test_slub_init (kbuild/src/consumer/lib/test_slub.c:124)
>> > [ 22.168372] do_one_initcall (kbuild/src/consumer/init/main.c:1226)
>> > [ 22.168372] kernel_init_freeable (kbuild/src/consumer/init/main.c:1298 kbuild/src/consumer/init/main.c:1315 kbuild/src/consumer/init/main.c:1335 kbuild/src/consumer/init/main.c:1537)
>> > [ 22.168372] kernel_init (kbuild/src/consumer/init/main.c:1426)
>> > [ 22.168372] ret_from_fork (kbuild/src/consumer/arch/x86/entry/entry_32.S:856)
>> > [ 22.168372] INFO: Slab 0x(ptrval) objects=16 used=1 fp=0x(ptrval) flags=0x40000201
>> > [ 22.168372] INFO: Object 0x(ptrval) @offset=1000 fp=0x(ptrval)
>> > [ 22.168372]
>> > [ 22.168372] Redzone (ptrval): cc cc cc cc cc cc cc cc ........
>> > [ 22.168372] Object (ptrval): 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
>> > [ 22.168372] Object (ptrval): 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b a5 kkkkkkkkkkkkkkk.
>> > [ 22.168372] Redzone (ptrval): 12 cc cc cc ....
>> > [ 22.168372] Padding (ptrval): 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ
>> > [ 22.168372] CPU: 0 PID: 1 Comm: swapper/0 Tainted: G B 5.12.0-rc2-00001-ge48d82b67a2b #1
>> > [ 22.168372] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014
>> > [ 22.168372] Call Trace:
>> > [ 22.168372] dump_stack (kbuild/src/consumer/lib/dump_stack.c:122)
>> > [ 22.168372] print_trailer (kbuild/src/consumer/mm/slub.c:737)
>> > [ 22.168372] check_bytes_and_report.cold (kbuild/src/consumer/mm/slub.c:807)
>> > [ 22.168372] check_object (kbuild/src/consumer/mm/slub.c:914)
>> > [ 22.168372] validate_slab (kbuild/src/consumer/mm/slub.c:4635)
>>
>> Hm but in this case the output means the tested functionality (slub debugging)
>> is working as intended. So what can we do? Indicate/teach somehow to the bot
>> that this is OK? Does kselftest have some support for this? Or silence the
>> validation output for testing purposes? (I would prefer not to)
>>
>
> Unless you're familiar with everything that CONFIG_TEST_SLUB does, maybe
> this could be inferred as an actual issue that the test has uncovered that
> is unexpected?
>
> I don't have a good way of silencing the check_bytes_and_report() output
> other than a big hammer: implement {disable,enable}_slub_warnings() that
> the resiliency test could call into before triggering these checks.

So Oliver has implemented this, but now I got a different idea that should be
much cleaner IMHO. We could add a per-cache flag SLAB_SILENT_ERRORS (similar to
SLAB_RED_ZONE etc) instead of a global bool. The test would just create the
caches with this flag.
The flag should be added to the SLAB_NEVER_MERGE, SLAB_DEBUG_FLAGS,
SLAB_FLAGS_PERMITTED macros as well.

A similar suggestion is that adding the errors counter parameter to all
validate_slab_cache() and relevant functions is tedious - there are more that
had to be modified like this than initially expected.
Instead the error counter can be added to SLUB's struct kmem_cache definition,
incremented by the various checks and the tests can look at that after validation.

Thanks,
Vlastimil