2021-06-16 22:51:51

by Georgi Djakov

[permalink] [raw]
Subject: [PATCH] mm/slub: Add taint after the errors are printed

When running the kernel with panic_on_taint, the usual slub debug error
messages are not being printed when object corruption happens. That's
because we panic in add_taint(), which is called before printing the
additional information. This is a bit unfortunate as the error messages
are actually very useful, especially before a panic. Let's fix this by
moving add_taint() after the errors are printed on the console.

Signed-off-by: Georgi Djakov <[email protected]>
---
mm/slub.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index a8b0daa1a307..ce7b8e4551b5 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -719,8 +719,6 @@ static void slab_bug(struct kmem_cache *s, char *fmt, ...)
pr_err("=============================================================================\n");
pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
pr_err("-----------------------------------------------------------------------------\n\n");
-
- add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
va_end(args);
}

@@ -801,6 +799,7 @@ void object_err(struct kmem_cache *s, struct page *page,

slab_bug(s, "%s", reason);
print_trailer(s, page, object);
+ add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
}

static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
@@ -818,6 +817,7 @@ static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
slab_bug(s, "%s", buf);
print_page_info(page);
dump_stack();
+ add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
}

static void init_object(struct kmem_cache *s, void *object, u8 val)
@@ -869,6 +869,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
fault, end - 1, fault - addr,
fault[0], value);
print_trailer(s, page, object);
+ add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);

skip_bug_print:
restore_bytes(s, what, value, fault, end);


2021-06-16 23:57:14

by Vlastimil Babka

[permalink] [raw]
Subject: Re: [PATCH] mm/slub: Add taint after the errors are printed

On 6/16/21 6:25 PM, Georgi Djakov wrote:
> When running the kernel with panic_on_taint, the usual slub debug error
> messages are not being printed when object corruption happens. That's
> because we panic in add_taint(), which is called before printing the
> additional information. This is a bit unfortunate as the error messages
> are actually very useful, especially before a panic. Let's fix this by
> moving add_taint() after the errors are printed on the console.
>
> Signed-off-by: Georgi Djakov <[email protected]>

Makes sense.

While at it, I wonder if we should use LOCKDEP_STILL_OK instead of
LOCKDEP_NOW_UNRELIABLE. Isn't it too pessimistic to assume that a slab
memory corruption has also hit some lock state?

> ---
> mm/slub.c | 5 +++--
> 1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/mm/slub.c b/mm/slub.c
> index a8b0daa1a307..ce7b8e4551b5 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -719,8 +719,6 @@ static void slab_bug(struct kmem_cache *s, char *fmt, ...)
> pr_err("=============================================================================\n");
> pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
> pr_err("-----------------------------------------------------------------------------\n\n");
> -
> - add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
> va_end(args);
> }
>
> @@ -801,6 +799,7 @@ void object_err(struct kmem_cache *s, struct page *page,
>
> slab_bug(s, "%s", reason);
> print_trailer(s, page, object);
> + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
> }
>
> static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
> @@ -818,6 +817,7 @@ static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
> slab_bug(s, "%s", buf);
> print_page_info(page);
> dump_stack();
> + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
> }
>
> static void init_object(struct kmem_cache *s, void *object, u8 val)
> @@ -869,6 +869,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
> fault, end - 1, fault - addr,
> fault[0], value);
> print_trailer(s, page, object);
> + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
>
> skip_bug_print:
> restore_bytes(s, what, value, fault, end);
>

2021-06-17 02:27:09

by Rafael Aquini

[permalink] [raw]
Subject: Re: [PATCH] mm/slub: Add taint after the errors are printed

On Wed, Jun 16, 2021 at 09:25:38AM -0700, Georgi Djakov wrote:
> When running the kernel with panic_on_taint, the usual slub debug error
> messages are not being printed when object corruption happens. That's
> because we panic in add_taint(), which is called before printing the
> additional information. This is a bit unfortunate as the error messages
> are actually very useful, especially before a panic. Let's fix this by
> moving add_taint() after the errors are printed on the console.
>
> Signed-off-by: Georgi Djakov <[email protected]>
> ---
> mm/slub.c | 5 +++--
> 1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/mm/slub.c b/mm/slub.c
> index a8b0daa1a307..ce7b8e4551b5 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -719,8 +719,6 @@ static void slab_bug(struct kmem_cache *s, char *fmt, ...)
> pr_err("=============================================================================\n");
> pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
> pr_err("-----------------------------------------------------------------------------\n\n");
> -
> - add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
> va_end(args);
> }
>
> @@ -801,6 +799,7 @@ void object_err(struct kmem_cache *s, struct page *page,
>
> slab_bug(s, "%s", reason);
> print_trailer(s, page, object);
> + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
> }
>
> static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
> @@ -818,6 +817,7 @@ static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
> slab_bug(s, "%s", buf);
> print_page_info(page);
> dump_stack();
> + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
> }
>
> static void init_object(struct kmem_cache *s, void *object, u8 val)
> @@ -869,6 +869,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
> fault, end - 1, fault - addr,
> fault[0], value);
> print_trailer(s, page, object);
> + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
>
> skip_bug_print:
> restore_bytes(s, what, value, fault, end);
>
Acked-by: Rafael Aquini <[email protected]>

2021-06-17 02:30:24

by Rafael Aquini

[permalink] [raw]
Subject: Re: [PATCH] mm/slub: Add taint after the errors are printed

On Wed, Jun 16, 2021 at 06:34:41PM +0200, Vlastimil Babka wrote:
> On 6/16/21 6:25 PM, Georgi Djakov wrote:
> > When running the kernel with panic_on_taint, the usual slub debug error
> > messages are not being printed when object corruption happens. That's
> > because we panic in add_taint(), which is called before printing the
> > additional information. This is a bit unfortunate as the error messages
> > are actually very useful, especially before a panic. Let's fix this by
> > moving add_taint() after the errors are printed on the console.
> >
> > Signed-off-by: Georgi Djakov <[email protected]>
>
> Makes sense.
>
> While at it, I wonder if we should use LOCKDEP_STILL_OK instead of
> LOCKDEP_NOW_UNRELIABLE. Isn't it too pessimistic to assume that some slab's
> memory corruption hit some lock state?
>

Given that corruption has been detected, I don't think it's safe to assume otherwise.

2021-06-17 09:27:02

by Aaron Tomlin

[permalink] [raw]
Subject: Re: [PATCH] mm/slub: Add taint after the errors are printed

On Wed 2021-06-16 09:25 -0700, Georgi Djakov wrote:
> When running the kernel with panic_on_taint, the usual slub debug error
> messages are not being printed when object corruption happens. That's
> because we panic in add_taint(), which is called before printing the
> additional information. This is a bit unfortunate as the error messages
> are actually very useful, especially before a panic. Let's fix this by
> moving add_taint() after the errors are printed on the console.
>
> Signed-off-by: Georgi Djakov <[email protected]>
> ---
> mm/slub.c | 5 +++--
> 1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/mm/slub.c b/mm/slub.c
> index a8b0daa1a307..ce7b8e4551b5 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -719,8 +719,6 @@ static void slab_bug(struct kmem_cache *s, char *fmt, ...)
> pr_err("=============================================================================\n");
> pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
> pr_err("-----------------------------------------------------------------------------\n\n");
> -
> - add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
> va_end(args);
> }
>
> @@ -801,6 +799,7 @@ void object_err(struct kmem_cache *s, struct page *page,
>
> slab_bug(s, "%s", reason);
> print_trailer(s, page, object);
> + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
> }
>
> static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
> @@ -818,6 +817,7 @@ static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
> slab_bug(s, "%s", buf);
> print_page_info(page);
> dump_stack();
> + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
> }
>
> static void init_object(struct kmem_cache *s, void *object, u8 val)
> @@ -869,6 +869,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
> fault, end - 1, fault - addr,
> fault[0], value);
> print_trailer(s, page, object);
> + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
>
> skip_bug_print:
> restore_bytes(s, what, value, fault, end);
>

Good catch. Thanks!

Reviewed-by: Aaron Tomlin <[email protected]>

--
Aaron Tomlin

2021-06-17 18:01:35

by Vlastimil Babka

[permalink] [raw]
Subject: Re: [PATCH] mm/slub: Add taint after the errors are printed

On 6/16/21 6:25 PM, Georgi Djakov wrote:
> When running the kernel with panic_on_taint, the usual slub debug error
> messages are not being printed when object corruption happens. That's
> because we panic in add_taint(), which is called before printing the
> additional information. This is a bit unfortunate as the error messages
> are actually very useful, especially before a panic. Let's fix this by
> moving add_taint() after the errors are printed on the console.
>
> Signed-off-by: Georgi Djakov <[email protected]>

Acked-by: Vlastimil Babka <[email protected]>

> ---
> mm/slub.c | 5 +++--
> 1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/mm/slub.c b/mm/slub.c
> index a8b0daa1a307..ce7b8e4551b5 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -719,8 +719,6 @@ static void slab_bug(struct kmem_cache *s, char *fmt, ...)
> pr_err("=============================================================================\n");
> pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
> pr_err("-----------------------------------------------------------------------------\n\n");
> -
> - add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
> va_end(args);
> }
>
> @@ -801,6 +799,7 @@ void object_err(struct kmem_cache *s, struct page *page,
>
> slab_bug(s, "%s", reason);
> print_trailer(s, page, object);
> + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
> }
>
> static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
> @@ -818,6 +817,7 @@ static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
> slab_bug(s, "%s", buf);
> print_page_info(page);
> dump_stack();
> + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
> }
>
> static void init_object(struct kmem_cache *s, void *object, u8 val)
> @@ -869,6 +869,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
> fault, end - 1, fault - addr,
> fault[0], value);
> print_trailer(s, page, object);
> + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
>
> skip_bug_print:
> restore_bytes(s, what, value, fault, end);
>

2021-06-19 02:20:19

by David Rientjes

[permalink] [raw]
Subject: Re: [PATCH] mm/slub: Add taint after the errors are printed

On Wed, 16 Jun 2021, Georgi Djakov wrote:

> When running the kernel with panic_on_taint, the usual slub debug error
> messages are not being printed when object corruption happens. That's
> because we panic in add_taint(), which is called before printing the
> additional information. This is a bit unfortunate as the error messages
> are actually very useful, especially before a panic. Let's fix this by
> moving add_taint() after the errors are printed on the console.
>
> Signed-off-by: Georgi Djakov <[email protected]>

Acked-by: David Rientjes <[email protected]>