2022-04-22 22:40:30

by Kent Overstreet

[permalink] [raw]
Subject: [PATCH 0/4] Printbufs & shrinker OOM reporting

Debugging OOMs has been one of my sources of frustration, so this patch series
is an attempt to do something about it.

The first patch in the series is something I've been slowly evolving in bcachefs
for years: simple heap allocated strings meant for appending to and building up
structured log/error messages. They make it easy and straightforward to write
pretty-printers for everything, which in turn makes good logging and error
messages something that just happens naturally.

We want it here because that means the reporting I'm adding to shrinkers can be
used by both OOM reporting, and for the sysfs (or is it debugfs now) interface
that Roman is adding.

This patch series also:
- adds OOM reporting on shrinkers, reporting on top 10 shrinkers (in sorted
order!)
- changes slab reporting to be always-on, also reporting top 10 slabs in sorted
order
- starts centralizing OOM reporting in mm/show_mem.c

The last patch in the series is only a demonstration of how to implement the
shrinker .to_text() method, since bcachefs isn't upstream yet.

Kent Overstreet (4):
lib/printbuf: New data structure for heap-allocated strings
mm: Add a .to_text() method for shrinkers
mm: Centralize & improve oom reporting in show_mem.c
bcachefs: shrinker.to_text() methods

fs/bcachefs/btree_cache.c | 18 ++-
fs/bcachefs/btree_key_cache.c | 18 ++-
include/linux/printbuf.h | 140 ++++++++++++++++++
include/linux/shrinker.h | 5 +
lib/Makefile | 4 +-
lib/printbuf.c | 271 ++++++++++++++++++++++++++++++++++
mm/Makefile | 2 +-
mm/oom_kill.c | 23 ---
{lib => mm}/show_mem.c | 14 ++
mm/slab.h | 6 +-
mm/slab_common.c | 53 ++++++-
mm/vmscan.c | 75 ++++++++++
12 files changed, 587 insertions(+), 42 deletions(-)
create mode 100644 include/linux/printbuf.h
create mode 100644 lib/printbuf.c
rename {lib => mm}/show_mem.c (78%)

--
2.35.2


2022-04-22 22:42:41

by Kent Overstreet

[permalink] [raw]
Subject: [PATCH 2/4] mm: Add a .to_text() method for shrinkers

This adds a new callback method to shrinkers which they can use to
describe anything relevant to memory reclaim about their internal state,
for example object dirtiness.

This uses the new printbufs to output to heap allocated strings, so that
the .to_text() methods can be used both for messages logged to the
console, and also sysfs/debugfs.

This patch also adds shrinkers_to_text(), which reports on the top 10
shrinkers - by object count - in sorted order, to be used in OOM
reporting.

Signed-off-by: Kent Overstreet <[email protected]>
---
include/linux/shrinker.h | 5 +++
mm/vmscan.c | 75 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 80 insertions(+)

diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 76fbf92b04..b5f411768b 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -2,6 +2,8 @@
#ifndef _LINUX_SHRINKER_H
#define _LINUX_SHRINKER_H

+struct printbuf;
+
/*
* This struct is used to pass information from page reclaim to the shrinkers.
* We consolidate the values for easier extension later.
@@ -58,10 +60,12 @@ struct shrink_control {
* @flags determine the shrinker abilities, like numa awareness
*/
struct shrinker {
+ char name[32];
unsigned long (*count_objects)(struct shrinker *,
struct shrink_control *sc);
unsigned long (*scan_objects)(struct shrinker *,
struct shrink_control *sc);
+ void (*to_text)(struct printbuf *, struct shrinker *);

long batch; /* reclaim batch size, 0 = default */
int seeks; /* seeks to recreate an obj */
@@ -94,4 +98,5 @@ extern int register_shrinker(struct shrinker *shrinker);
extern void unregister_shrinker(struct shrinker *shrinker);
extern void free_prealloced_shrinker(struct shrinker *shrinker);
extern void synchronize_shrinkers(void);
+void shrinkers_to_text(struct printbuf *);
#endif
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 59b14e0d69..09c483dfd3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -50,6 +50,7 @@
#include <linux/printk.h>
#include <linux/dax.h>
#include <linux/psi.h>
+#include <linux/printbuf.h>

#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -702,6 +703,80 @@ void synchronize_shrinkers(void)
}
EXPORT_SYMBOL(synchronize_shrinkers);

+/**
+ * shrinkers_to_text - Report on shrinkers with highest usage
+ * @out: printbuf the report is written to
+ *
+ * Top 10 shrinkers by object count, largest first; intended for OOM reporting.
+ */
+void shrinkers_to_text(struct printbuf *out)
+{
+ struct shrinker *shrinker;
+ struct shrinker_by_mem {
+ struct shrinker *shrinker;
+ unsigned long mem; /* value returned by ->count_objects() */
+ } shrinkers_by_mem[10]; /* first nr entries, kept sorted ascending by mem */
+ int i, nr = 0;
+
+ if (!down_read_trylock(&shrinker_rwsem)) { /* trylock only: never wait here */
+ pr_buf(out, "(couldn't take shrinker lock)");
+ return;
+ }
+
+ list_for_each_entry(shrinker, &shrinker_list, list) {
+ struct shrink_control sc = { .gfp_mask = GFP_KERNEL, };
+ unsigned long mem = shrinker->count_objects(shrinker, &sc);
+
+ if (!mem || mem == SHRINK_STOP || mem == SHRINK_EMPTY) /* nothing to rank */
+ continue;
+
+ for (i = 0; i < nr; i++) /* i = first slot holding a larger count */
+ if (mem < shrinkers_by_mem[i].mem)
+ break;
+
+ if (nr < ARRAY_SIZE(shrinkers_by_mem)) { /* room left: open a gap at i */
+ memmove(&shrinkers_by_mem[i + 1],
+ &shrinkers_by_mem[i],
+ sizeof(shrinkers_by_mem[0]) * (nr - i));
+ nr++;
+ } else if (i) { /* full: drop the smallest ([0]), shift the rest down */
+ i--;
+ memmove(&shrinkers_by_mem[0],
+ &shrinkers_by_mem[1],
+ sizeof(shrinkers_by_mem[0]) * i);
+ } else { /* full and smaller than every kept entry */
+ continue;
+ }
+
+ shrinkers_by_mem[i] = (struct shrinker_by_mem) {
+ .shrinker = shrinker,
+ .mem = mem,
+ };
+ }
+
+ for (i = nr - 1; i >= 0; --i) { /* walk the ascending array backwards */
+ struct shrink_control sc = { .gfp_mask = GFP_KERNEL, };
+ shrinker = shrinkers_by_mem[i].shrinker;
+
+ if (shrinker->name[0])
+ pr_buf(out, "%s", shrinker->name);
+ else /* no name set: identify by the scan_objects pointer instead */
+ pr_buf(out, "%ps:", shrinker->scan_objects);
+
+ /* NOTE(review): count is queried again here, not taken from .mem - confirm intended */
+ pr_buf(out, " objects: %lu", shrinker->count_objects(shrinker, &sc));
+ pr_newline(out);
+
+ if (shrinker->to_text) { /* optional callback, output indented by 2 */
+ pr_indent_push(out, 2);
+ shrinker->to_text(out, shrinker);
+ pr_indent_pop(out, 2);
+ pr_newline(out);
+ }
+ }
+
+ up_read(&shrinker_rwsem);
+}
+
+
#define SHRINK_BATCH 128

static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
--
2.35.2

2022-05-02 23:28:16

by Dave Young

[permalink] [raw]
Subject: Re: [PATCH 0/4] Printbufs & shrinker OOM reporting

Hi Kent,
On Fri, 22 Apr 2022 at 07:56, Kent Overstreet <[email protected]> wrote:
>
> Debugging OOMs has been one of my sources of frustration, so this patch series
> is an attempt to do something about it.
>
> The first patch in the series is something I've been slowly evolving in bcachefs
> for years: simple heap allocated strings meant for appending to and building up
> structured log/error messages. They make it easy and straightforward to write
> pretty-printers for everything, which in turn makes good logging and error
> messages something that just happens naturally.
>
> We want it here because that means the reporting I'm adding to shrinkers can be
> used by both OOM reporting, and for the sysfs (or is it debugfs now) interface
> that Roman is adding.
>

I added the kexec list in cc. It seems like a nice enhancement to oom
reporting.
I suspect kdump tooling needs changes to retrieve the kmsg log from
vmcore; could you confirm it? For example makedumpfile, crash, and
kexec-tools (its vmcore-dmesg tool).


> This patch series also:
> - adds OOM reporting on shrinkers, reporting on top 10 shrinkers (in sorted
> order!)
> - changes slab reporting to be always-on, also reporting top 10 slabs in sorted
> order
> - starts centralizing OOM reporting in mm/show_mem.c
>
> The last patch in the series is only a demonstration of how to implement the
> shrinker .to_text() method, since bcachefs isn't upstream yet.
>
> Kent Overstreet (4):
> lib/printbuf: New data structure for heap-allocated strings
> mm: Add a .to_text() method for shrinkers
> mm: Centralize & improve oom reporting in show_mem.c
> bcachefs: shrinker.to_text() methods
>
> fs/bcachefs/btree_cache.c | 18 ++-
> fs/bcachefs/btree_key_cache.c | 18 ++-
> include/linux/printbuf.h | 140 ++++++++++++++++++
> include/linux/shrinker.h | 5 +
> lib/Makefile | 4 +-
> lib/printbuf.c | 271 ++++++++++++++++++++++++++++++++++
> mm/Makefile | 2 +-
> mm/oom_kill.c | 23 ---
> {lib => mm}/show_mem.c | 14 ++
> mm/slab.h | 6 +-
> mm/slab_common.c | 53 ++++++-
> mm/vmscan.c | 75 ++++++++++
> 12 files changed, 587 insertions(+), 42 deletions(-)
> create mode 100644 include/linux/printbuf.h
> create mode 100644 lib/printbuf.c
> rename {lib => mm}/show_mem.c (78%)
>
> --
> 2.35.2
>

Thanks
Dave