LinuxLists.cc - [PATCH v3] mm: slub: move sysfs slab alloc/free interfaces to debugfs

2021-04-06 21:35:11

Subject: [PATCH v3] mm: slub: move sysfs slab alloc/free interfaces to debugfs

The alloc_calls and free_calls implementations in sysfs have two issues:
one is the PAGE_SIZE limitation of sysfs, and the other is that they do not
adhere to the "one value per file" rule.

To overcome these issues, move the alloc_calls and free_calls implementation
to debugfs.

Signed-off-by: Faiyaz Mohammed <[email protected]>
---
mm/slub.c | 518 ++++++++++++++++++++++++++++++++++----------------------------
1 file changed, 286 insertions(+), 232 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 3021ce9..4d20ee0 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -36,6 +36,7 @@
#include <linux/memcontrol.h>
#include <linux/random.h>

+#include <linux/debugfs.h>
#include <trace/events/kmem.h>

#include "internal.h"
@@ -225,6 +226,12 @@ static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
{ return 0; }
#endif

+#ifdef CONFIG_DEBUG_FS
+static void debugfs_slab_add(struct kmem_cache *);
+#else
+static inline void debugfs_slab_add(struct kmem_cache *) { }
+#endif
+
static inline void stat(const struct kmem_cache *s, enum stat_item si)
{
#ifdef CONFIG_SLUB_STATS
@@ -4542,6 +4549,8 @@ int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
if (err)
__kmem_cache_release(s);

+ debugfs_slab_add(s);
+
return err;
}

@@ -4682,221 +4691,6 @@ static long validate_slab_cache(struct kmem_cache *s)

return count;
}
-/*
- * Generate lists of code addresses where slabcache objects are allocated
- * and freed.
- */
-
-struct location {
- unsigned long count;
- unsigned long addr;
- long long sum_time;
- long min_time;
- long max_time;
- long min_pid;
- long max_pid;
- DECLARE_BITMAP(cpus, NR_CPUS);
- nodemask_t nodes;
-};
-
-struct loc_track {
- unsigned long max;
- unsigned long count;
- struct location *loc;
-};
-
-static void free_loc_track(struct loc_track *t)
-{
- if (t->max)
- free_pages((unsigned long)t->loc,
- get_order(sizeof(struct location) * t->max));
-}
-
-static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
-{
- struct location *l;
- int order;
-
- order = get_order(sizeof(struct location) * max);
-
- l = (void *)__get_free_pages(flags, order);
- if (!l)
- return 0;
-
- if (t->count) {
- memcpy(l, t->loc, sizeof(struct location) * t->count);
- free_loc_track(t);
- }
- t->max = max;
- t->loc = l;
- return 1;
-}
-
-static int add_location(struct loc_track *t, struct kmem_cache *s,
- const struct track *track)
-{
- long start, end, pos;
- struct location *l;
- unsigned long caddr;
- unsigned long age = jiffies - track->when;
-
- start = -1;
- end = t->count;
-
- for ( ; ; ) {
- pos = start + (end - start + 1) / 2;
-
- /*
- * There is nothing at "end". If we end up there
- * we need to add something to before end.
- */
- if (pos == end)
- break;
-
- caddr = t->loc[pos].addr;
- if (track->addr == caddr) {
-
- l = &t->loc[pos];
- l->count++;
- if (track->when) {
- l->sum_time += age;
- if (age < l->min_time)
- l->min_time = age;
- if (age > l->max_time)
- l->max_time = age;
-
- if (track->pid < l->min_pid)
- l->min_pid = track->pid;
- if (track->pid > l->max_pid)
- l->max_pid = track->pid;
-
- cpumask_set_cpu(track->cpu,
- to_cpumask(l->cpus));
- }
- node_set(page_to_nid(virt_to_page(track)), l->nodes);
- return 1;
- }
-
- if (track->addr < caddr)
- end = pos;
- else
- start = pos;
- }
-
- /*
- * Not found. Insert new tracking element.
- */
- if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
- return 0;
-
- l = t->loc + pos;
- if (pos < t->count)
- memmove(l + 1, l,
- (t->count - pos) * sizeof(struct location));
- t->count++;
- l->count = 1;
- l->addr = track->addr;
- l->sum_time = age;
- l->min_time = age;
- l->max_time = age;
- l->min_pid = track->pid;
- l->max_pid = track->pid;
- cpumask_clear(to_cpumask(l->cpus));
- cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
- nodes_clear(l->nodes);
- node_set(page_to_nid(virt_to_page(track)), l->nodes);
- return 1;
-}
-
-static void process_slab(struct loc_track *t, struct kmem_cache *s,
- struct page *page, enum track_item alloc)
-{
- void *addr = page_address(page);
- void *p;
- unsigned long *map;
-
- map = get_map(s, page);
- for_each_object(p, s, addr, page->objects)
- if (!test_bit(__obj_to_index(s, addr, p), map))
- add_location(t, s, get_track(s, p, alloc));
- put_map(map);
-}
-
-static int list_locations(struct kmem_cache *s, char *buf,
- enum track_item alloc)
-{
- int len = 0;
- unsigned long i;
- struct loc_track t = { 0, 0, NULL };
- int node;
- struct kmem_cache_node *n;
-
- if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
- GFP_KERNEL)) {
- return sysfs_emit(buf, "Out of memory\n");
- }
- /* Push back cpu slabs */
- flush_all(s);
-
- for_each_kmem_cache_node(s, node, n) {
- unsigned long flags;
- struct page *page;
-
- if (!atomic_long_read(&n->nr_slabs))
- continue;
-
- spin_lock_irqsave(&n->list_lock, flags);
- list_for_each_entry(page, &n->partial, slab_list)
- process_slab(&t, s, page, alloc);
- list_for_each_entry(page, &n->full, slab_list)
- process_slab(&t, s, page, alloc);
- spin_unlock_irqrestore(&n->list_lock, flags);
- }
-
- for (i = 0; i < t.count; i++) {
- struct location *l = &t.loc[i];
-
- len += sysfs_emit_at(buf, len, "%7ld ", l->count);
-
- if (l->addr)
- len += sysfs_emit_at(buf, len, "%pS", (void *)l->addr);
- else
- len += sysfs_emit_at(buf, len, "<not-available>");
-
- if (l->sum_time != l->min_time)
- len += sysfs_emit_at(buf, len, " age=%ld/%ld/%ld",
- l->min_time,
- (long)div_u64(l->sum_time,
- l->count),
- l->max_time);
- else
- len += sysfs_emit_at(buf, len, " age=%ld", l->min_time);
-
- if (l->min_pid != l->max_pid)
- len += sysfs_emit_at(buf, len, " pid=%ld-%ld",
- l->min_pid, l->max_pid);
- else
- len += sysfs_emit_at(buf, len, " pid=%ld",
- l->min_pid);
-
- if (num_online_cpus() > 1 &&
- !cpumask_empty(to_cpumask(l->cpus)))
- len += sysfs_emit_at(buf, len, " cpus=%*pbl",
- cpumask_pr_args(to_cpumask(l->cpus)));
-
- if (nr_online_nodes > 1 && !nodes_empty(l->nodes))
- len += sysfs_emit_at(buf, len, " nodes=%*pbl",
- nodemask_pr_args(&l->nodes));
-
- len += sysfs_emit_at(buf, len, "\n");
- }
-
- free_loc_track(&t);
- if (!t.count)
- len += sysfs_emit_at(buf, len, "No data\n");
-
- return len;
-}
#endif /* CONFIG_SLUB_DEBUG */

#ifdef SLUB_RESILIENCY_TEST
@@ -5346,21 +5140,6 @@ static ssize_t validate_store(struct kmem_cache *s,
}
SLAB_ATTR(validate);

-static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
-{
- if (!(s->flags & SLAB_STORE_USER))
- return -ENOSYS;
- return list_locations(s, buf, TRACK_ALLOC);
-}
-SLAB_ATTR_RO(alloc_calls);
-
-static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
-{
- if (!(s->flags & SLAB_STORE_USER))
- return -ENOSYS;
- return list_locations(s, buf, TRACK_FREE);
-}
-SLAB_ATTR_RO(free_calls);
#endif /* CONFIG_SLUB_DEBUG */

#ifdef CONFIG_FAILSLAB
@@ -5524,8 +5303,6 @@ static struct attribute *slab_attrs[] = {
&poison_attr.attr,
&store_user_attr.attr,
&validate_attr.attr,
- &alloc_calls_attr.attr,
- &free_calls_attr.attr,
#endif
#ifdef CONFIG_ZONE_DMA
&cache_dma_attr.attr,
@@ -5814,6 +5591,283 @@ static int __init slab_sysfs_init(void)
__initcall(slab_sysfs_init);
#endif /* CONFIG_SYSFS */

+#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG)
+/*
+ * Generate lists of code addresses where slabcache objects are allocated
+ * and freed.
+ */
+
+struct location {
+ unsigned long count;
+ unsigned long addr;
+ long long sum_time;
+ long min_time;
+ long max_time;
+ long min_pid;
+ long max_pid;
+ DECLARE_BITMAP(cpus, NR_CPUS);
+ nodemask_t nodes;
+};
+
+struct loc_track {
+ unsigned long max;
+ unsigned long count;
+ struct location *loc;
+};
+
+static struct dentry *slab_debugfs_root;
+
+static struct dentry *slab_debugfs_cache;
+
+static void free_loc_track(struct loc_track *t)
+{
+ if (t->max)
+ free_pages((unsigned long)t->loc,
+ get_order(sizeof(struct location) * t->max));
+}
+
+static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
+{
+ struct location *l;
+ int order;
+
+ order = get_order(sizeof(struct location) * max);
+
+ l = (void *)__get_free_pages(flags, order);
+ if (!l)
+ return 0;
+
+ if (t->count) {
+ memcpy(l, t->loc, sizeof(struct location) * t->count);
+ free_loc_track(t);
+ }
+ t->max = max;
+ t->loc = l;
+ return 1;
+}
+
+static int add_location(struct loc_track *t, struct kmem_cache *s,
+ const struct track *track)
+{
+ long start, end, pos;
+ struct location *l;
+ unsigned long caddr;
+ unsigned long age = jiffies - track->when;
+
+ start = -1;
+ end = t->count;
+
+ for ( ; ; ) {
+ pos = start + (end - start + 1) / 2;
+
+ /*
+ * There is nothing at "end". If we end up there
+ * we need to add something to before end.
+ */
+ if (pos == end)
+ break;
+
+ caddr = t->loc[pos].addr;
+ if (track->addr == caddr) {
+
+ l = &t->loc[pos];
+ l->count++;
+ if (track->when) {
+ l->sum_time += age;
+ if (age < l->min_time)
+ l->min_time = age;
+ if (age > l->max_time)
+ l->max_time = age;
+
+ if (track->pid < l->min_pid)
+ l->min_pid = track->pid;
+ if (track->pid > l->max_pid)
+ l->max_pid = track->pid;
+
+ cpumask_set_cpu(track->cpu,
+ to_cpumask(l->cpus));
+ }
+ node_set(page_to_nid(virt_to_page(track)), l->nodes);
+ return 1;
+ }
+
+ if (track->addr < caddr)
+ end = pos;
+ else
+ start = pos;
+ }
+
+ /*
+ * Not found. Insert new tracking element.
+ */
+ if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
+ return 0;
+
+ l = t->loc + pos;
+ if (pos < t->count)
+ memmove(l + 1, l,
+ (t->count - pos) * sizeof(struct location));
+ t->count++;
+ l->count = 1;
+ l->addr = track->addr;
+ l->sum_time = age;
+ l->min_time = age;
+ l->max_time = age;
+ l->min_pid = track->pid;
+ l->max_pid = track->pid;
+ cpumask_clear(to_cpumask(l->cpus));
+ cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
+ nodes_clear(l->nodes);
+ node_set(page_to_nid(virt_to_page(track)), l->nodes);
+ return 1;
+}
+
+static void process_slab(struct loc_track *t, struct kmem_cache *s,
+ struct page *page, enum track_item alloc)
+{
+ void *addr = page_address(page);
+ void *p;
+ unsigned long *map;
+
+ map = get_map(s, page);
+ for_each_object(p, s, addr, page->objects)
+ if (!test_bit(__obj_to_index(s, addr, p), map))
+ add_location(t, s, get_track(s, p, alloc));
+ put_map(map);
+}
+
+static int list_locations(struct seq_file *seq, struct kmem_cache *s,
+ enum track_item alloc)
+{
+ unsigned long i;
+ struct loc_track t = { 0, 0, NULL };
+ int node;
+ struct kmem_cache_node *n;
+
+ if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
+ GFP_KERNEL)) {
+ seq_puts(seq, "Out of memory\n");
+ return -ENOMEM;
+ }
+ /* Push back cpu slabs */
+ flush_all(s);
+
+ for_each_kmem_cache_node(s, node, n) {
+ unsigned long flags;
+ struct page *page;
+
+ if (!atomic_long_read(&n->nr_slabs))
+ continue;
+
+ spin_lock_irqsave(&n->list_lock, flags);
+ list_for_each_entry(page, &n->partial, slab_list)
+ process_slab(&t, s, page, alloc);
+ list_for_each_entry(page, &n->full, slab_list)
+ process_slab(&t, s, page, alloc);
+ spin_unlock_irqrestore(&n->list_lock, flags);
+ }
+
+ for (i = 0; i < t.count; i++) {
+ struct location *l = &t.loc[i];
+
+ seq_printf(seq, "%7ld ", l->count);
+
+ if (l->addr)
+ seq_printf(seq, "%pS", (void *)l->addr);
+ else
+ seq_puts(seq, "<not-available>");
+
+ if (l->sum_time != l->min_time) {
+ seq_printf(seq, " age=%ld/%ld/%ld",
+ l->min_time,
+ (long)div_u64(l->sum_time, l->count),
+ l->max_time);
+ } else
+ seq_printf(seq, " age=%ld",
+ l->min_time);
+
+ if (l->min_pid != l->max_pid)
+ seq_printf(seq, " pid=%ld-%ld",
+ l->min_pid, l->max_pid);
+ else
+ seq_printf(seq, " pid=%ld",
+ l->min_pid);
+
+ if (num_online_cpus() > 1 &&
+ !cpumask_empty(to_cpumask(l->cpus)))
+ seq_printf(seq, " cpus=%*pbl",
+ cpumask_pr_args(to_cpumask(l->cpus)));
+
+ if (nr_online_nodes > 1 && !nodes_empty(l->nodes))
+ seq_printf(seq, " nodes=%*pbl",
+ nodemask_pr_args(&l->nodes));
+
+ seq_puts(seq, "\n");
+ }
+
+ free_loc_track(&t);
+ if (!t.count)
+ seq_puts(seq, "No data\n");
+ return 0;
+}
+
+static int slab_debug_trace(struct seq_file *seq, void *ignored)
+{
+ struct kmem_cache *s = seq->private;
+
+ if (!(s->flags & SLAB_STORE_USER))
+ return 0;
+
+ if (strcmp(seq->file->f_path.dentry->d_name.name, "alloc_trace") == 0)
+ return list_locations(seq, s, TRACK_ALLOC);
+ else
+ return list_locations(seq, s, TRACK_FREE);
+
+ return 0;
+}
+
+static int slab_debug_trace_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, slab_debug_trace,
+ file_inode(filp)->i_private);
+}
+
+static const struct file_operations slab_debug_fops = {
+ .open = slab_debug_trace_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static void debugfs_slab_add(struct kmem_cache *s)
+{
+ if (unlikely(!slab_debugfs_root))
+ return;
+
+ slab_debugfs_cache = debugfs_create_dir(s->name, slab_debugfs_root);
+ if (!IS_ERR(slab_debugfs_cache)) {
+ debugfs_create_file("alloc_trace", 0400,
+ slab_debugfs_cache, s, &slab_debug_fops);
+
+ debugfs_create_file("free_trace", 0400,
+ slab_debugfs_cache, s, &slab_debug_fops);
+ }
+}
+
+static void __init slab_debugfs_init(void)
+{
+ struct kmem_cache *s;
+
+ slab_debugfs_root = debugfs_create_dir("slab", NULL);
+ if (!IS_ERR(slab_debugfs_root))
+ list_for_each_entry(s, &slab_caches, list)
+ debugfs_slab_add(s);
+ else
+ pr_err("Cannot create slab debugfs.\n");
+
+}
+__initcall(slab_debugfs_init);
+#endif
/*
* The /proc/slabinfo ABI
*/
--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a
member of the Code Aurora Forum, hosted by The Linux Foundation

2021-04-07 06:46:14

by kernel test robot

[permalink] [raw]

Subject: Re: [PATCH v3] mm: slub: move sysfs slab alloc/free interfaces to debugfs

Attachments:

(No filename) (1.67 kB)
.config.gz (13.53 kB)
Download all attachments

2021-04-07 09:51:34

by kernel test robot

[permalink] [raw]

Subject: Re: [PATCH v3] mm: slub: move sysfs slab alloc/free interfaces to debugfs

Hi Faiyaz,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on v5.12-rc6]
[cannot apply to hnaz-linux-mm/master next-20210406]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url: https://github.com/0day-ci/linux/commits/Faiyaz-Mohammed/mm-slub-move-sysfs-slab-alloc-free-interfaces-to-debugfs/20210406-202954
base: e49d033bddf5b565044e2abe4241353959bc9120
config: sh-randconfig-c024-20210406 (attached as .config)
compiler: sh4-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/0day-ci/linux/commit/00c0bfb05271bdd3e80f4e9c62da9ce5287b8f93
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Faiyaz-Mohammed/mm-slub-move-sysfs-slab-alloc-free-interfaces-to-debugfs/20210406-202954
git checkout 00c0bfb05271bdd3e80f4e9c62da9ce5287b8f93
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=sh

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>

All errors (new ones prefixed by >>):

In file included from include/linux/printk.h:6,
from include/linux/kernel.h:16,
from include/asm-generic/bug.h:20,
from arch/sh/include/asm/bug.h:112,
from include/linux/bug.h:5,
from include/linux/mmdebug.h:5,
from include/linux/mm.h:9,
from mm/slub.c:13:
>> mm/slub.c:5869:12: error: initialization of 'initcall_t' {aka 'int (*)(void)'} from incompatible pointer type 'void (*)(void)' [-Werror=incompatible-pointer-types]
5869 | __initcall(slab_debugfs_init);
| ^~~~~~~~~~~~~~~~~
include/linux/init.h:249:41: note: in definition of macro '____define_initcall'
249 | __attribute__((__section__(__sec))) = fn;
| ^~
include/linux/init.h:259:2: note: in expansion of macro '__unique_initcall'
259 | __unique_initcall(fn, id, __sec, __initcall_id(fn))
| ^~~~~~~~~~~~~~~~~
include/linux/init.h:261:35: note: in expansion of macro '___define_initcall'
261 | #define __define_initcall(fn, id) ___define_initcall(fn, id, .initcall##id)
| ^~~~~~~~~~~~~~~~~~
include/linux/init.h:290:30: note: in expansion of macro '__define_initcall'
290 | #define device_initcall(fn) __define_initcall(fn, 6)
| ^~~~~~~~~~~~~~~~~
include/linux/init.h:295:24: note: in expansion of macro 'device_initcall'
295 | #define __initcall(fn) device_initcall(fn)
| ^~~~~~~~~~~~~~~
mm/slub.c:5869:1: note: in expansion of macro '__initcall'
5869 | __initcall(slab_debugfs_init);
| ^~~~~~~~~~
cc1: some warnings being treated as errors

vim +5869 mm/slub.c

5856
5857 static void __init slab_debugfs_init(void)
5858 {
5859 struct kmem_cache *s;
5860
5861 slab_debugfs_root = debugfs_create_dir("slab", NULL);
5862 if (!IS_ERR(slab_debugfs_root))
5863 list_for_each_entry(s, &slab_caches, list)
5864 debugfs_slab_add(s);
5865 else
5866 pr_err("Cannot create slab debugfs.\n");
5867
5868 }
> 5869 __initcall(slab_debugfs_init);
5870 #endif
5871 /*
5872 * The /proc/slabinfo ABI
5873 */
5874 #ifdef CONFIG_SLUB_DEBUG
5875 void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
5876 {
5877 unsigned long nr_slabs = 0;
5878 unsigned long nr_objs = 0;
5879 unsigned long nr_free = 0;
5880 int node;
5881 struct kmem_cache_node *n;
5882
5883 for_each_kmem_cache_node(s, node, n) {
5884 nr_slabs += node_nr_slabs(n);
5885 nr_objs += node_nr_objs(n);
5886 nr_free += count_partial(n, count_free);
5887 }
5888
5889 sinfo->active_objs = nr_objs - nr_free;
5890 sinfo->num_objs = nr_objs;
5891 sinfo->active_slabs = nr_slabs;
5892 sinfo->num_slabs = nr_slabs;
5893 sinfo->objects_per_slab = oo_objects(s->oo);
5894 sinfo->cache_order = oo_order(s->oo);
5895 }
5896

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]

Attachments:

(No filename) (4.59 kB)
.config.gz (22.71 kB)
Download all attachments

2021-04-07 10:25:50

by Vlastimil Babka

[permalink] [raw]

Subject: Re: [PATCH v3] mm: slub: move sysfs slab alloc/free interfaces to debugfs

On 4/6/21 2:27 PM, Faiyaz Mohammed wrote:
> The alloc_calls and free_calls implementations in sysfs have two issues:
> one is the PAGE_SIZE limitation of sysfs, and the other is that they do not
> adhere to the "one value per file" rule.
>
> To overcome these issues, move the alloc_calls and free_calls implementation
> to debugfs.
>
> Signed-off-by: Faiyaz Mohammed <[email protected]>

Good direction, thanks. But I'm afraid we need a bit more:

- I don't see debugfs_remove() (or _recursive) used anywhere. When a cache is
destroyed, do the dirs/files just linger in debugfs referencing removed
kmem_cache objects?
- There's a simple debugfs_create_dir(s->name, ...) for each cache, while the
sysfs variant handles merged caches with symlinks etc. For consistency, the
hierarchy should look the same in debugfs as it does in sysfs.

Also could we avoid shifting the tracking code around mm/slub.c by leaving it in
place and adding a nested #ifdef CONFIG_DEBUG_FS where needed?

And possibly we could leave the old files around, but their content would be
something along the lines of "Deprecated, use the equivalent under
<debugfs>/slab/"? We'll see if anyone complains. We can remove them completely
after a few years.

Thanks!
Vlastimil

> ---
> mm/slub.c | 518 ++++++++++++++++++++++++++++++++++----------------------------
> 1 file changed, 286 insertions(+), 232 deletions(-)
>
> diff --git a/mm/slub.c b/mm/slub.c
> index 3021ce9..4d20ee0 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -36,6 +36,7 @@
> #include <linux/memcontrol.h>
> #include <linux/random.h>
>
> +#include <linux/debugfs.h>
> #include <trace/events/kmem.h>
>
> #include "internal.h"
> @@ -225,6 +226,12 @@ static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
> { return 0; }
> #endif
>
> +#ifdef CONFIG_DEBUG_FS
> +static void debugfs_slab_add(struct kmem_cache *);
> +#else
> +static inline void debugfs_slab_add(struct kmem_cache *) { }
> +#endif
> +
> static inline void stat(const struct kmem_cache *s, enum stat_item si)
> {
> #ifdef CONFIG_SLUB_STATS
> @@ -4542,6 +4549,8 @@ int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
> if (err)
> __kmem_cache_release(s);
>
> + debugfs_slab_add(s);
> +
> return err;
> }
>
> @@ -4682,221 +4691,6 @@ static long validate_slab_cache(struct kmem_cache *s)
>
> return count;
> }
> -/*
> - * Generate lists of code addresses where slabcache objects are allocated
> - * and freed.
> - */
> -
> -struct location {
> - unsigned long count;
> - unsigned long addr;
> - long long sum_time;
> - long min_time;
> - long max_time;
> - long min_pid;
> - long max_pid;
> - DECLARE_BITMAP(cpus, NR_CPUS);
> - nodemask_t nodes;
> -};
> -
> -struct loc_track {
> - unsigned long max;
> - unsigned long count;
> - struct location *loc;
> -};
> -
> -static void free_loc_track(struct loc_track *t)
> -{
> - if (t->max)
> - free_pages((unsigned long)t->loc,
> - get_order(sizeof(struct location) * t->max));
> -}
> -
> -static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
> -{
> - struct location *l;
> - int order;
> -
> - order = get_order(sizeof(struct location) * max);
> -
> - l = (void *)__get_free_pages(flags, order);
> - if (!l)
> - return 0;
> -
> - if (t->count) {
> - memcpy(l, t->loc, sizeof(struct location) * t->count);
> - free_loc_track(t);
> - }
> - t->max = max;
> - t->loc = l;
> - return 1;
> -}
> -
> -static int add_location(struct loc_track *t, struct kmem_cache *s,
> - const struct track *track)
> -{
> - long start, end, pos;
> - struct location *l;
> - unsigned long caddr;
> - unsigned long age = jiffies - track->when;
> -
> - start = -1;
> - end = t->count;
> -
> - for ( ; ; ) {
> - pos = start + (end - start + 1) / 2;
> -
> - /*
> - * There is nothing at "end". If we end up there
> - * we need to add something to before end.
> - */
> - if (pos == end)
> - break;
> -
> - caddr = t->loc[pos].addr;
> - if (track->addr == caddr) {
> -
> - l = &t->loc[pos];
> - l->count++;
> - if (track->when) {
> - l->sum_time += age;
> - if (age < l->min_time)
> - l->min_time = age;
> - if (age > l->max_time)
> - l->max_time = age;
> -
> - if (track->pid < l->min_pid)
> - l->min_pid = track->pid;
> - if (track->pid > l->max_pid)
> - l->max_pid = track->pid;
> -
> - cpumask_set_cpu(track->cpu,
> - to_cpumask(l->cpus));
> - }
> - node_set(page_to_nid(virt_to_page(track)), l->nodes);
> - return 1;
> - }
> -
> - if (track->addr < caddr)
> - end = pos;
> - else
> - start = pos;
> - }
> -
> - /*
> - * Not found. Insert new tracking element.
> - */
> - if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
> - return 0;
> -
> - l = t->loc + pos;
> - if (pos < t->count)
> - memmove(l + 1, l,
> - (t->count - pos) * sizeof(struct location));
> - t->count++;
> - l->count = 1;
> - l->addr = track->addr;
> - l->sum_time = age;
> - l->min_time = age;
> - l->max_time = age;
> - l->min_pid = track->pid;
> - l->max_pid = track->pid;
> - cpumask_clear(to_cpumask(l->cpus));
> - cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
> - nodes_clear(l->nodes);
> - node_set(page_to_nid(virt_to_page(track)), l->nodes);
> - return 1;
> -}
> -
> -static void process_slab(struct loc_track *t, struct kmem_cache *s,
> - struct page *page, enum track_item alloc)
> -{
> - void *addr = page_address(page);
> - void *p;
> - unsigned long *map;
> -
> - map = get_map(s, page);
> - for_each_object(p, s, addr, page->objects)
> - if (!test_bit(__obj_to_index(s, addr, p), map))
> - add_location(t, s, get_track(s, p, alloc));
> - put_map(map);
> -}
> -
> -static int list_locations(struct kmem_cache *s, char *buf,
> - enum track_item alloc)
> -{
> - int len = 0;
> - unsigned long i;
> - struct loc_track t = { 0, 0, NULL };
> - int node;
> - struct kmem_cache_node *n;
> -
> - if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
> - GFP_KERNEL)) {
> - return sysfs_emit(buf, "Out of memory\n");
> - }
> - /* Push back cpu slabs */
> - flush_all(s);
> -
> - for_each_kmem_cache_node(s, node, n) {
> - unsigned long flags;
> - struct page *page;
> -
> - if (!atomic_long_read(&n->nr_slabs))
> - continue;
> -
> - spin_lock_irqsave(&n->list_lock, flags);
> - list_for_each_entry(page, &n->partial, slab_list)
> - process_slab(&t, s, page, alloc);
> - list_for_each_entry(page, &n->full, slab_list)
> - process_slab(&t, s, page, alloc);
> - spin_unlock_irqrestore(&n->list_lock, flags);
> - }
> -
> - for (i = 0; i < t.count; i++) {
> - struct location *l = &t.loc[i];
> -
> - len += sysfs_emit_at(buf, len, "%7ld ", l->count);
> -
> - if (l->addr)
> - len += sysfs_emit_at(buf, len, "%pS", (void *)l->addr);
> - else
> - len += sysfs_emit_at(buf, len, "<not-available>");
> -
> - if (l->sum_time != l->min_time)
> - len += sysfs_emit_at(buf, len, " age=%ld/%ld/%ld",
> - l->min_time,
> - (long)div_u64(l->sum_time,
> - l->count),
> - l->max_time);
> - else
> - len += sysfs_emit_at(buf, len, " age=%ld", l->min_time);
> -
> - if (l->min_pid != l->max_pid)
> - len += sysfs_emit_at(buf, len, " pid=%ld-%ld",
> - l->min_pid, l->max_pid);
> - else
> - len += sysfs_emit_at(buf, len, " pid=%ld",
> - l->min_pid);
> -
> - if (num_online_cpus() > 1 &&
> - !cpumask_empty(to_cpumask(l->cpus)))
> - len += sysfs_emit_at(buf, len, " cpus=%*pbl",
> - cpumask_pr_args(to_cpumask(l->cpus)));
> -
> - if (nr_online_nodes > 1 && !nodes_empty(l->nodes))
> - len += sysfs_emit_at(buf, len, " nodes=%*pbl",
> - nodemask_pr_args(&l->nodes));
> -
> - len += sysfs_emit_at(buf, len, "\n");
> - }
> -
> - free_loc_track(&t);
> - if (!t.count)
> - len += sysfs_emit_at(buf, len, "No data\n");
> -
> - return len;
> -}
> #endif /* CONFIG_SLUB_DEBUG */
>
> #ifdef SLUB_RESILIENCY_TEST
> @@ -5346,21 +5140,6 @@ static ssize_t validate_store(struct kmem_cache *s,
> }
> SLAB_ATTR(validate);
>
> -static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
> -{
> - if (!(s->flags & SLAB_STORE_USER))
> - return -ENOSYS;
> - return list_locations(s, buf, TRACK_ALLOC);
> -}
> -SLAB_ATTR_RO(alloc_calls);
> -
> -static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
> -{
> - if (!(s->flags & SLAB_STORE_USER))
> - return -ENOSYS;
> - return list_locations(s, buf, TRACK_FREE);
> -}
> -SLAB_ATTR_RO(free_calls);
> #endif /* CONFIG_SLUB_DEBUG */
>
> #ifdef CONFIG_FAILSLAB
> @@ -5524,8 +5303,6 @@ static struct attribute *slab_attrs[] = {
> &poison_attr.attr,
> &store_user_attr.attr,
> &validate_attr.attr,
> - &alloc_calls_attr.attr,
> - &free_calls_attr.attr,
> #endif
> #ifdef CONFIG_ZONE_DMA
> &cache_dma_attr.attr,
> @@ -5814,6 +5591,283 @@ static int __init slab_sysfs_init(void)
> __initcall(slab_sysfs_init);
> #endif /* CONFIG_SYSFS */
>
> +#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG)
> +/*
> + * Generate lists of code addresses where slabcache objects are allocated
> + * and freed.
> + */
> +
> +struct location {
> + unsigned long count;
> + unsigned long addr;
> + long long sum_time;
> + long min_time;
> + long max_time;
> + long min_pid;
> + long max_pid;
> + DECLARE_BITMAP(cpus, NR_CPUS);
> + nodemask_t nodes;
> +};
> +
> +struct loc_track {
> + unsigned long max;
> + unsigned long count;
> + struct location *loc;
> +};
> +
> +static struct dentry *slab_debugfs_root;
> +
> +static struct dentry *slab_debugfs_cache;
> +
> +static void free_loc_track(struct loc_track *t)
> +{
> + if (t->max)
> + free_pages((unsigned long)t->loc,
> + get_order(sizeof(struct location) * t->max));
> +}
> +
> +static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
> +{
> + struct location *l;
> + int order;
> +
> + order = get_order(sizeof(struct location) * max);
> +
> + l = (void *)__get_free_pages(flags, order);
> + if (!l)
> + return 0;
> +
> + if (t->count) {
> + memcpy(l, t->loc, sizeof(struct location) * t->count);
> + free_loc_track(t);
> + }
> + t->max = max;
> + t->loc = l;
> + return 1;
> +}
> +
> +static int add_location(struct loc_track *t, struct kmem_cache *s,
> + const struct track *track)
> +{
> + long start, end, pos;
> + struct location *l;
> + unsigned long caddr;
> + unsigned long age = jiffies - track->when;
> +
> + start = -1;
> + end = t->count;
> +
> + for ( ; ; ) {
> + pos = start + (end - start + 1) / 2;
> +
> + /*
> + * There is nothing at "end". If we end up there
> + * we need to add something to before end.
> + */
> + if (pos == end)
> + break;
> +
> + caddr = t->loc[pos].addr;
> + if (track->addr == caddr) {
> +
> + l = &t->loc[pos];
> + l->count++;
> + if (track->when) {
> + l->sum_time += age;
> + if (age < l->min_time)
> + l->min_time = age;
> + if (age > l->max_time)
> + l->max_time = age;
> +
> + if (track->pid < l->min_pid)
> + l->min_pid = track->pid;
> + if (track->pid > l->max_pid)
> + l->max_pid = track->pid;
> +
> + cpumask_set_cpu(track->cpu,
> + to_cpumask(l->cpus));
> + }
> + node_set(page_to_nid(virt_to_page(track)), l->nodes);
> + return 1;
> + }
> +
> + if (track->addr < caddr)
> + end = pos;
> + else
> + start = pos;
> + }
> +
> + /*
> + * Not found. Insert new tracking element.
> + */
> + if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
> + return 0;
> +
> + l = t->loc + pos;
> + if (pos < t->count)
> + memmove(l + 1, l,
> + (t->count - pos) * sizeof(struct location));
> + t->count++;
> + l->count = 1;
> + l->addr = track->addr;
> + l->sum_time = age;
> + l->min_time = age;
> + l->max_time = age;
> + l->min_pid = track->pid;
> + l->max_pid = track->pid;
> + cpumask_clear(to_cpumask(l->cpus));
> + cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
> + nodes_clear(l->nodes);
> + node_set(page_to_nid(virt_to_page(track)), l->nodes);
> + return 1;
> +}
> +
> +static void process_slab(struct loc_track *t, struct kmem_cache *s,
> + struct page *page, enum track_item alloc)
> +{
> + void *addr = page_address(page);
> + void *p;
> + unsigned long *map;
> +
> + map = get_map(s, page);
> + for_each_object(p, s, addr, page->objects)
> + if (!test_bit(__obj_to_index(s, addr, p), map))
> + add_location(t, s, get_track(s, p, alloc));
> + put_map(map);
> +}
> +
> +static int list_locations(struct seq_file *seq, struct kmem_cache *s,
> + enum track_item alloc)
> +{
> + unsigned long i;
> + struct loc_track t = { 0, 0, NULL };
> + int node;
> + struct kmem_cache_node *n;
> +
> + if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
> + GFP_KERNEL)) {
> + seq_puts(seq, "Out of memory\n");
> + return -ENOMEM;
> + }
> + /* Push back cpu slabs */
> + flush_all(s);
> +
> + for_each_kmem_cache_node(s, node, n) {
> + unsigned long flags;
> + struct page *page;
> +
> + if (!atomic_long_read(&n->nr_slabs))
> + continue;
> +
> + spin_lock_irqsave(&n->list_lock, flags);
> + list_for_each_entry(page, &n->partial, slab_list)
> + process_slab(&t, s, page, alloc);
> + list_for_each_entry(page, &n->full, slab_list)
> + process_slab(&t, s, page, alloc);
> + spin_unlock_irqrestore(&n->list_lock, flags);
> + }
> +
> + for (i = 0; i < t.count; i++) {
> + struct location *l = &t.loc[i];
> +
> + seq_printf(seq, "%7ld ", l->count);
> +
> + if (l->addr)
> + seq_printf(seq, "%pS", (void *)l->addr);
> + else
> + seq_puts(seq, "<not-available>");
> +
> + if (l->sum_time != l->min_time) {
> + seq_printf(seq, " age=%ld/%ld/%ld",
> + l->min_time,
> + (long)div_u64(l->sum_time, l->count),
> + l->max_time);
> + } else
> + seq_printf(seq, " age=%ld",
> + l->min_time);
> +
> + if (l->min_pid != l->max_pid)
> + seq_printf(seq, " pid=%ld-%ld",
> + l->min_pid, l->max_pid);
> + else
> + seq_printf(seq, " pid=%ld",
> + l->min_pid);
> +
> + if (num_online_cpus() > 1 &&
> + !cpumask_empty(to_cpumask(l->cpus)))
> + seq_printf(seq, " cpus=%*pbl",
> + cpumask_pr_args(to_cpumask(l->cpus)));
> +
> + if (nr_online_nodes > 1 && !nodes_empty(l->nodes))
> + seq_printf(seq, " nodes=%*pbl",
> + nodemask_pr_args(&l->nodes));
> +
> + seq_puts(seq, "\n");
> + }
> +
> + free_loc_track(&t);
> + if (!t.count)
> + seq_puts(seq, "No data\n");
> + return 0;
> +}
> +
> +static int slab_debug_trace(struct seq_file *seq, void *ignored)
> +{
> + struct kmem_cache *s = seq->private;
> +
> + if (!(s->flags & SLAB_STORE_USER))
> + return 0;
> +
> + if (strcmp(seq->file->f_path.dentry->d_name.name, "alloc_trace") == 0)
> + return list_locations(seq, s, TRACK_ALLOC);
> + else
> + return list_locations(seq, s, TRACK_FREE);
> +
> + return 0;
> +}
> +
> +static int slab_debug_trace_open(struct inode *inode, struct file *filp)
> +{
> + return single_open(filp, slab_debug_trace,
> + file_inode(filp)->i_private);
> +}
> +
> +static const struct file_operations slab_debug_fops = {
> + .open = slab_debug_trace_open,
> + .read = seq_read,
> + .llseek = seq_lseek,
> + .release = single_release,
> +};
> +
> +static void debugfs_slab_add(struct kmem_cache *s)
> +{
> + if (unlikely(!slab_debugfs_root))
> + return;
> +
> + slab_debugfs_cache = debugfs_create_dir(s->name, slab_debugfs_root);
> + if (!IS_ERR(slab_debugfs_cache)) {
> + debugfs_create_file("alloc_trace", 0400,
> + slab_debugfs_cache, s, &slab_debug_fops);
> +
> + debugfs_create_file("free_trace", 0400,
> + slab_debugfs_cache, s, &slab_debug_fops);
> + }
> +}
> +
> +static void __init slab_debugfs_init(void)
> +{
> + struct kmem_cache *s;
> +
> + slab_debugfs_root = debugfs_create_dir("slab", NULL);
> + if (!IS_ERR(slab_debugfs_root))
> + list_for_each_entry(s, &slab_caches, list)
> + debugfs_slab_add(s);
> + else
> + pr_err("Cannot create slab debugfs.\n");
> +
> +}
> +__initcall(slab_debugfs_init);
> +#endif
> /*
> * The /proc/slabinfo ABI
> */
>

2021-04-07 20:50:17

by Vlastimil Babka

[permalink] [raw]

Subject: Re: [PATCH v3] mm: slub: move sysfs slab alloc/free interfaces to debugfs

On 4/6/21 7:15 PM, Vlastimil Babka wrote:
> On 4/6/21 2:27 PM, Faiyaz Mohammed wrote:
>> The alloc_calls and free_calls implementations in sysfs have two issues:
>> one is the PAGE_SIZE limitation of sysfs, and the other is that they do not
>> adhere to the "one value per file" rule.
>>
>> To overcome these issues, move the alloc_calls and free_calls implementation
>> to debugfs.
>>
>> Signed-off-by: Faiyaz Mohammed <[email protected]>
>
> Good direction, thanks. But I'm afraid we need a bit more:
>
> - I don't see debugfs_remove() (or _recursive) used anywhere. When a cache is
> destroyed, do the dirs/files just linger in debugfs referencing removed
> kmem_cache objects?
> - There's a simple debugfs_create_dir(s->name, ...) for each cache, while the
> sysfs variant handles merged caches with symlinks etc. For consistency, the
> hierarchy should look the same in debugfs as it does in sysfs.

Oh and one more suggestion. With full seq_file API (unlike sysfs) we should
really do the data gathering (to struct loc_track?) part just once per file
open, and cache it between individual reads.