2021-05-25 07:41:28

by Faiyaz Mohammed

[permalink] [raw]
Subject: [PATCH v7] mm: slub: move sysfs slab alloc/free interfaces to debugfs

alloc_calls and free_calls implementation in sysfs have two issues,
one is PAGE_SIZE limitiation of sysfs and other is it does not adhere
to "one value per file" rule.

To overcome this issues, move the alloc_calls and free_calls implemeation
to debugfs.

Rename the alloc_calls/free_calls to alloc_traces/free_traces,
to be inline with what it does.

Reported-by: kernel test robot <[email protected]>
Reported-by: Dan Carpenter <[email protected]>
Signed-off-by: Faiyaz Mohammed <[email protected]>
---
changes in V7:
- Drop the older alloc_calls and free_calls interface.
changes in v6:
- https://lore.kernel.org/linux-mm/[email protected]/

changes in v5:
- https://lore.kernel.org/linux-mm/[email protected]/

changes in v4:
- https://lore.kernel.org/linux-mm/[email protected]/

changes in v3:
- https://lore.kernel.org/linux-mm/[email protected]/

changes in v2:
- https://lore.kernel.org/linux-mm/[email protected]/

changes in v1:
- https://lore.kernel.org/linux-mm/[email protected]/

include/linux/slub_def.h | 8 ++
mm/slab_common.c | 9 ++
mm/slub.c | 353 ++++++++++++++++++++++++++++++++++-------------
3 files changed, 276 insertions(+), 94 deletions(-)

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index dcde82a..b413ebe 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -159,6 +159,14 @@ static inline void sysfs_slab_release(struct kmem_cache *s)
}
#endif

+#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG)
+#define SLAB_SUPPORTS_DEBUGFS
+void debugfs_slab_release(struct kmem_cache *);
+#else
+static inline void debugfs_slab_release(struct kmem_cache *s)
+{
+}
+#endif
void object_err(struct kmem_cache *s, struct page *page,
u8 *object, char *reason);

diff --git a/mm/slab_common.c b/mm/slab_common.c
index a4a5714..873dd79 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -455,6 +455,9 @@ static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
#else
slab_kmem_cache_release(s);
#endif
+#ifdef SLAB_SUPPORTS_DEBUGFS
+ debugfs_slab_release(s);
+#endif
}
}

@@ -472,6 +475,9 @@ static int shutdown_cache(struct kmem_cache *s)
#ifdef SLAB_SUPPORTS_SYSFS
sysfs_slab_unlink(s);
#endif
+#ifdef SLAB_SUPPORTS_DEBUGFS
+ debugfs_slab_release(s);
+#endif
list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
schedule_work(&slab_caches_to_rcu_destroy_work);
} else {
@@ -482,6 +488,9 @@ static int shutdown_cache(struct kmem_cache *s)
#else
slab_kmem_cache_release(s);
#endif
+#ifdef SLAB_SUPPORTS_DEBUGFS
+ debugfs_slab_release(s);
+#endif
}

return 0;
diff --git a/mm/slub.c b/mm/slub.c
index 3f96e09..e52df43 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -36,6 +36,7 @@
#include <linux/memcontrol.h>
#include <linux/random.h>

+#include <linux/debugfs.h>
#include <trace/events/kmem.h>

#include "internal.h"
@@ -225,6 +226,15 @@ static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
{ return 0; }
#endif

+#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG)
+static void debugfs_slab_add(struct kmem_cache *);
+static int debugfs_slab_alias(struct kmem_cache *, const char *);
+#else
+static inline void debugfs_slab_add(struct kmem_cache *s) { }
+static inline int debugfs_slab_alias(struct kmem_cache *s, const char *p)
+ { return 0; }
+#endif
+
static inline void stat(const struct kmem_cache *s, enum stat_item si)
{
#ifdef CONFIG_SLUB_STATS
@@ -4525,6 +4535,8 @@ __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
s->refcount--;
s = NULL;
}
+
+ debugfs_slab_alias(s, name);
}

return s;
@@ -4546,6 +4558,8 @@ int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
if (err)
__kmem_cache_release(s);

+ debugfs_slab_add(s);
+
return err;
}

@@ -4686,6 +4700,8 @@ static long validate_slab_cache(struct kmem_cache *s)

return count;
}
+
+#ifdef CONFIG_DEBUG_FS
/*
* Generate lists of code addresses where slabcache objects are allocated
* and freed.
@@ -4709,6 +4725,9 @@ struct loc_track {
struct location *loc;
};

+static struct dentry *slab_debugfs_root;
+struct loc_track t = { 0, 0, NULL };
+
static void free_loc_track(struct loc_track *t)
{
if (t->max)
@@ -4825,82 +4844,7 @@ static void process_slab(struct loc_track *t, struct kmem_cache *s,
add_location(t, s, get_track(s, p, alloc));
put_map(map);
}
-
-static int list_locations(struct kmem_cache *s, char *buf,
- enum track_item alloc)
-{
- int len = 0;
- unsigned long i;
- struct loc_track t = { 0, 0, NULL };
- int node;
- struct kmem_cache_node *n;
-
- if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
- GFP_KERNEL)) {
- return sysfs_emit(buf, "Out of memory\n");
- }
- /* Push back cpu slabs */
- flush_all(s);
-
- for_each_kmem_cache_node(s, node, n) {
- unsigned long flags;
- struct page *page;
-
- if (!atomic_long_read(&n->nr_slabs))
- continue;
-
- spin_lock_irqsave(&n->list_lock, flags);
- list_for_each_entry(page, &n->partial, slab_list)
- process_slab(&t, s, page, alloc);
- list_for_each_entry(page, &n->full, slab_list)
- process_slab(&t, s, page, alloc);
- spin_unlock_irqrestore(&n->list_lock, flags);
- }
-
- for (i = 0; i < t.count; i++) {
- struct location *l = &t.loc[i];
-
- len += sysfs_emit_at(buf, len, "%7ld ", l->count);
-
- if (l->addr)
- len += sysfs_emit_at(buf, len, "%pS", (void *)l->addr);
- else
- len += sysfs_emit_at(buf, len, "<not-available>");
-
- if (l->sum_time != l->min_time)
- len += sysfs_emit_at(buf, len, " age=%ld/%ld/%ld",
- l->min_time,
- (long)div_u64(l->sum_time,
- l->count),
- l->max_time);
- else
- len += sysfs_emit_at(buf, len, " age=%ld", l->min_time);
-
- if (l->min_pid != l->max_pid)
- len += sysfs_emit_at(buf, len, " pid=%ld-%ld",
- l->min_pid, l->max_pid);
- else
- len += sysfs_emit_at(buf, len, " pid=%ld",
- l->min_pid);
-
- if (num_online_cpus() > 1 &&
- !cpumask_empty(to_cpumask(l->cpus)))
- len += sysfs_emit_at(buf, len, " cpus=%*pbl",
- cpumask_pr_args(to_cpumask(l->cpus)));
-
- if (nr_online_nodes > 1 && !nodes_empty(l->nodes))
- len += sysfs_emit_at(buf, len, " nodes=%*pbl",
- nodemask_pr_args(&l->nodes));
-
- len += sysfs_emit_at(buf, len, "\n");
- }
-
- free_loc_track(&t);
- if (!t.count)
- len += sysfs_emit_at(buf, len, "No data\n");
-
- return len;
-}
+#endif /* CONFIG_DEBUG_FS */
#endif /* CONFIG_SLUB_DEBUG */

#ifdef SLUB_RESILIENCY_TEST
@@ -5349,22 +5293,6 @@ static ssize_t validate_store(struct kmem_cache *s,
return ret;
}
SLAB_ATTR(validate);
-
-static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
-{
- if (!(s->flags & SLAB_STORE_USER))
- return -ENOSYS;
- return list_locations(s, buf, TRACK_ALLOC);
-}
-SLAB_ATTR_RO(alloc_calls);
-
-static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
-{
- if (!(s->flags & SLAB_STORE_USER))
- return -ENOSYS;
- return list_locations(s, buf, TRACK_FREE);
-}
-SLAB_ATTR_RO(free_calls);
#endif /* CONFIG_SLUB_DEBUG */

#ifdef CONFIG_FAILSLAB
@@ -5528,8 +5456,6 @@ static struct attribute *slab_attrs[] = {
&poison_attr.attr,
&store_user_attr.attr,
&validate_attr.attr,
- &alloc_calls_attr.attr,
- &free_calls_attr.attr,
#endif
#ifdef CONFIG_ZONE_DMA
&cache_dma_attr.attr,
@@ -5818,6 +5744,245 @@ static int __init slab_sysfs_init(void)
__initcall(slab_sysfs_init);
#endif /* CONFIG_SYSFS */

+#if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS)
+static int debugfs_slab_alias(struct kmem_cache *s, const char *name)
+{
+ struct saved_alias *al;
+
+ if (slab_state == FULL) {
+ /*
+ * If we have a leftover link then remove it.
+ */
+ debugfs_remove(debugfs_lookup(s->name,
+ slab_debugfs_root));
+ debugfs_create_symlink(name, slab_debugfs_root, NULL);
+ return 0;
+ }
+
+ al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
+
+ al->s = s;
+ al->name = name;
+ al->next = alias_list;
+ alias_list = al;
+ return 0;
+}
+
+static int slab_debugfs_show(struct seq_file *seq, void *v)
+{
+ struct location *l;
+ unsigned int idx = *(unsigned int *)v;
+
+ if (idx < t.count) {
+ l = &t.loc[idx];
+
+ seq_printf(seq, "%7ld ", l->count);
+
+ if (l->addr)
+ seq_printf(seq, "%pS", (void *)l->addr);
+ else
+ seq_puts(seq, "<not-available>");
+
+ if (l->sum_time != l->min_time) {
+ seq_printf(seq, " age=%ld/%ld/%ld",
+ l->min_time,
+ (long)div_u64(l->sum_time, l->count),
+ l->max_time);
+ } else
+ seq_printf(seq, " age=%ld",
+ l->min_time);
+
+ if (l->min_pid != l->max_pid)
+ seq_printf(seq, " pid=%ld-%ld",
+ l->min_pid, l->max_pid);
+ else
+ seq_printf(seq, " pid=%ld",
+ l->min_pid);
+
+ if (num_online_cpus() > 1 &&
+ !cpumask_empty(to_cpumask(l->cpus)))
+ seq_printf(seq, " cpus=%*pbl",
+ cpumask_pr_args(to_cpumask(l->cpus)));
+
+ if (nr_online_nodes > 1 && !nodes_empty(l->nodes))
+ seq_printf(seq, " nodes=%*pbl",
+ nodemask_pr_args(&l->nodes));
+
+ seq_puts(seq, "\n");
+ }
+
+ if (t.count == 0)
+ seq_puts(seq, "No data\n");
+
+ return 0;
+}
+
+static void slab_debugfs_stop(struct seq_file *seq, void *v)
+{
+ if (!v && t.max) {
+ free_loc_track(&t);
+ t.max = 0;
+ }
+}
+
+static void *slab_debugfs_next(struct seq_file *seq, void *v, loff_t *ppos)
+{
+ loff_t *spos = v;
+
+ if (*ppos < t.count) {
+ *spos = *spos + 1;
+ *ppos = *spos;
+ return spos;
+ }
+
+ return NULL;
+}
+
+static void *slab_debugfs_start(struct seq_file *seq, loff_t *ppos)
+{
+ struct kmem_cache_node *n;
+ struct kmem_cache *s;
+ enum track_item alloc;
+ int node;
+ loff_t *spos = kmalloc(sizeof(loff_t), GFP_KERNEL);
+
+ s = seq->file->f_inode->i_private;
+
+ if (!spos)
+ return NULL;
+
+ if (!(s->flags & SLAB_STORE_USER)) {
+ kfree(spos);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ if (*ppos == 0) {
+
+ t.count = 0;
+ t.max = 0;
+ t.loc = NULL;
+ if (strcmp(seq->file->f_path.dentry->d_name.name, "alloc_traces") == 0)
+ alloc = TRACK_ALLOC;
+ else
+ alloc = TRACK_FREE;
+
+ if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
+ GFP_KERNEL)) {
+ seq_puts(seq, "Out of memory\n");
+ kfree(spos);
+ return ERR_PTR(-ENOMEM);
+ }
+ /* Push back cpu slabs */
+ flush_all(s);
+
+ for_each_kmem_cache_node(s, node, n) {
+ unsigned long flags;
+ struct page *page;
+
+ if (!atomic_long_read(&n->nr_slabs))
+ continue;
+
+ spin_lock_irqsave(&n->list_lock, flags);
+ list_for_each_entry(page, &n->partial, slab_list)
+ process_slab(&t, s, page, alloc);
+ list_for_each_entry(page, &n->full, slab_list)
+ process_slab(&t, s, page, alloc);
+ spin_unlock_irqrestore(&n->list_lock, flags);
+ }
+ }
+
+ if (*ppos < t.count) {
+ *spos = *ppos;
+ return spos;
+ }
+
+ kfree(spos);
+ return NULL;
+}
+
+static const struct seq_operations slab_debugfs_sops = {
+ .start = slab_debugfs_start,
+ .next = slab_debugfs_next,
+ .stop = slab_debugfs_stop,
+ .show = slab_debugfs_show
+};
+DEFINE_SEQ_ATTRIBUTE(slab_debugfs);
+
+static void debugfs_slab_add(struct kmem_cache *s)
+{
+ const char *name;
+ struct dentry *slab_cache_dir;
+ int unmergeable = slab_unmergeable(s);
+
+ if (unlikely(!slab_debugfs_root))
+ return;
+
+ if (!unmergeable && disable_higher_order_debug &&
+ (slub_debug & DEBUG_METADATA_FLAGS))
+ unmergeable = 1;
+
+ if (unmergeable) {
+ /*
+ * Slabcache can never be merged so we can use the name proper.
+ * This is typically the case for debug situations. In that
+ * case we can catch duplicate names easily.
+ */
+ debugfs_remove_recursive(debugfs_lookup(s->name,
+ slab_debugfs_root));
+ name = s->name;
+ } else {
+ /*
+ * Create a unique name for the slab as a target
+ * for the symlinks.
+ */
+ name = create_unique_id(s);
+ }
+
+ slab_cache_dir = debugfs_create_dir(name, slab_debugfs_root);
+
+ debugfs_create_file("alloc_traces", 0400,
+ slab_cache_dir, s, &slab_debugfs_fops);
+
+ debugfs_create_file("free_traces", 0400,
+ slab_cache_dir, s, &slab_debugfs_fops);
+
+ if (!unmergeable)
+ /* Setup first alias */
+ debugfs_slab_alias(s, s->name);
+}
+
+void debugfs_slab_release(struct kmem_cache *s)
+{
+ debugfs_remove_recursive(debugfs_lookup(s->name,
+ slab_debugfs_root));
+}
+
+static int __init slab_debugfs_init(void)
+{
+ struct kmem_cache *s;
+
+ slab_debugfs_root = debugfs_create_dir("slab", NULL);
+
+ slab_state = FULL;
+
+ list_for_each_entry(s, &slab_caches, list)
+ debugfs_slab_add(s);
+
+ while (alias_list) {
+ struct saved_alias *al = alias_list;
+
+ alias_list = alias_list->next;
+
+ debugfs_slab_alias(al->s, al->name);
+
+ kfree(al);
+ }
+
+ return 0;
+
+}
+__initcall(slab_debugfs_init);
+#endif
/*
* The /proc/slabinfo ABI
*/
--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a
member of the Code Aurora Forum, hosted by The Linux Foundation


2021-05-25 08:35:14

by Greg Kroah-Hartman

[permalink] [raw]
Subject: Re: [PATCH v7] mm: slub: move sysfs slab alloc/free interfaces to debugfs

On Tue, May 25, 2021 at 01:08:05PM +0530, Faiyaz Mohammed wrote:
> alloc_calls and free_calls implementation in sysfs have two issues,
> one is PAGE_SIZE limitiation of sysfs and other is it does not adhere
> to "one value per file" rule.
>
> To overcome this issues, move the alloc_calls and free_calls implemeation
> to debugfs.
>
> Rename the alloc_calls/free_calls to alloc_traces/free_traces,
> to be inline with what it does.
>
> Reported-by: kernel test robot <[email protected]>
> Reported-by: Dan Carpenter <[email protected]>
> Signed-off-by: Faiyaz Mohammed <[email protected]>
> ---
> changes in V7:
> - Drop the older alloc_calls and free_calls interface.
> changes in v6:
> - https://lore.kernel.org/linux-mm/[email protected]/
>
> changes in v5:
> - https://lore.kernel.org/linux-mm/[email protected]/
>
> changes in v4:
> - https://lore.kernel.org/linux-mm/[email protected]/
>
> changes in v3:
> - https://lore.kernel.org/linux-mm/[email protected]/
>
> changes in v2:
> - https://lore.kernel.org/linux-mm/[email protected]/
>
> changes in v1:
> - https://lore.kernel.org/linux-mm/[email protected]/
>
> include/linux/slub_def.h | 8 ++
> mm/slab_common.c | 9 ++
> mm/slub.c | 353 ++++++++++++++++++++++++++++++++++-------------
> 3 files changed, 276 insertions(+), 94 deletions(-)
>
> diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
> index dcde82a..b413ebe 100644
> --- a/include/linux/slub_def.h
> +++ b/include/linux/slub_def.h
> @@ -159,6 +159,14 @@ static inline void sysfs_slab_release(struct kmem_cache *s)
> }
> #endif
>
> +#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG)
> +#define SLAB_SUPPORTS_DEBUGFS
> +void debugfs_slab_release(struct kmem_cache *);
> +#else
> +static inline void debugfs_slab_release(struct kmem_cache *s)
> +{
> +}
> +#endif
> void object_err(struct kmem_cache *s, struct page *page,
> u8 *object, char *reason);
>
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index a4a5714..873dd79 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -455,6 +455,9 @@ static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
> #else
> slab_kmem_cache_release(s);
> #endif
> +#ifdef SLAB_SUPPORTS_DEBUGFS
> + debugfs_slab_release(s);
> +#endif

Why do you need these #ifdef if your slub_dev.h file already provides an
"empty" function for this?

> }
> }
>
> @@ -472,6 +475,9 @@ static int shutdown_cache(struct kmem_cache *s)
> #ifdef SLAB_SUPPORTS_SYSFS
> sysfs_slab_unlink(s);
> #endif
> +#ifdef SLAB_SUPPORTS_DEBUGFS
> + debugfs_slab_release(s);
> +#endif

Same here.

> list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
> schedule_work(&slab_caches_to_rcu_destroy_work);
> } else {
> @@ -482,6 +488,9 @@ static int shutdown_cache(struct kmem_cache *s)
> #else
> slab_kmem_cache_release(s);
> #endif
> +#ifdef SLAB_SUPPORTS_DEBUGFS
> + debugfs_slab_release(s);
> +#endif

And here.

What is wrong with your .h file that keeps the need for #ifdef in the .c
file?

I thought I've asked about this a number of times in the past, what am I
missing?

thanks,

greg k-h

2021-05-25 09:06:12

by Faiyaz Mohammed

[permalink] [raw]
Subject: Re: [PATCH v7] mm: slub: move sysfs slab alloc/free interfaces to debugfs



On 5/25/2021 1:23 PM, Greg KH wrote:
> On Tue, May 25, 2021 at 01:08:05PM +0530, Faiyaz Mohammed wrote:
>> alloc_calls and free_calls implementation in sysfs have two issues,
>> one is PAGE_SIZE limitiation of sysfs and other is it does not adhere
>> to "one value per file" rule.
>>
>> To overcome this issues, move the alloc_calls and free_calls implemeation
>> to debugfs.
>>
>> Rename the alloc_calls/free_calls to alloc_traces/free_traces,
>> to be inline with what it does.
>>
>> Reported-by: kernel test robot <[email protected]>
>> Reported-by: Dan Carpenter <[email protected]>
>> Signed-off-by: Faiyaz Mohammed <[email protected]>
>> ---
>> changes in V7:
>> - Drop the older alloc_calls and free_calls interface.
>> changes in v6:
>> - https://lore.kernel.org/linux-mm/[email protected]/
>>
>> changes in v5:
>> - https://lore.kernel.org/linux-mm/[email protected]/
>>
>> changes in v4:
>> - https://lore.kernel.org/linux-mm/[email protected]/
>>
>> changes in v3:
>> - https://lore.kernel.org/linux-mm/[email protected]/
>>
>> changes in v2:
>> - https://lore.kernel.org/linux-mm/[email protected]/
>>
>> changes in v1:
>> - https://lore.kernel.org/linux-mm/[email protected]/
>>
>> include/linux/slub_def.h | 8 ++
>> mm/slab_common.c | 9 ++
>> mm/slub.c | 353 ++++++++++++++++++++++++++++++++++-------------
>> 3 files changed, 276 insertions(+), 94 deletions(-)
>>
>> diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
>> index dcde82a..b413ebe 100644
>> --- a/include/linux/slub_def.h
>> +++ b/include/linux/slub_def.h
>> @@ -159,6 +159,14 @@ static inline void sysfs_slab_release(struct kmem_cache *s)
>> }
>> #endif
>>
>> +#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG)
>> +#define SLAB_SUPPORTS_DEBUGFS
>> +void debugfs_slab_release(struct kmem_cache *);
>> +#else
>> +static inline void debugfs_slab_release(struct kmem_cache *s)
>> +{
>> +}
>> +#endif
>> void object_err(struct kmem_cache *s, struct page *page,
>> u8 *object, char *reason);
>>
>> diff --git a/mm/slab_common.c b/mm/slab_common.c
>> index a4a5714..873dd79 100644
>> --- a/mm/slab_common.c
>> +++ b/mm/slab_common.c
>> @@ -455,6 +455,9 @@ static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
>> #else
>> slab_kmem_cache_release(s);
>> #endif
>> +#ifdef SLAB_SUPPORTS_DEBUGFS
>> + debugfs_slab_release(s);
>> +#endif
>
> Why do you need these #ifdef if your slub_dev.h file already provides an
> "empty" function for this?
>
We are not including slub_def.h directly. mm/slab.h includes the
slub_def.h if CONFIG_SLUB enable,

from mm/slab.h
#ifdef CONFIG_SLAB
#include <linux/slab_def.h>
#endif

#ifdef CONFIG_SLUB
#include <linux/slub_def.h>
#endif

so if CONFIG_SLAB is enable then mm/slab.h includes slab_def.h, to avoid
undefined reference error added SLAB_SUPPORTS_DEBUGFS like
SLAB_SUPPORTS_SYSFS.
>> }
>> }
>>
>> @@ -472,6 +475,9 @@ static int shutdown_cache(struct kmem_cache *s)
>> #ifdef SLAB_SUPPORTS_SYSFS
>> sysfs_slab_unlink(s);
>> #endif
>> +#ifdef SLAB_SUPPORTS_DEBUGFS
>> + debugfs_slab_release(s);
>> +#endif
>
> Same here.
>
>> list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
>> schedule_work(&slab_caches_to_rcu_destroy_work);
>> } else {
>> @@ -482,6 +488,9 @@ static int shutdown_cache(struct kmem_cache *s)
>> #else
>> slab_kmem_cache_release(s);
>> #endif
>> +#ifdef SLAB_SUPPORTS_DEBUGFS
>> + debugfs_slab_release(s);
>> +#endif
>
> And here.
>
> What is wrong with your .h file that keeps the need for #ifdef in the .c
> file?
>
> I thought I've asked about this a number of times in the past, what am I
> missing?
>
> thanks,
>
> greg k-h
>

2021-05-25 11:56:22

by Greg Kroah-Hartman

[permalink] [raw]
Subject: Re: [PATCH v7] mm: slub: move sysfs slab alloc/free interfaces to debugfs

On Tue, May 25, 2021 at 02:27:15PM +0530, Faiyaz Mohammed wrote:
>
>
> On 5/25/2021 1:23 PM, Greg KH wrote:
> > On Tue, May 25, 2021 at 01:08:05PM +0530, Faiyaz Mohammed wrote:
> >> alloc_calls and free_calls implementation in sysfs have two issues,
> >> one is PAGE_SIZE limitiation of sysfs and other is it does not adhere
> >> to "one value per file" rule.
> >>
> >> To overcome this issues, move the alloc_calls and free_calls implemeation
> >> to debugfs.
> >>
> >> Rename the alloc_calls/free_calls to alloc_traces/free_traces,
> >> to be inline with what it does.
> >>
> >> Reported-by: kernel test robot <[email protected]>
> >> Reported-by: Dan Carpenter <[email protected]>
> >> Signed-off-by: Faiyaz Mohammed <[email protected]>
> >> ---
> >> changes in V7:
> >> - Drop the older alloc_calls and free_calls interface.
> >> changes in v6:
> >> - https://lore.kernel.org/linux-mm/[email protected]/
> >>
> >> changes in v5:
> >> - https://lore.kernel.org/linux-mm/[email protected]/
> >>
> >> changes in v4:
> >> - https://lore.kernel.org/linux-mm/[email protected]/
> >>
> >> changes in v3:
> >> - https://lore.kernel.org/linux-mm/[email protected]/
> >>
> >> changes in v2:
> >> - https://lore.kernel.org/linux-mm/[email protected]/
> >>
> >> changes in v1:
> >> - https://lore.kernel.org/linux-mm/[email protected]/
> >>
> >> include/linux/slub_def.h | 8 ++
> >> mm/slab_common.c | 9 ++
> >> mm/slub.c | 353 ++++++++++++++++++++++++++++++++++-------------
> >> 3 files changed, 276 insertions(+), 94 deletions(-)
> >>
> >> diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
> >> index dcde82a..b413ebe 100644
> >> --- a/include/linux/slub_def.h
> >> +++ b/include/linux/slub_def.h
> >> @@ -159,6 +159,14 @@ static inline void sysfs_slab_release(struct kmem_cache *s)
> >> }
> >> #endif
> >>
> >> +#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG)
> >> +#define SLAB_SUPPORTS_DEBUGFS
> >> +void debugfs_slab_release(struct kmem_cache *);
> >> +#else
> >> +static inline void debugfs_slab_release(struct kmem_cache *s)
> >> +{
> >> +}
> >> +#endif
> >> void object_err(struct kmem_cache *s, struct page *page,
> >> u8 *object, char *reason);
> >>
> >> diff --git a/mm/slab_common.c b/mm/slab_common.c
> >> index a4a5714..873dd79 100644
> >> --- a/mm/slab_common.c
> >> +++ b/mm/slab_common.c
> >> @@ -455,6 +455,9 @@ static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
> >> #else
> >> slab_kmem_cache_release(s);
> >> #endif
> >> +#ifdef SLAB_SUPPORTS_DEBUGFS
> >> + debugfs_slab_release(s);
> >> +#endif
> >
> > Why do you need these #ifdef if your slub_dev.h file already provides an
> > "empty" function for this?
> >
> We are not including slub_def.h directly. mm/slab.h includes the
> slub_def.h if CONFIG_SLUB enable,
>
> from mm/slab.h
> #ifdef CONFIG_SLAB
> #include <linux/slab_def.h>
> #endif
>
> #ifdef CONFIG_SLUB
> #include <linux/slub_def.h>
> #endif
>
> so if CONFIG_SLAB is enable then mm/slab.h includes slab_def.h, to avoid
> undefined reference error added SLAB_SUPPORTS_DEBUGFS like
> SLAB_SUPPORTS_SYSFS.

Ick, ok, messy code, I'll stop complaining now if this really is the
only way to do it (still feels wrong to me...)

greg k-h

2021-05-26 12:47:44

by Vlastimil Babka

[permalink] [raw]
Subject: Re: [PATCH v7] mm: slub: move sysfs slab alloc/free interfaces to debugfs

On 5/25/21 1:54 PM, Greg KH wrote:
> On Tue, May 25, 2021 at 02:27:15PM +0530, Faiyaz Mohammed wrote:
> >> --- a/mm/slab_common.c
>> >> +++ b/mm/slab_common.c
>> >> @@ -455,6 +455,9 @@ static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
>> >> #else
>> >> slab_kmem_cache_release(s);
>> >> #endif
>> >> +#ifdef SLAB_SUPPORTS_DEBUGFS
>> >> + debugfs_slab_release(s);
>> >> +#endif
>> >
>> > Why do you need these #ifdef if your slub_dev.h file already provides an
>> > "empty" function for this?
>> >
>> We are not including slub_def.h directly. mm/slab.h includes the
>> slub_def.h if CONFIG_SLUB enable,
>>
>> from mm/slab.h
>> #ifdef CONFIG_SLAB
>> #include <linux/slab_def.h>
>> #endif
>>
>> #ifdef CONFIG_SLUB
>> #include <linux/slub_def.h>
>> #endif
>>
>> so if CONFIG_SLAB is enable then mm/slab.h includes slab_def.h, to avoid
>> undefined reference error added SLAB_SUPPORTS_DEBUGFS like
>> SLAB_SUPPORTS_SYSFS.
>
> Ick, ok, messy code, I'll stop complaining now if this really is the
> only way to do it (still feels wrong to me...)

How about simply replicating the empty function in
include/linux/slab_def.h

We could do the same with SYSFS, except the SLAB (and SLUB w/o SYSFS) versions
of sysfs_slab_release() would not be empty, but just call
slab_kmem_cache_release(s);
Then we could get rid of the #ifdef's completely?

> greg k-h
>

2021-05-26 12:54:42

by Vlastimil Babka

[permalink] [raw]
Subject: Re: [PATCH v7] mm: slub: move sysfs slab alloc/free interfaces to debugfs

On 5/25/21 9:38 AM, Faiyaz Mohammed wrote:
> alloc_calls and free_calls implementation in sysfs have two issues,
> one is PAGE_SIZE limitiation of sysfs and other is it does not adhere
> to "one value per file" rule.
>
> To overcome this issues, move the alloc_calls and free_calls implemeation
> to debugfs.
>
> Rename the alloc_calls/free_calls to alloc_traces/free_traces,
> to be inline with what it does.
>
> Reported-by: kernel test robot <[email protected]>
> Reported-by: Dan Carpenter <[email protected]>

These were IIRC bot reports for some bugs in the previous versions, so keeping
the Reported-by: for the whole patch is misleading - these were not reports for
the sysfs issues this patch fixes by moving the files to debugfs.

> Signed-off-by: Faiyaz Mohammed <[email protected]>
> ---
> changes in V7:
> - Drop the older alloc_calls and free_calls interface.
> changes in v6:
> - https://lore.kernel.org/linux-mm/[email protected]/
>
> changes in v5:
> - https://lore.kernel.org/linux-mm/[email protected]/
>
> changes in v4:
> - https://lore.kernel.org/linux-mm/[email protected]/
>
> changes in v3:
> - https://lore.kernel.org/linux-mm/[email protected]/
>
> changes in v2:
> - https://lore.kernel.org/linux-mm/[email protected]/
>
> changes in v1:
> - https://lore.kernel.org/linux-mm/[email protected]/
>
> include/linux/slub_def.h | 8 ++
> mm/slab_common.c | 9 ++
> mm/slub.c | 353 ++++++++++++++++++++++++++++++++++-------------
> 3 files changed, 276 insertions(+), 94 deletions(-)

I don't see any of the symlinks under /sys/kernel/debug/slab/, so I think the
aliases handling code is wrong, and I can see at least two reasons why it could be:

> @@ -4525,6 +4535,8 @@ __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
> s->refcount--;
> s = NULL;
> }
> +
> + debugfs_slab_alias(s, name);

Here you might be calling debugfs_slab_alias() with NULL if the
sysfs_slab_alias() above returned true.

> }
>
> return s;

...

> +static int __init slab_debugfs_init(void)
> +{
> + struct kmem_cache *s;
> +
> + slab_debugfs_root = debugfs_create_dir("slab", NULL);
> +
> + slab_state = FULL;
> +
> + list_for_each_entry(s, &slab_caches, list)
> + debugfs_slab_add(s);
> +
> + while (alias_list) {
> + struct saved_alias *al = alias_list;

alias_list a single list and both slab_sysfs_init() and slab_debugfs_init()
flush it. So only the init call that happens to be called first, does actually
find an unflushed list. I think you
need to use a separate list for debugfs (simpler) or a shared list with both
sysfs and debugfs processing (probably more complicated).

And finally a question, perhaps also for Greg. With sysfs, we hand out the
lifecycle of struct kmem_cache to sysfs, to ensure we are not reading sysfs
files of a cache that has been removed.

But with debugfs, what are the guarantees that things won't blow up when a
debugfs file is being read while somebody calls kmem_cache_destroy() on the cache?

> +
> + alias_list = alias_list->next;
> +
> + debugfs_slab_alias(al->s, al->name);
> +
> + kfree(al);
> + }
> +
> + return 0;
> +
> +}
> +__initcall(slab_debugfs_init);
> +#endif
> /*
> * The /proc/slabinfo ABI
> */
>

2021-05-26 13:08:44

by Vlastimil Babka

[permalink] [raw]
Subject: Re: [PATCH v7] mm: slub: move sysfs slab alloc/free interfaces to debugfs

On 5/26/21 1:48 PM, Greg KH wrote:
> On Wed, May 26, 2021 at 01:38:55PM +0200, Vlastimil Babka wrote:
>>
>> alias_list a single list and both slab_sysfs_init() and slab_debugfs_init()
>> flush it. So only the init call that happens to be called first, does actually
>> find an unflushed list. I think you
>> need to use a separate list for debugfs (simpler) or a shared list with both
>> sysfs and debugfs processing (probably more complicated).
>>
>> And finally a question, perhaps also for Greg. With sysfs, we hand out the
>> lifecycle of struct kmem_cache to sysfs, to ensure we are not reading sysfs
>> files of a cache that has been removed.
>>
>> But with debugfs, what are the guarantees that things won't blow up when a
>> debugfs file is being read while somebody calls kmem_cache_destroy() on the cache?
>
> It's much harder, but usually the default debugfs_file_create() will
> handle this for you. See the debugfs_file_create_unsafe() for the
> "other" variant where you know you can tear things down "safely".

Right, so IIUC debugfs will guarantee that while somebody reads the files, the
debugfs cleanup will block, as debugfs_file_get() comment explains.

In that case I think we have the cleanup order wrong in this patch:

shutdown_cache() should first do debugfs_slab_release() (which would block) and
only then proceed with slab_kmem_cache_release() which destroys the fundamental
structures such as kmem_cache_node, which are also accessed by the debugfs file
handlers.

> That being said, yes there are still issues in this area, be careful
> about what tools you expect to be constantly hitting debugfs files.

FWIW, the files are accessible only to root.

> thanks,
>
> greg k-h
>

2021-05-26 17:23:10

by Greg KH

[permalink] [raw]
Subject: Re: [PATCH v7] mm: slub: move sysfs slab alloc/free interfaces to debugfs

On Wed, May 26, 2021 at 01:38:55PM +0200, Vlastimil Babka wrote:
> On 5/25/21 9:38 AM, Faiyaz Mohammed wrote:
> > alloc_calls and free_calls implementation in sysfs have two issues,
> > one is PAGE_SIZE limitiation of sysfs and other is it does not adhere
> > to "one value per file" rule.
> >
> > To overcome this issues, move the alloc_calls and free_calls implemeation
> > to debugfs.
> >
> > Rename the alloc_calls/free_calls to alloc_traces/free_traces,
> > to be inline with what it does.
> >
> > Reported-by: kernel test robot <[email protected]>
> > Reported-by: Dan Carpenter <[email protected]>
>
> These were IIRC bot reports for some bugs in the previous versions, so keeping
> the Reported-by: for the whole patch is misleading - these were not reports for
> the sysfs issues this patch fixes by moving the files to debugfs.
>
> > Signed-off-by: Faiyaz Mohammed <[email protected]>
> > ---
> > changes in V7:
> > - Drop the older alloc_calls and free_calls interface.
> > changes in v6:
> > - https://lore.kernel.org/linux-mm/[email protected]/
> >
> > changes in v5:
> > - https://lore.kernel.org/linux-mm/[email protected]/
> >
> > changes in v4:
> > - https://lore.kernel.org/linux-mm/[email protected]/
> >
> > changes in v3:
> > - https://lore.kernel.org/linux-mm/[email protected]/
> >
> > changes in v2:
> > - https://lore.kernel.org/linux-mm/[email protected]/
> >
> > changes in v1:
> > - https://lore.kernel.org/linux-mm/[email protected]/
> >
> > include/linux/slub_def.h | 8 ++
> > mm/slab_common.c | 9 ++
> > mm/slub.c | 353 ++++++++++++++++++++++++++++++++++-------------
> > 3 files changed, 276 insertions(+), 94 deletions(-)
>
> I don't see any of the symlinks under /sys/kernel/debug/slab/, so I think the
> aliases handling code is wrong, and I can see at least two reasons why it could be:
>
> > @@ -4525,6 +4535,8 @@ __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
> > s->refcount--;
> > s = NULL;
> > }
> > +
> > + debugfs_slab_alias(s, name);
>
> Here you might be calling debugfs_slab_alias() with NULL if the
> sysfs_slab_alias() above returned true.
>
> > }
> >
> > return s;
>
> ...
>
> > +static int __init slab_debugfs_init(void)
> > +{
> > + struct kmem_cache *s;
> > +
> > + slab_debugfs_root = debugfs_create_dir("slab", NULL);
> > +
> > + slab_state = FULL;
> > +
> > + list_for_each_entry(s, &slab_caches, list)
> > + debugfs_slab_add(s);
> > +
> > + while (alias_list) {
> > + struct saved_alias *al = alias_list;
>
> alias_list a single list and both slab_sysfs_init() and slab_debugfs_init()
> flush it. So only the init call that happens to be called first, does actually
> find an unflushed list. I think you
> need to use a separate list for debugfs (simpler) or a shared list with both
> sysfs and debugfs processing (probably more complicated).
>
> And finally a question, perhaps also for Greg. With sysfs, we hand out the
> lifecycle of struct kmem_cache to sysfs, to ensure we are not reading sysfs
> files of a cache that has been removed.
>
> But with debugfs, what are the guarantees that things won't blow up when a
> debugfs file is being read while somebody calls kmem_cache_destroy() on the cache?

It's much harder, but usually the default debugfs_file_create() will
handle this for you. See the debugfs_file_create_unsafe() for the
"other" variant where you know you can tear things down "safely".

That being said, yes there are still issues in this area, be careful
about what tools you expect to be constantly hitting debugfs files.

thanks,

greg k-h

2021-05-26 18:59:52

by Faiyaz Mohammed

[permalink] [raw]
Subject: Re: [PATCH v7] mm: slub: move sysfs slab alloc/free interfaces to debugfs



On 5/26/2021 4:33 PM, Vlastimil Babka wrote:
> On 5/25/21 1:54 PM, Greg KH wrote:
>> On Tue, May 25, 2021 at 02:27:15PM +0530, Faiyaz Mohammed wrote:
>>>> --- a/mm/slab_common.c
>>>>> +++ b/mm/slab_common.c
>>>>> @@ -455,6 +455,9 @@ static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
>>>>> #else
>>>>> slab_kmem_cache_release(s);
>>>>> #endif
>>>>> +#ifdef SLAB_SUPPORTS_DEBUGFS
>>>>> + debugfs_slab_release(s);
>>>>> +#endif
>>>>
>>>> Why do you need these #ifdef if your slub_dev.h file already provides an
>>>> "empty" function for this?
>>>>
>>> We are not including slub_def.h directly. mm/slab.h includes the
>>> slub_def.h if CONFIG_SLUB enable,
>>>
>>> from mm/slab.h
>>> #ifdef CONFIG_SLAB
>>> #include <linux/slab_def.h>
>>> #endif
>>>
>>> #ifdef CONFIG_SLUB
>>> #include <linux/slub_def.h>
>>> #endif
>>>
>>> so if CONFIG_SLAB is enable then mm/slab.h includes slab_def.h, to avoid
>>> undefined reference error added SLAB_SUPPORTS_DEBUGFS like
>>> SLAB_SUPPORTS_SYSFS.
>>
>> Ick, ok, messy code, I'll stop complaining now if this really is the
>> only way to do it (still feels wrong to me...)
>
> How about simply replicating the empty function in
> include/linux/slab_def.h
>
Yes, we can add empty function in include/linux/slab_def.h.
I will add in next patch version.

> We could do the same with SYSFS, except the SLAB (and SLUB w/o SYSFS) versions
> of sysfs_slab_release() would not be empty, but just call
> slab_kmem_cache_release(s);
> Then we could get rid of the #ifdef's completely?
>
Is it okay, if I raise separate patch for sysfs by adding empty function
in slab_def.h?

Thanks and regards,
Mohammed Faiyaz
>> greg k-h
>>
>

2021-05-26 22:25:51

by Vlastimil Babka

[permalink] [raw]
Subject: Re: [PATCH v7] mm: slub: move sysfs slab alloc/free interfaces to debugfs

On 5/26/21 5:06 PM, Faiyaz Mohammed wrote:
>> How about simply replicating the empty function in
>> include/linux/slab_def.h
>>
> Yes, we can add empty function in include/linux/slab_def.h.
> I will add in next patch version.
>
>> We could do the same with SYSFS, except the SLAB (and SLUB w/o SYSFS) versions
>> of sysfs_slab_release() would not be empty, but just call
>> slab_kmem_cache_release(s);
>> Then we could get rid of the #ifdef's completely?
>>
> Is it okay, if I raise separate patch for sysfs by adding empty function
> in slab_def.h?

Yeah that would be cleaner. Thanks.

> Thanks and regards,
> Mohammed Faiyaz
>>> greg k-h
>>>
>>
>

2021-05-31 06:56:26

by Faiyaz Mohammed

[permalink] [raw]
Subject: Re: [PATCH v7] mm: slub: move sysfs slab alloc/free interfaces to debugfs



On 5/26/2021 5:08 PM, Vlastimil Babka wrote:
> On 5/25/21 9:38 AM, Faiyaz Mohammed wrote:
>> alloc_calls and free_calls implementation in sysfs have two issues,
>> one is PAGE_SIZE limitiation of sysfs and other is it does not adhere
>> to "one value per file" rule.
>>
>> To overcome this issues, move the alloc_calls and free_calls implemeation
>> to debugfs.
>>
>> Rename the alloc_calls/free_calls to alloc_traces/free_traces,
>> to be inline with what it does.
>>
>> Reported-by: kernel test robot <[email protected]>
>> Reported-by: Dan Carpenter <[email protected]>
>
> These were IIRC bot reports for some bugs in the previous versions, so keeping
> the Reported-by: for the whole patch is misleading - these were not reports for
> the sysfs issues this patch fixes by moving the files to debugfs.
>
Yes, I will update in next patch version.
>> Signed-off-by: Faiyaz Mohammed <[email protected]>
>> ---
>> changes in V7:
>> - Drop the older alloc_calls and free_calls interface.
>> changes in v6:
>> - https://lore.kernel.org/linux-mm/[email protected]/
>>
>> changes in v5:
>> - https://lore.kernel.org/linux-mm/[email protected]/
>>
>> changes in v4:
>> - https://lore.kernel.org/linux-mm/[email protected]/
>>
>> changes in v3:
>> - https://lore.kernel.org/linux-mm/[email protected]/
>>
>> changes in v2:
>> - https://lore.kernel.org/linux-mm/[email protected]/
>>
>> changes in v1:
>> - https://lore.kernel.org/linux-mm/[email protected]/
>>
>> include/linux/slub_def.h | 8 ++
>> mm/slab_common.c | 9 ++
>> mm/slub.c | 353 ++++++++++++++++++++++++++++++++++-------------
>> 3 files changed, 276 insertions(+), 94 deletions(-)
>
> I don't see any of the symlinks under /sys/kernel/debug/slab/, so I think the
> aliases handling code is wrong, and I can see at least two reasons why it could be:
>

I think I missed one thing, when CONFIG_SLUB_DEBUG_ON enable or
slub_debug is pass through command line __kmem_cache_alias() will return
null, so no symlinks will be created even if CONFIG_SLAB_MERGE_DEFAULT
is enable and to store user data we need to enable CONFIG_SLUB_DEBUG_ON
or pass slub_debug through command line.

>> @@ -4525,6 +4535,8 @@ __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
>> s->refcount--;
>> s = NULL;
>> }
>> +
>> + debugfs_slab_alias(s, name);
>
> Here you might be calling debugfs_slab_alias() with NULL if the
> sysfs_slab_alias() above returned true.
>
I think we can drop debugfs_slab_alias implementation.
>> }
>>
>> return s;
>
> ...
>
>> +static int __init slab_debugfs_init(void)
>> +{
>> + struct kmem_cache *s;
>> +
>> + slab_debugfs_root = debugfs_create_dir("slab", NULL);
>> +
>> + slab_state = FULL;
>> +
>> + list_for_each_entry(s, &slab_caches, list)
>> + debugfs_slab_add(s);
>> +
>> + while (alias_list) {
>> + struct saved_alias *al = alias_list;
>
> alias_list a single list and both slab_sysfs_init() and slab_debugfs_init()
> flush it. So only the init call that happens to be called first, does actually
> find an unflushed list. I think you
> need to use a separate list for debugfs (simpler) or a shared list with both
> sysfs and debugfs processing (probably more complicated).
>
same here, I think we can drop the debugfs alias change.

> And finally a question, perhaps also for Greg. With sysfs, we hand out the
> lifecycle of struct kmem_cache to sysfs, to ensure we are not reading sysfs
> files of a cache that has been removed.
>
> But with debugfs, what are the guarantees that things won't blow up when a
> debugfs file is being read while somebody calls kmem_cache_destroy() on the cache?
>
>> +
>> + alias_list = alias_list->next;
>> +
>> + debugfs_slab_alias(al->s, al->name);
>> +
>> + kfree(al);
>> + }
>> +
>> + return 0;
>> +
>> +}
>> +__initcall(slab_debugfs_init);
>> +#endif
>> /*
>> * The /proc/slabinfo ABI
>> */
>>
>

2021-05-31 07:13:05

by Faiyaz Mohammed

[permalink] [raw]
Subject: Re: [PATCH v7] mm: slub: move sysfs slab alloc/free interfaces to debugfs



On 5/26/2021 5:43 PM, Vlastimil Babka wrote:
> On 5/26/21 1:48 PM, Greg KH wrote:
>> On Wed, May 26, 2021 at 01:38:55PM +0200, Vlastimil Babka wrote:
>>>
>>> alias_list a single list and both slab_sysfs_init() and slab_debugfs_init()
>>> flush it. So only the init call that happens to be called first, does actually
>>> find an unflushed list. I think you
>>> need to use a separate list for debugfs (simpler) or a shared list with both
>>> sysfs and debugfs processing (probably more complicated).
>>>
>>> And finally a question, perhaps also for Greg. With sysfs, we hand out the
>>> lifecycle of struct kmem_cache to sysfs, to ensure we are not reading sysfs
>>> files of a cache that has been removed.
>>>
>>> But with debugfs, what are the guarantees that things won't blow up when a
>>> debugfs file is being read while somebody calls kmem_cache_destroy() on the cache?
>>
>> It's much harder, but usually the default debugfs_file_create() will
>> handle this for you. See the debugfs_file_create_unsafe() for the
>> "other" variant where you know you can tear things down "safely".
>
> Right, so IIUC debugfs will guarantee that while somebody reads the files, the
> debugfs cleanup will block, as debugfs_file_get() comment explains.
>
> In that case I think we have the cleanup order wrong in this patch:
>
> shutdown_cache() should first do debugfs_slab_release() (which would block) and
> only then proceed with slab_kmem_cache_release() which destroys the fundamental
> structures such as kmem_cache_node, which are also accessed by the debugfs file
> handlers.
>
If user is trying to read the data during shutdown_cache(), then I think
it's possible user will get empty data, to avoid that we can call
debugfs_slab_release() first and then do other stuff in shutdown_cache().

>> That being said, yes there are still issues in this area, be careful
>> about what tools you expect to be constantly hitting debugfs files.
>
> FWIW, the files are accessible only to root.
>
>> thanks,
>>
>> greg k-h
>>
>

2021-05-31 08:20:51

by Vlastimil Babka

[permalink] [raw]
Subject: Re: [PATCH v7] mm: slub: move sysfs slab alloc/free interfaces to debugfs

On 5/31/21 9:11 AM, Faiyaz Mohammed wrote:
>
>
> On 5/26/2021 5:43 PM, Vlastimil Babka wrote:
>> On 5/26/21 1:48 PM, Greg KH wrote:
>>> On Wed, May 26, 2021 at 01:38:55PM +0200, Vlastimil Babka wrote:
>>>>
>>>> alias_list a single list and both slab_sysfs_init() and slab_debugfs_init()
>>>> flush it. So only the init call that happens to be called first, does actually
>>>> find an unflushed list. I think you
>>>> need to use a separate list for debugfs (simpler) or a shared list with both
>>>> sysfs and debugfs processing (probably more complicated).
>>>>
>>>> And finally a question, perhaps also for Greg. With sysfs, we hand out the
>>>> lifecycle of struct kmem_cache to sysfs, to ensure we are not reading sysfs
>>>> files of a cache that has been removed.
>>>>
>>>> But with debugfs, what are the guarantees that things won't blow up when a
>>>> debugfs file is being read while somebody calls kmem_cache_destroy() on the cache?
>>>
>>> It's much harder, but usually the default debugfs_file_create() will
>>> handle this for you. See the debugfs_file_create_unsafe() for the
>>> "other" variant where you know you can tear things down "safely".
>>
>> Right, so IIUC debugfs will guarantee that while somebody reads the files, the
>> debugfs cleanup will block, as debugfs_file_get() comment explains.
>>
>> In that case I think we have the cleanup order wrong in this patch:
>>
>> shutdown_cache() should first do debugfs_slab_release() (which would block) and
>> only then proceed with slab_kmem_cache_release() which destroys the fundamental
>> structures such as kmem_cache_node, which are also accessed by the debugfs file
>> handlers.
>>
> If user is trying to read the data during shutdown_cache(), then I think
> it's possible user will get empty data, to avoid that we can call

Empty data is fine, when the cache is going away anyway.

> debugfs_slab_release() first and then do other stuff in shutdown_cache().

Everything above list_del(&s->list) should be OK, it's equivalent to normal
cache operations which the debugfs files must cope with anyway.
list_del(&s->list) is OK as the debugfs handlers don't go through the list. It's
slab_kmem_cache_release() that matters.

>>> That being said, yes there are still issues in this area, be careful
>>> about what tools you expect to be constantly hitting debugfs files.
>>
>> FWIW, the files are accessible only to root.
>>
>>> thanks,
>>>
>>> greg k-h
>>>
>>
>

2021-05-31 09:52:48

by Faiyaz Mohammed

[permalink] [raw]
Subject: Re: [PATCH v7] mm: slub: move sysfs slab alloc/free interfaces to debugfs



On 5/31/2021 1:49 PM, Vlastimil Babka wrote:
> On 5/31/21 9:11 AM, Faiyaz Mohammed wrote:
>>
>>
>> On 5/26/2021 5:43 PM, Vlastimil Babka wrote:
>>> On 5/26/21 1:48 PM, Greg KH wrote:
>>>> On Wed, May 26, 2021 at 01:38:55PM +0200, Vlastimil Babka wrote:
>>>>>
>>>>> alias_list a single list and both slab_sysfs_init() and slab_debugfs_init()
>>>>> flush it. So only the init call that happens to be called first, does actually
>>>>> find an unflushed list. I think you
>>>>> need to use a separate list for debugfs (simpler) or a shared list with both
>>>>> sysfs and debugfs processing (probably more complicated).
>>>>>
>>>>> And finally a question, perhaps also for Greg. With sysfs, we hand out the
>>>>> lifecycle of struct kmem_cache to sysfs, to ensure we are not reading sysfs
>>>>> files of a cache that has been removed.
>>>>>
>>>>> But with debugfs, what are the guarantees that things won't blow up when a
>>>>> debugfs file is being read while somebody calls kmem_cache_destroy() on the cache?
>>>>
>>>> It's much harder, but usually the default debugfs_file_create() will
>>>> handle this for you. See the debugfs_file_create_unsafe() for the
>>>> "other" variant where you know you can tear things down "safely".
>>>
>>> Right, so IIUC debugfs will guarantee that while somebody reads the files, the
>>> debugfs cleanup will block, as debugfs_file_get() comment explains.
>>>
>>> In that case I think we have the cleanup order wrong in this patch:
>>>
>>> shutdown_cache() should first do debugfs_slab_release() (which would block) and
>>> only then proceed with slab_kmem_cache_release() which destroys the fundamental
>>> structures such as kmem_cache_node, which are also accessed by the debugfs file
>>> handlers.
>>>
>> If user is trying to read the data during shutdown_cache(), then I think
>> it's possible user will get empty data, to avoid that we can call
>
> Empty data is fine, when the cache is going away anyway.
>
>> debugfs_slab_release() first and then do other stuff in shutdown_cache().
>
> Everything above list_del(&s->list) should be OK, it's equivalent to normal
> cache operations which the debugfs files must cope with anyway.
> list_del(&s->list) is OK as the debugfs handlers don't go through the list. It's
> slab_kmem_cache_release() that matters.
>
okay, I will move debugfs_slab_release() before the
slab_kmem_cache_release() in next patch version.
>>>> That being said, yes there are still issues in this area, be careful
>>>> about what tools you expect to be constantly hitting debugfs files.
>>>
>>> FWIW, the files are accessible only to root.
>>>
>>>> thanks,
>>>>
>>>> greg k-h
>>>>
>>>
>>
>

2021-05-31 09:57:03

by Vlastimil Babka

[permalink] [raw]
Subject: Re: [PATCH v7] mm: slub: move sysfs slab alloc/free interfaces to debugfs

On 5/31/21 8:55 AM, Faiyaz Mohammed wrote:

>> I don't see any of the symlinks under /sys/kernel/debug/slab/, so I think the
>> aliases handling code is wrong, and I can see at least two reasons why it could be:
>>
>
> I think I missed one thing, when CONFIG_SLUB_DEBUG_ON enable or
> slub_debug is pass through command line __kmem_cache_alias() will return
> null, so no symlinks will be created even if CONFIG_SLAB_MERGE_DEFAULT
> is enable and to store user data we need to enable CONFIG_SLUB_DEBUG_ON
> or pass slub_debug through command line.

So you're saying that caches with SLAB_STORE_USER can never be aliases as the
merging logic will prevent merging with any debug flag, including STORE_USER. So
if we ignore aliases, it means we will not create the debugfs files for caches,
where opening the files would just return error, so we don't lose anything by
not creating the files in the first place.

In that case, for consistency I would recommend to skip debugfs creation for all
caches without SLAB_STORE_USER (even if the caches are not an alias). I think we
can make this decision now as it's a whole new debugfs subtree so we don't break
any pre-existing code.

2021-05-31 11:09:07

by Faiyaz Mohammed

[permalink] [raw]
Subject: Re: [PATCH v7] mm: slub: move sysfs slab alloc/free interfaces to debugfs



On 5/31/2021 3:25 PM, Vlastimil Babka wrote:
> On 5/31/21 8:55 AM, Faiyaz Mohammed wrote:
>
>>> I don't see any of the symlinks under /sys/kernel/debug/slab/, so I think the
>>> aliases handling code is wrong, and I can see at least two reasons why it could be:
>>>
>>
>> I think I missed one thing, when CONFIG_SLUB_DEBUG_ON enable or
>> slub_debug is pass through command line __kmem_cache_alias() will return
>> null, so no symlinks will be created even if CONFIG_SLAB_MERGE_DEFAULT
>> is enable and to store user data we need to enable CONFIG_SLUB_DEBUG_ON
>> or pass slub_debug through command line.
>
> So you're saying that caches with SLAB_STORE_USER can never be aliases as the
> merging logic will prevent merging with any debug flag, including STORE_USER. So
> if we ignore aliases, it means we will not create the debugfs files for caches,
> where opening the files would just return error, so we don't lose anything by
> not creating the files in the first place.
>
> In that case, for consistency I would recommend to skip debugfs creation for all
> caches without SLAB_STORE_USER (even if the caches are not an alias). I think we
> can make this decision now as it's a whole new debugfs subtree so we don't break
> any pre-existing code.
>

Hmmm, I think yes we can skip debugfs creation for all cache without
SLAB_STORE_USER flag set instead of returning error after opening of
file. I will do the change in next patch version.

Thanks and regards,
Mohammed Faiyaz