For !CONFIG_SLUB_DEBUG, SLUB does not maintain the number of slabs
allocated per node for a kmem_cache. Thus, slabs_node() in
__kmem_cache_empty(), __kmem_cache_shrink() and __kmem_cache_shutdown()
will always return 0 for such config. This is wrong and can cause issues
for all users of these functions.
In fact, in [1] Jason has reported a system crash while using SLUB without
CONFIG_SLUB_DEBUG. The reason was the usage of slabs_node() by
__kmem_cache_empty().
The right solution is to make slabs_node() work even for
!CONFIG_SLUB_DEBUG. The commit 0f389ec63077 ("slub: No need for per node
slab counters if !SLUB_DEBUG") had put the per node slab counter under
CONFIG_SLUB_DEBUG because it was only read through sysfs API and the
sysfs API was disabled on !CONFIG_SLUB_DEBUG. However the users of the
per node slab counter assumed that it will work in the absence of
CONFIG_SLUB_DEBUG. So, make the counter work for !CONFIG_SLUB_DEBUG.
Please note that commit f9e13c0a5a33 ("slab, slub: skip unnecessary
kasan_cache_shutdown()") exposed this issue, but the issue was already
present before that commit.
[1] http://lkml.kernel.org/r/CAHmME9rtoPwxUSnktxzKso14iuVCWT7BE_-_8PAC=pGw1iJnQg@mail.gmail.com
Fixes: f9e13c0a5a33 ("slab, slub: skip unnecessary kasan_cache_shutdown()")
Signed-off-by: Shakeel Butt <[email protected]>
Suggested-by: David Rientjes <[email protected]>
Reported-by: Jason A . Donenfeld <[email protected]>
Cc: Christoph Lameter <[email protected]>
Cc: Pekka Enberg <[email protected]>
Cc: Joonsoo Kim <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Andrey Ryabinin <[email protected]>
Cc: <[email protected]>
Cc: <[email protected]>
Cc: <[email protected]>
---
mm/slab.h | 2 +-
mm/slub.c | 80 +++++++++++++++++++++++++------------------------------
2 files changed, 38 insertions(+), 44 deletions(-)
diff --git a/mm/slab.h b/mm/slab.h
index 68bdf498da3b..a6545332cc86 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -473,8 +473,8 @@ struct kmem_cache_node {
#ifdef CONFIG_SLUB
unsigned long nr_partial;
struct list_head partial;
-#ifdef CONFIG_SLUB_DEBUG
atomic_long_t nr_slabs;
+#ifdef CONFIG_SLUB_DEBUG
atomic_long_t total_objects;
struct list_head full;
#endif
diff --git a/mm/slub.c b/mm/slub.c
index a3b8467c14af..c9c190d54687 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1030,42 +1030,6 @@ static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct
list_del(&page->lru);
}
-/* Tracking of the number of slabs for debugging purposes */
-static inline unsigned long slabs_node(struct kmem_cache *s, int node)
-{
- struct kmem_cache_node *n = get_node(s, node);
-
- return atomic_long_read(&n->nr_slabs);
-}
-
-static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
-{
- return atomic_long_read(&n->nr_slabs);
-}
-
-static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
-{
- struct kmem_cache_node *n = get_node(s, node);
-
- /*
- * May be called early in order to allocate a slab for the
- * kmem_cache_node structure. Solve the chicken-egg
- * dilemma by deferring the increment of the count during
- * bootstrap (see early_kmem_cache_node_alloc).
- */
- if (likely(n)) {
- atomic_long_inc(&n->nr_slabs);
- atomic_long_add(objects, &n->total_objects);
- }
-}
-static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
-{
- struct kmem_cache_node *n = get_node(s, node);
-
- atomic_long_dec(&n->nr_slabs);
- atomic_long_sub(objects, &n->total_objects);
-}
-
/* Object debug checks for alloc/free paths */
static void setup_object_debug(struct kmem_cache *s, struct page *page,
void *object)
@@ -1321,16 +1285,46 @@ slab_flags_t kmem_cache_flags(unsigned int object_size,
#define disable_higher_order_debug 0
+#endif /* CONFIG_SLUB_DEBUG */
+
static inline unsigned long slabs_node(struct kmem_cache *s, int node)
- { return 0; }
+{
+ struct kmem_cache_node *n = get_node(s, node);
+
+ return atomic_long_read(&n->nr_slabs);
+}
+
static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
- { return 0; }
-static inline void inc_slabs_node(struct kmem_cache *s, int node,
- int objects) {}
-static inline void dec_slabs_node(struct kmem_cache *s, int node,
- int objects) {}
+{
+ return atomic_long_read(&n->nr_slabs);
+}
-#endif /* CONFIG_SLUB_DEBUG */
+static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
+{
+ struct kmem_cache_node *n = get_node(s, node);
+
+ /*
+ * May be called early in order to allocate a slab for the
+ * kmem_cache_node structure. Solve the chicken-egg
+ * dilemma by deferring the increment of the count during
+ * bootstrap (see early_kmem_cache_node_alloc).
+ */
+ if (likely(n)) {
+ atomic_long_inc(&n->nr_slabs);
+#ifdef CONFIG_SLUB_DEBUG
+ atomic_long_add(objects, &n->total_objects);
+#endif
+ }
+}
+static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
+{
+ struct kmem_cache_node *n = get_node(s, node);
+
+ atomic_long_dec(&n->nr_slabs);
+#ifdef CONFIG_SLUB_DEBUG
+ atomic_long_sub(objects, &n->total_objects);
+#endif
+}
/*
* Hooks for other subsystems that check memory allocations. In a typical
--
2.18.0.rc1.244.gcf134e6275-goog
On Wed, 20 Jun 2018, Shakeel Butt wrote:
> For !CONFIG_SLUB_DEBUG, SLUB does not maintain the number of slabs
> allocated per node for a kmem_cache. Thus, slabs_node() in
> __kmem_cache_empty(), __kmem_cache_shrink() and __kmem_cache_destroy()
> will always return 0 for such config. This is wrong and can cause issues
> for all users of these functions.
CONFIG_SLUB_DEBUG is set by default on almost all builds. The only case
where CONFIG_SLUB_DEBUG is switched off is when we absolutely need to use
the minimum amount of memory (embedded or some such thing).
> The right solution is to make slabs_node() work even for
> !CONFIG_SLUB_DEBUG. The commit 0f389ec63077 ("slub: No need for per node
> slab counters if !SLUB_DEBUG") had put the per node slab counter under
> CONFIG_SLUB_DEBUG because it was only read through sysfs API and the
> sysfs API was disabled on !CONFIG_SLUB_DEBUG. However the users of the
> per node slab counter assumed that it will work in the absence of
> CONFIG_SLUB_DEBUG. So, make the counter work for !CONFIG_SLUB_DEBUG.
Please do not do this. Find a way to avoid these checks. The
objective of a !CONFIG_SLUB_DEBUG configuration is to not compile in
debugging checks etc. in order to reduce the code/data footprint to the
minimum necessary while sacrificing debuggability etc.
Maybe make it impossible to disable CONFIG_SLUB_DEBUG if CGROUPs are in
use?
On Wed, Jun 20, 2018 at 6:15 PM Christopher Lameter <[email protected]> wrote:
>
> On Wed, 20 Jun 2018, Shakeel Butt wrote:
>
> > For !CONFIG_SLUB_DEBUG, SLUB does not maintain the number of slabs
> > allocated per node for a kmem_cache. Thus, slabs_node() in
> > __kmem_cache_empty(), __kmem_cache_shrink() and __kmem_cache_destroy()
> > will always return 0 for such config. This is wrong and can cause issues
> > for all users of these functions.
>
>
> CONFIG_SLUB_DEBUG is set by default on almost all builds. The only case
> where CONFIG_SLUB_DEBUG is switched off is when we absolutely need to use
> the minimum amount of memory (embedded or some such thing).
>
> > The right solution is to make slabs_node() work even for
> > !CONFIG_SLUB_DEBUG. The commit 0f389ec63077 ("slub: No need for per node
> > slab counters if !SLUB_DEBUG") had put the per node slab counter under
> > CONFIG_SLUB_DEBUG because it was only read through sysfs API and the
> > sysfs API was disabled on !CONFIG_SLUB_DEBUG. However the users of the
> > per node slab counter assumed that it will work in the absence of
> > CONFIG_SLUB_DEBUG. So, make the counter work for !CONFIG_SLUB_DEBUG.
>
> Please do not do this. Find a way to avoid these checks. The
> objective of a !CONFIG_SLUB_DEBUG configuration is to not compile in
> debuggin checks etc etc in order to reduce the code/data footprint to the
> minimum necessary while sacrificing debuggability etc etc.
>
> Maybe make it impossible to disable CONFIG_SLUB_DEBUG if CGROUPs are in
> use?
>
Copying from the other thread:
On Wed, Jun 20, 2018 at 6:22 PM Jason A. Donenfeld <[email protected]> wrote:
>
> On Thu, Jun 21, 2018 at 3:20 AM Christopher Lameter <[email protected]> wrote:
> >
> > NAK. Its easier to simply not allow !CONFIG_SLUB_DEBUG for cgroups based
> > configs because in that case you certainly have enough memory to include
> > the runtime debug code as well as the extended counters.
> >
>
> FWIW, I ran into issues with a combination of KASAN+CONFIG_SLUB
> without having CONFIG_SLUB_DEBUG, because KASAN was using functions
> that were broken without CONFIG_SLUB_DEBUG, so while you're at it with
> creating dependencies, you might want to also say KASAN+CONFIG_SLUB
> ==> CONFIG_SLUB_DEBUG.
KASAN is the only user of __kmem_cache_empty(). So, enforcing
KASAN+CONFIG_SLUB => CONFIG_SLUB_DEBUG makes sense, but I am not sure about
cgroups or memcg. Though, is it OK to let __kmem_cache_shrink() &
__kmem_cache_shutdown() be broken for !CONFIG_SLUB_DEBUG?
For __kmem_cache_shutdown(), I can understand that shutting down a
kmem_cache when there are still objects allocated from it is broken
and wrong. For __kmem_cache_shrink(), maybe a wrong answer from it is
tolerable.
Shakeel
On Thu 21-06-18 01:15:30, Christopher Lameter wrote:
> On Wed, 20 Jun 2018, Shakeel Butt wrote:
>
> > For !CONFIG_SLUB_DEBUG, SLUB does not maintain the number of slabs
> > allocated per node for a kmem_cache. Thus, slabs_node() in
> > __kmem_cache_empty(), __kmem_cache_shrink() and __kmem_cache_destroy()
> > will always return 0 for such config. This is wrong and can cause issues
> > for all users of these functions.
>
>
> CONFIG_SLUB_DEBUG is set by default on almost all builds. The only case
> where CONFIG_SLUB_DEBUG is switched off is when we absolutely need to use
> the minimum amount of memory (embedded or some such thing).
I thought those would be using SLOB rather than SLUB.
>
> > The right solution is to make slabs_node() work even for
> > !CONFIG_SLUB_DEBUG. The commit 0f389ec63077 ("slub: No need for per node
> > slab counters if !SLUB_DEBUG") had put the per node slab counter under
> > CONFIG_SLUB_DEBUG because it was only read through sysfs API and the
> > sysfs API was disabled on !CONFIG_SLUB_DEBUG. However the users of the
> > per node slab counter assumed that it will work in the absence of
> > CONFIG_SLUB_DEBUG. So, make the counter work for !CONFIG_SLUB_DEBUG.
>
> Please do not do this. Find a way to avoid these checks. The
> objective of a !CONFIG_SLUB_DEBUG configuration is to not compile in
> debuggin checks etc etc in order to reduce the code/data footprint to the
> minimum necessary while sacrificing debuggability etc etc.
>
> Maybe make it impossible to disable CONFIG_SLUB_DEBUG if CGROUPs are in
> use?
Why don't we simply remove the config option altogether and effectively
make it always enabled?
--
Michal Hocko
SUSE Labs
On Thu, Jun 21, 2018 at 8:01 AM Michal Hocko <[email protected]> wrote:
>
> On Thu 21-06-18 01:15:30, Cristopher Lameter wrote:
> > On Wed, 20 Jun 2018, Shakeel Butt wrote:
> >
> > > For !CONFIG_SLUB_DEBUG, SLUB does not maintain the number of slabs
> > > allocated per node for a kmem_cache. Thus, slabs_node() in
> > > __kmem_cache_empty(), __kmem_cache_shrink() and __kmem_cache_destroy()
> > > will always return 0 for such config. This is wrong and can cause issues
> > > for all users of these functions.
> >
> >
> > CONFIG_SLUB_DEBUG is set by default on almost all builds. The only case
> > where CONFIG_SLUB_DEBUG is switched off is when we absolutely need to use
> > the minimum amount of memory (embedded or some such thing).
>
> I thought those would be using SLOB rather than SLUB.
>
> >
> > > The right solution is to make slabs_node() work even for
> > > !CONFIG_SLUB_DEBUG. The commit 0f389ec63077 ("slub: No need for per node
> > > slab counters if !SLUB_DEBUG") had put the per node slab counter under
> > > CONFIG_SLUB_DEBUG because it was only read through sysfs API and the
> > > sysfs API was disabled on !CONFIG_SLUB_DEBUG. However the users of the
> > > per node slab counter assumed that it will work in the absence of
> > > CONFIG_SLUB_DEBUG. So, make the counter work for !CONFIG_SLUB_DEBUG.
> >
> > Please do not do this. Find a way to avoid these checks. The
> > objective of a !CONFIG_SLUB_DEBUG configuration is to not compile in
> > debuggin checks etc etc in order to reduce the code/data footprint to the
> > minimum necessary while sacrificing debuggability etc etc.
> >
> > Maybe make it impossible to disable CONFIG_SLUB_DEBUG if CGROUPs are in
> > use?
>
> Why don't we simply remove the config option altogether and make it
> enabled effectively.
>
Christopher, how do you want to proceed? I don't have any strong
opinion. I just don't want KASAN users kept broken for SLUB.
thanks,
Shakeel
KASAN depends on having access to some of the accounting that SLUB_DEBUG
does; without it, there are immediate crashes [1]. So, the natural thing
to do is to make KASAN select SLUB_DEBUG.
[1] http://lkml.kernel.org/r/CAHmME9rtoPwxUSnktxzKso14iuVCWT7BE_-_8PAC=pGw1iJnQg@mail.gmail.com
Fixes: f9e13c0a5a33 ("slab, slub: skip unnecessary kasan_cache_shutdown()")
Cc: Shakeel Butt <[email protected]>
Cc: David Rientjes <[email protected]>
Cc: Christoph Lameter <[email protected]>
Cc: Pekka Enberg <[email protected]>
Cc: Joonsoo Kim <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Andrey Ryabinin <[email protected]>
Cc: <[email protected]>
Cc: <[email protected]>
Cc: <[email protected]>
Signed-off-by: Jason A. Donenfeld <[email protected]>
---
lib/Kconfig.kasan | 1 +
1 file changed, 1 insertion(+)
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index 3d35d062970d..c253c1b46c6b 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -6,6 +6,7 @@ if HAVE_ARCH_KASAN
config KASAN
bool "KASan: runtime memory debugger"
depends on SLUB || (SLAB && !DEBUG_SLAB)
+ select SLUB_DEBUG if SLUB
select CONSTRUCTORS
select STACKDEPOT
help
--
2.17.1
On Fri 22-06-18 17:46:23, Jason A. Donenfeld wrote:
> KASAN depends on having access to some of the accounting that SLUB_DEBUG
> does; without it, there are immediate crashes [1]. So, the natural thing
> to do is to make KASAN select SLUB_DEBUG.
>
> [1] http://lkml.kernel.org/r/CAHmME9rtoPwxUSnktxzKso14iuVCWT7BE_-_8PAC=pGw1iJnQg@mail.gmail.com
>
> Fixes: f9e13c0a5a33 ("slab, slub: skip unnecessary kasan_cache_shutdown()")
> Cc: Shakeel Butt <[email protected]>
> Cc: David Rientjes <[email protected]>
> Cc: Christoph Lameter <[email protected]>
> Cc: Pekka Enberg <[email protected]>
> Cc: Joonsoo Kim <[email protected]>
> Cc: Andrew Morton <[email protected]>
> Cc: Andrey Ryabinin <[email protected]>
> Cc: <[email protected]>
> Cc: <[email protected]>
> Cc: <[email protected]>
> Signed-off-by: Jason A. Donenfeld <[email protected]>
This is the simplest way to do it, but I strongly suspect that the whole
SLUB_DEBUG option is not really necessary.
Acked-by: Michal Hocko <[email protected]>
> ---
> lib/Kconfig.kasan | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
> index 3d35d062970d..c253c1b46c6b 100644
> --- a/lib/Kconfig.kasan
> +++ b/lib/Kconfig.kasan
> @@ -6,6 +6,7 @@ if HAVE_ARCH_KASAN
> config KASAN
> bool "KASan: runtime memory debugger"
> depends on SLUB || (SLAB && !DEBUG_SLAB)
> + select SLUB_DEBUG if SLUB
> select CONSTRUCTORS
> select STACKDEPOT
> help
> --
> 2.17.1
--
Michal Hocko
SUSE Labs
On Fri, Jun 22, 2018 at 8:46 AM Jason A. Donenfeld <[email protected]> wrote:
>
> KASAN depends on having access to some of the accounting that SLUB_DEBUG
> does; without it, there are immediate crashes [1]. So, the natural thing
> to do is to make KASAN select SLUB_DEBUG.
>
> [1] http://lkml.kernel.org/r/CAHmME9rtoPwxUSnktxzKso14iuVCWT7BE_-_8PAC=pGw1iJnQg@mail.gmail.com
>
> Fixes: f9e13c0a5a33 ("slab, slub: skip unnecessary kasan_cache_shutdown()")
> Cc: Shakeel Butt <[email protected]>
> Cc: David Rientjes <[email protected]>
> Cc: Christoph Lameter <[email protected]>
> Cc: Pekka Enberg <[email protected]>
> Cc: Joonsoo Kim <[email protected]>
> Cc: Andrew Morton <[email protected]>
> Cc: Andrey Ryabinin <[email protected]>
> Cc: <[email protected]>
> Cc: <[email protected]>
> Cc: <[email protected]>
> Signed-off-by: Jason A. Donenfeld <[email protected]>
Reviewed-by: Shakeel Butt <[email protected]>
> ---
> lib/Kconfig.kasan | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
> index 3d35d062970d..c253c1b46c6b 100644
> --- a/lib/Kconfig.kasan
> +++ b/lib/Kconfig.kasan
> @@ -6,6 +6,7 @@ if HAVE_ARCH_KASAN
> config KASAN
> bool "KASan: runtime memory debugger"
> depends on SLUB || (SLAB && !DEBUG_SLAB)
> + select SLUB_DEBUG if SLUB
> select CONSTRUCTORS
> select STACKDEPOT
> help
> --
> 2.17.1
>