2024-03-26 10:38:23

by Vlastimil Babka

Subject: [PATCH mm-unstable v3 0/2] memcg_kmem hooks refactoring

Hi,

sorry for a v3 this soon after v2. I initially planned to just merge v2
into the slab tree later this week, but then I checked and the conflicts
with the mm tree would be too tedious (mainly due to the memory
allocation profiling series). Also, a large part of this is in
memcontrol.c and further work there might be based on this, so it's
easier to just go via the mm tree.

So this is just a rebase on top of mm-unstable so it can be included
there. Thanks.

Vlastimil

Signed-off-by: Vlastimil Babka <[email protected]>
---
Changes in v3:
- rebased v2 on top of mm-unstable
- Link to v2: https://lore.kernel.org/r/[email protected]

Changes in v2:
- rebase to v6.9-rc1
- add reviewed-by's to patches 1+2
- drop patches 3+4 (kmem_cache_charge() and usage in vfs)
- Link to v1: https://lore.kernel.org/r/[email protected]

---
Vlastimil Babka (2):
mm, slab: move memcg charging to post-alloc hook
mm, slab: move slab_memcg hooks to mm/memcontrol.c

mm/memcontrol.c | 90 +++++++++++++++++++++++++
mm/slab.h | 13 ++++
mm/slub.c | 205 +++++++++++---------------------------------------------
3 files changed, 143 insertions(+), 165 deletions(-)
---
base-commit: 4aaccadb5c04dd4d4519c8762a38010a32d904a3
change-id: 20240229-slab-memcg-ae6b3789c924

Best regards,
--
Vlastimil Babka <[email protected]>



2024-03-26 10:38:46

by Vlastimil Babka

Subject: [PATCH mm-unstable v3 2/2] mm, slab: move slab_memcg hooks to mm/memcontrol.c

The hooks make multiple calls to functions in mm/memcontrol.c, including
current_obj_cgroup(), which is marked __always_inline. It might be faster
to make a single call to the hook in mm/memcontrol.c instead. The hooks
also use almost nothing from mm/slub.c. obj_full_size() can move to
mm/memcontrol.c together with the hooks, and cache_vmstat_idx() to the
internal mm/slab.h.
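
As a rough userspace sketch of the layout change (hypothetical file and
function names, not the kernel interfaces): when the hook body is defined
in the same file as the small helpers it calls, those helpers can be
inlined there, and the allocation site is left with a single out-of-line
call.

	#include <stdbool.h>
	#include <stddef.h>

	/* accounting.h: the allocator sees one out-of-line hook */
	bool post_alloc_hook(size_t size);

	/*
	 * accounting.c: the hook body sits next to its helpers,
	 * which the compiler can inline here
	 */
	static inline bool get_context(void)        { return true; }
	static inline bool charge(size_t size)      { return size < 4096; }
	static inline void record_stat(size_t size) { (void)size; }

	bool post_alloc_hook(size_t size)
	{
		if (!get_context())
			return true;	/* nothing to account */
		if (!charge(size))
			return false;	/* caller frees the object back */
		record_stat(size);
		return true;
	}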

Reviewed-by: Roman Gushchin <[email protected]>
Signed-off-by: Vlastimil Babka <[email protected]>
---
mm/memcontrol.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++
mm/slab.h | 13 +++++++
mm/slub.c | 103 ++------------------------------------------------------
3 files changed, 105 insertions(+), 101 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0a0720858ddb..1b3c3394a2ba 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3558,6 +3558,96 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
refill_obj_stock(objcg, size, true);
}

+static inline size_t obj_full_size(struct kmem_cache *s)
+{
+ /*
+ * For each accounted object there is an extra space which is used
+ * to store obj_cgroup membership. Charge it too.
+ */
+ return s->size + sizeof(struct obj_cgroup *);
+}
+
+bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
+ gfp_t flags, size_t size, void **p)
+{
+ struct obj_cgroup *objcg;
+ struct slab *slab;
+ unsigned long off;
+ size_t i;
+
+ /*
+ * The obtained objcg pointer is safe to use within the current scope,
+ * defined by current task or set_active_memcg() pair.
+ * obj_cgroup_get() is used to get a permanent reference.
+ */
+ objcg = current_obj_cgroup();
+ if (!objcg)
+ return true;
+
+ /*
+ * slab_alloc_node() avoids the NULL check, so we might be called with a
+ * single NULL object. kmem_cache_alloc_bulk() aborts if it can't fill
+ * the whole requested size.
+ * return success as there's nothing to free back
+ */
+ if (unlikely(*p == NULL))
+ return true;
+
+ flags &= gfp_allowed_mask;
+
+ if (lru) {
+ int ret;
+ struct mem_cgroup *memcg;
+
+ memcg = get_mem_cgroup_from_objcg(objcg);
+ ret = memcg_list_lru_alloc(memcg, lru, flags);
+ css_put(&memcg->css);
+
+ if (ret)
+ return false;
+ }
+
+ if (obj_cgroup_charge(objcg, flags, size * obj_full_size(s)))
+ return false;
+
+ for (i = 0; i < size; i++) {
+ slab = virt_to_slab(p[i]);
+
+ if (!slab_obj_exts(slab) &&
+ alloc_slab_obj_exts(slab, s, flags, false)) {
+ obj_cgroup_uncharge(objcg, obj_full_size(s));
+ continue;
+ }
+
+ off = obj_to_index(s, slab, p[i]);
+ obj_cgroup_get(objcg);
+ slab_obj_exts(slab)[off].objcg = objcg;
+ mod_objcg_state(objcg, slab_pgdat(slab),
+ cache_vmstat_idx(s), obj_full_size(s));
+ }
+
+ return true;
+}
+
+void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
+ void **p, int objects, struct slabobj_ext *obj_exts)
+{
+ for (int i = 0; i < objects; i++) {
+ struct obj_cgroup *objcg;
+ unsigned int off;
+
+ off = obj_to_index(s, slab, p[i]);
+ objcg = obj_exts[off].objcg;
+ if (!objcg)
+ continue;
+
+ obj_exts[off].objcg = NULL;
+ obj_cgroup_uncharge(objcg, obj_full_size(s));
+ mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s),
+ -obj_full_size(s));
+ obj_cgroup_put(objcg);
+ }
+}
#endif /* CONFIG_MEMCG_KMEM */

/*
diff --git a/mm/slab.h b/mm/slab.h
index 1343bfa12cee..411251b9bdd1 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -558,6 +558,9 @@ static inline struct slabobj_ext *slab_obj_exts(struct slab *slab)
return (struct slabobj_ext *)(obj_exts & ~OBJEXTS_FLAGS_MASK);
}

+int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
+ gfp_t gfp, bool new_slab);
+
#else /* CONFIG_SLAB_OBJ_EXT */

static inline struct slabobj_ext *slab_obj_exts(struct slab *slab)
@@ -567,7 +570,17 @@ static inline struct slabobj_ext *slab_obj_exts(struct slab *slab)

#endif /* CONFIG_SLAB_OBJ_EXT */

+static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s)
+{
+ return (s->flags & SLAB_RECLAIM_ACCOUNT) ?
+ NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B;
+}
+
#ifdef CONFIG_MEMCG_KMEM
+bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
+ gfp_t flags, size_t size, void **p);
+void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
+ void **p, int objects, struct slabobj_ext *obj_exts);
void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
enum node_stat_item idx, int nr);
#endif
diff --git a/mm/slub.c b/mm/slub.c
index 263ff2a9f251..f5b151a58b7d 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1865,12 +1865,6 @@ static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
#endif
#endif /* CONFIG_SLUB_DEBUG */

-static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s)
-{
- return (s->flags & SLAB_RECLAIM_ACCOUNT) ?
- NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B;
-}
-
#ifdef CONFIG_SLAB_OBJ_EXT

#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
@@ -1929,8 +1923,8 @@ static inline void handle_failed_objexts_alloc(unsigned long obj_exts,
#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | \
__GFP_ACCOUNT | __GFP_NOFAIL)

-static int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
- gfp_t gfp, bool new_slab)
+int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
+ gfp_t gfp, bool new_slab)
{
unsigned int objects = objs_per_slab(s, slab);
unsigned long new_exts;
@@ -2089,78 +2083,6 @@ alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
#endif /* CONFIG_SLAB_OBJ_EXT */

#ifdef CONFIG_MEMCG_KMEM
-static inline size_t obj_full_size(struct kmem_cache *s)
-{
- /*
- * For each accounted object there is an extra space which is used
- * to store obj_cgroup membership. Charge it too.
- */
- return s->size + sizeof(struct obj_cgroup *);
-}
-
-static bool __memcg_slab_post_alloc_hook(struct kmem_cache *s,
- struct list_lru *lru,
- gfp_t flags, size_t size,
- void **p)
-{
- struct obj_cgroup *objcg;
- struct slab *slab;
- unsigned long off;
- size_t i;
-
- /*
- * The obtained objcg pointer is safe to use within the current scope,
- * defined by current task or set_active_memcg() pair.
- * obj_cgroup_get() is used to get a permanent reference.
- */
- objcg = current_obj_cgroup();
- if (!objcg)
- return true;
-
- /*
- * slab_alloc_node() avoids the NULL check, so we might be called with a
- * single NULL object. kmem_cache_alloc_bulk() aborts if it can't fill
- * the whole requested size.
- * return success as there's nothing to free back
- */
- if (unlikely(*p == NULL))
- return true;
-
- flags &= gfp_allowed_mask;
-
- if (lru) {
- int ret;
- struct mem_cgroup *memcg;
-
- memcg = get_mem_cgroup_from_objcg(objcg);
- ret = memcg_list_lru_alloc(memcg, lru, flags);
- css_put(&memcg->css);
-
- if (ret)
- return false;
- }
-
- if (obj_cgroup_charge(objcg, flags, size * obj_full_size(s)))
- return false;
-
- for (i = 0; i < size; i++) {
- slab = virt_to_slab(p[i]);
-
- if (!slab_obj_exts(slab) &&
- alloc_slab_obj_exts(slab, s, flags, false)) {
- obj_cgroup_uncharge(objcg, obj_full_size(s));
- continue;
- }
-
- off = obj_to_index(s, slab, p[i]);
- obj_cgroup_get(objcg);
- slab_obj_exts(slab)[off].objcg = objcg;
- mod_objcg_state(objcg, slab_pgdat(slab),
- cache_vmstat_idx(s), obj_full_size(s));
- }
-
- return true;
-}

static void memcg_alloc_abort_single(struct kmem_cache *s, void *object);

@@ -2187,27 +2109,6 @@ bool memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
return false;
}

-static void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
- void **p, int objects,
- struct slabobj_ext *obj_exts)
-{
- for (int i = 0; i < objects; i++) {
- struct obj_cgroup *objcg;
- unsigned int off;
-
- off = obj_to_index(s, slab, p[i]);
- objcg = obj_exts[off].objcg;
- if (!objcg)
- continue;
-
- obj_exts[off].objcg = NULL;
- obj_cgroup_uncharge(objcg, obj_full_size(s));
- mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s),
- -obj_full_size(s));
- obj_cgroup_put(objcg);
- }
-}
-
static __fastpath_inline
void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
int objects)

--
2.44.0


2024-03-26 10:38:49

by Vlastimil Babka

Subject: [PATCH mm-unstable v3 1/2] mm, slab: move memcg charging to post-alloc hook

The MEMCG_KMEM integration with slab currently relies on two hooks
during allocation. memcg_slab_pre_alloc_hook() determines the objcg and
charges it, and memcg_slab_post_alloc_hook() assigns the objcg pointer
to the allocated object(s).

As Linus pointed out, this is unnecessarily complex. Failing to charge
due to memcg limits should be rare, so we can optimistically allocate
the object(s) and do the charging together with assigning the objcg
pointer in a single post_alloc hook. In the rare case the charging
fails, we can free the object(s) back.

This simplifies the code (no need to pass around the objcg pointer) and
potentially allows separating charging from allocation in cases where
it's common that the allocation would be immediately freed, so the
memcg handling overhead could be saved.
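
The resulting flow, as a minimal userspace analogy (charge_quota() is a
made-up stand-in for the memcg charge in the post-alloc hook, not a
kernel function): allocate first, charge afterwards, and free the object
back in the rare case charging fails.

	#include <stdlib.h>
	#include <stdbool.h>

	/* made-up quota standing in for the memcg limit */
	static bool charge_quota(size_t size)
	{
		static size_t quota_left = 1 << 20;

		if (size > quota_left)
			return false;
		quota_left -= size;
		return true;
	}

	void *alloc_accounted(size_t size)
	{
		void *p = malloc(size);		/* optimistic allocation */

		if (!p)
			return NULL;
		if (!charge_quota(size)) {	/* rare failure path */
			free(p);		/* free the object back */
			return NULL;
		}
		return p;
	}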

Suggested-by: Linus Torvalds <[email protected]>
Link: https://lore.kernel.org/all/CAHk-=whYOOdM7jWy5jdrAm8LxcgCMFyk2bt8fYYvZzM4U-zAQA@mail.gmail.com/
Reviewed-by: Roman Gushchin <[email protected]>
Reviewed-by: Chengming Zhou <[email protected]>
Signed-off-by: Vlastimil Babka <[email protected]>
---
mm/slub.c | 180 +++++++++++++++++++++++++++-----------------------------------
1 file changed, 77 insertions(+), 103 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 7b68a3451eb9..263ff2a9f251 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2098,23 +2098,36 @@ static inline size_t obj_full_size(struct kmem_cache *s)
return s->size + sizeof(struct obj_cgroup *);
}

-/*
- * Returns false if the allocation should fail.
- */
-static bool __memcg_slab_pre_alloc_hook(struct kmem_cache *s,
- struct list_lru *lru,
- struct obj_cgroup **objcgp,
- size_t objects, gfp_t flags)
+static bool __memcg_slab_post_alloc_hook(struct kmem_cache *s,
+ struct list_lru *lru,
+ gfp_t flags, size_t size,
+ void **p)
{
+ struct obj_cgroup *objcg;
+ struct slab *slab;
+ unsigned long off;
+ size_t i;
+
/*
* The obtained objcg pointer is safe to use within the current scope,
* defined by current task or set_active_memcg() pair.
* obj_cgroup_get() is used to get a permanent reference.
*/
- struct obj_cgroup *objcg = current_obj_cgroup();
+ objcg = current_obj_cgroup();
if (!objcg)
return true;

+ /*
+ * slab_alloc_node() avoids the NULL check, so we might be called with a
+ * single NULL object. kmem_cache_alloc_bulk() aborts if it can't fill
+ * the whole requested size.
+ * return success as there's nothing to free back
+ */
+ if (unlikely(*p == NULL))
+ return true;
+
+ flags &= gfp_allowed_mask;
+
if (lru) {
int ret;
struct mem_cgroup *memcg;
@@ -2127,71 +2140,51 @@ static bool __memcg_slab_pre_alloc_hook(struct kmem_cache *s,
return false;
}

- if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s)))
+ if (obj_cgroup_charge(objcg, flags, size * obj_full_size(s)))
return false;

- *objcgp = objcg;
+ for (i = 0; i < size; i++) {
+ slab = virt_to_slab(p[i]);
+
+ if (!slab_obj_exts(slab) &&
+ alloc_slab_obj_exts(slab, s, flags, false)) {
+ obj_cgroup_uncharge(objcg, obj_full_size(s));
+ continue;
+ }
+
+ off = obj_to_index(s, slab, p[i]);
+ obj_cgroup_get(objcg);
+ slab_obj_exts(slab)[off].objcg = objcg;
+ mod_objcg_state(objcg, slab_pgdat(slab),
+ cache_vmstat_idx(s), obj_full_size(s));
+ }
+
return true;
}

-/*
- * Returns false if the allocation should fail.
- */
+static void memcg_alloc_abort_single(struct kmem_cache *s, void *object);
+
static __fastpath_inline
-bool memcg_slab_pre_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
- struct obj_cgroup **objcgp, size_t objects,
- gfp_t flags)
+bool memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
+ gfp_t flags, size_t size, void **p)
{
- if (!memcg_kmem_online())
+ if (likely(!memcg_kmem_online()))
return true;

if (likely(!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT)))
return true;

- return likely(__memcg_slab_pre_alloc_hook(s, lru, objcgp, objects,
- flags));
-}
-
-static void __memcg_slab_post_alloc_hook(struct kmem_cache *s,
- struct obj_cgroup *objcg,
- gfp_t flags, size_t size,
- void **p)
-{
- struct slab *slab;
- unsigned long off;
- size_t i;
-
- flags &= gfp_allowed_mask;
-
- for (i = 0; i < size; i++) {
- if (likely(p[i])) {
- slab = virt_to_slab(p[i]);
-
- if (!slab_obj_exts(slab) &&
- alloc_slab_obj_exts(slab, s, flags, false)) {
- obj_cgroup_uncharge(objcg, obj_full_size(s));
- continue;
- }
+ if (likely(__memcg_slab_post_alloc_hook(s, lru, flags, size, p)))
+ return true;

- off = obj_to_index(s, slab, p[i]);
- obj_cgroup_get(objcg);
- slab_obj_exts(slab)[off].objcg = objcg;
- mod_objcg_state(objcg, slab_pgdat(slab),
- cache_vmstat_idx(s), obj_full_size(s));
- } else {
- obj_cgroup_uncharge(objcg, obj_full_size(s));
- }
+ if (likely(size == 1)) {
+ memcg_alloc_abort_single(s, p);
+ *p = NULL;
+ } else {
+ kmem_cache_free_bulk(s, size, p);
}
-}
-
-static __fastpath_inline
-void memcg_slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg,
- gfp_t flags, size_t size, void **p)
-{
- if (likely(!memcg_kmem_online() || !objcg))
- return;

- return __memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
+ return false;
}

static void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
@@ -2230,40 +2223,19 @@ void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,

__memcg_slab_free_hook(s, slab, p, objects, obj_exts);
}
-
-static inline
-void memcg_slab_alloc_error_hook(struct kmem_cache *s, int objects,
- struct obj_cgroup *objcg)
-{
- if (objcg)
- obj_cgroup_uncharge(objcg, objects * obj_full_size(s));
-}
#else /* CONFIG_MEMCG_KMEM */
-static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
- struct list_lru *lru,
- struct obj_cgroup **objcgp,
- size_t objects, gfp_t flags)
-{
- return true;
-}
-
-static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
- struct obj_cgroup *objcg,
+static inline bool memcg_slab_post_alloc_hook(struct kmem_cache *s,
+ struct list_lru *lru,
gfp_t flags, size_t size,
void **p)
{
+ return true;
}

static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
void **p, int objects)
{
}
-
-static inline
-void memcg_slab_alloc_error_hook(struct kmem_cache *s, int objects,
- struct obj_cgroup *objcg)
-{
-}
#endif /* CONFIG_MEMCG_KMEM */

/*
@@ -3943,10 +3915,7 @@ noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
ALLOW_ERROR_INJECTION(should_failslab, ERRNO);

static __fastpath_inline
-struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
- struct list_lru *lru,
- struct obj_cgroup **objcgp,
- size_t size, gfp_t flags)
+struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
{
flags &= gfp_allowed_mask;

@@ -3955,14 +3924,11 @@ struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
if (unlikely(should_failslab(s, flags)))
return NULL;

- if (unlikely(!memcg_slab_pre_alloc_hook(s, lru, objcgp, size, flags)))
- return NULL;
-
return s;
}

static __fastpath_inline
-void slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg,
+bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
gfp_t flags, size_t size, void **p, bool init,
unsigned int orig_size)
{
@@ -4024,7 +3990,7 @@ void slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg,
}
}

- memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
+ return memcg_slab_post_alloc_hook(s, lru, flags, size, p);
}

/*
@@ -4041,10 +4007,9 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
{
void *object;
- struct obj_cgroup *objcg = NULL;
bool init = false;

- s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags);
+ s = slab_pre_alloc_hook(s, gfpflags);
if (unlikely(!s))
return NULL;

@@ -4061,8 +4026,10 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
/*
* When init equals 'true', like for kzalloc() family, only
* @orig_size bytes might be zeroed instead of s->object_size
+ * In case this fails due to memcg_slab_post_alloc_hook(),
+ * object is set to NULL
*/
- slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init, orig_size);
+ slab_post_alloc_hook(s, lru, gfpflags, 1, &object, init, orig_size);

return object;
}
@@ -4502,6 +4469,16 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
do_slab_free(s, slab, object, object, 1, addr);
}

+#ifdef CONFIG_MEMCG_KMEM
+/* Do not inline the rare memcg charging failed path into the allocation path */
+static noinline
+void memcg_alloc_abort_single(struct kmem_cache *s, void *object)
+{
+ if (likely(slab_free_hook(s, object, slab_want_init_on_free(s))))
+ do_slab_free(s, virt_to_slab(object), object, object, 1, _RET_IP_);
+}
+#endif
+
static __fastpath_inline
void slab_free_bulk(struct kmem_cache *s, struct slab *slab, void *head,
void *tail, void **p, int cnt, unsigned long addr)
@@ -4838,29 +4815,26 @@ int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size,
void **p)
{
int i;
- struct obj_cgroup *objcg = NULL;

if (!size)
return 0;

- /* memcg and kmem_cache debug support */
- s = slab_pre_alloc_hook(s, NULL, &objcg, size, flags);
+ s = slab_pre_alloc_hook(s, flags);
if (unlikely(!s))
return 0;

i = __kmem_cache_alloc_bulk(s, flags, size, p);
+ if (unlikely(i == 0))
+ return 0;

/*
* memcg and kmem_cache debug support and memory initialization.
* Done outside of the IRQ disabled fastpath loop.
*/
- if (likely(i != 0)) {
- slab_post_alloc_hook(s, objcg, flags, size, p,
- slab_want_init_on_alloc(flags, s), s->object_size);
- } else {
- memcg_slab_alloc_error_hook(s, size, objcg);
+ if (unlikely(!slab_post_alloc_hook(s, NULL, flags, size, p,
+ slab_want_init_on_alloc(flags, s), s->object_size))) {
+ return 0;
}
-
return i;
}
EXPORT_SYMBOL(kmem_cache_alloc_bulk_noprof);

--
2.44.0