2022-05-18 06:26:06

by Vasily Averin

Subject: [PATCH v3] tracing: add 'accounted' entry into output of allocation tracepoints

Slab caches marked with SLAB_ACCOUNT force accounting for every
allocation from such a cache even if the __GFP_ACCOUNT flag is not passed.
Unfortunately, at the moment this flag is not visible in the ftrace output,
and this makes it difficult to analyze the accounted allocations.

This patch adds a boolean "allocated" entry into the trace output,
and sets it to 'true' for calls that used the __GFP_ACCOUNT flag and
for allocations from caches marked with SLAB_ACCOUNT.

Signed-off-by: Vasily Averin <[email protected]>
---
v3:
1) rework kmem_cache_alloc* tracepoints once again,
added struct kmem_cache argument into existing templates,
thanks to Matthew Wilcox
2) updated the corresponding trace_* calls
3) added boolean "allocated" entry into trace output,
thanks to Roman
4) updated patch subject and description

v2:
1) handle kmem_cache_alloc_node(), thanks to Shakeel
2) rework kmem_cache_alloc* tracepoints to use cachep instead
of current cachep->*size parameters.
NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
and therefore it was replaced by kmalloc_node.
---
Now kmem tracing output looks like this:

kmem_cache_alloc: (getname_flags.part.0+0x2c) call_site=getname_flags.part.0+0x2c ptr=0xffff8fff022e9000 bytes_req=4096 bytes_alloc=4096 gfp_flags=GFP_KERNEL accounted=false
kmalloc: (alloc_bprm+0x32) call_site=alloc_bprm+0x32 ptr=0xffff8fff2b408a00 bytes_req=416 bytes_alloc=512 gfp_flags=GFP_KERNEL|__GFP_ZERO accounted=false
kmem_cache_alloc: (mm_alloc+0x16) call_site=mm_alloc+0x16 ptr=0xffff8fff0894d500 bytes_req=1048 bytes_alloc=1088 gfp_flags=GFP_KERNEL accounted=true
mm_page_alloc: page=0xffffffffa4ab8d42 pfn=0x12ad72 order=1 migratetype=0 gfp_flags=GFP_USER|__GFP_ZERO|__GFP_ACCOUNT
kmem_cache_alloc: (vm_area_alloc+0x1a) call_site=vm_area_alloc+0x1a ptr=0xffff8fff2af27000 bytes_req=200 bytes_alloc=200 gfp_flags=GFP_KERNEL accounted=true
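
For reference, a minimal illustration of the two ways an allocation ends up
with accounted=true above (hypothetical "foo" names, not part of the patch):

	struct foo *p;
	struct kmem_cache *s;

	/* explicit flag on a single allocation */
	p = kmalloc(sizeof(*p), GFP_KERNEL | __GFP_ACCOUNT);

	/* cache-wide flag: every allocation from this cache is accounted */
	s = kmem_cache_create("foo_cache", sizeof(struct foo), 0,
			      SLAB_ACCOUNT, NULL);
	p = kmem_cache_alloc(s, GFP_KERNEL);
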
---
include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
mm/slab.c | 10 +++++-----
mm/slab_common.c | 9 ++++-----
mm/slob.c | 8 ++++----
mm/slub.c | 20 +++++++++----------
5 files changed, 47 insertions(+), 38 deletions(-)

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 71c141804222..5bfeb6f276f1 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -13,11 +13,12 @@ DECLARE_EVENT_CLASS(kmem_alloc,

TP_PROTO(unsigned long call_site,
const void *ptr,
+ struct kmem_cache *s,
size_t bytes_req,
size_t bytes_alloc,
gfp_t gfp_flags),

- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags),

TP_STRUCT__entry(
__field( unsigned long, call_site )
@@ -25,6 +26,7 @@ DECLARE_EVENT_CLASS(kmem_alloc,
__field( size_t, bytes_req )
__field( size_t, bytes_alloc )
__field( unsigned long, gfp_flags )
+ __field( bool, accounted )
),

TP_fast_assign(
@@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
__entry->bytes_req = bytes_req;
__entry->bytes_alloc = bytes_alloc;
__entry->gfp_flags = (__force unsigned long)gfp_flags;
+ __entry->accounted = (gfp_flags & __GFP_ACCOUNT) ||
+ (s && s->flags & SLAB_ACCOUNT);
),

- TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
+ TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
(void *)__entry->call_site,
__entry->ptr,
__entry->bytes_req,
__entry->bytes_alloc,
- show_gfp_flags(__entry->gfp_flags))
+ show_gfp_flags(__entry->gfp_flags),
+ __entry->accounted ? "true" : "false")
);

DEFINE_EVENT(kmem_alloc, kmalloc,

- TP_PROTO(unsigned long call_site, const void *ptr,
+ TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),

- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
);

DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,

- TP_PROTO(unsigned long call_site, const void *ptr,
+ TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),

- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
);

DECLARE_EVENT_CLASS(kmem_alloc_node,

TP_PROTO(unsigned long call_site,
const void *ptr,
+ struct kmem_cache *s,
size_t bytes_req,
size_t bytes_alloc,
gfp_t gfp_flags,
int node),

- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node),

TP_STRUCT__entry(
__field( unsigned long, call_site )
@@ -77,6 +83,7 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
__field( size_t, bytes_alloc )
__field( unsigned long, gfp_flags )
__field( int, node )
+ __field( bool, accounted )
),

TP_fast_assign(
@@ -86,33 +93,36 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
__entry->bytes_alloc = bytes_alloc;
__entry->gfp_flags = (__force unsigned long)gfp_flags;
__entry->node = node;
+ __entry->accounted = (gfp_flags & __GFP_ACCOUNT) ||
+ (s && s->flags & SLAB_ACCOUNT);
),

- TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
+ TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d accounted=%s",
(void *)__entry->call_site,
__entry->ptr,
__entry->bytes_req,
__entry->bytes_alloc,
show_gfp_flags(__entry->gfp_flags),
- __entry->node)
+ __entry->node,
+ __entry->accounted ? "true" : "false")
);

DEFINE_EVENT(kmem_alloc_node, kmalloc_node,

TP_PROTO(unsigned long call_site, const void *ptr,
- size_t bytes_req, size_t bytes_alloc,
+ struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
gfp_t gfp_flags, int node),

- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
);

DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node,

TP_PROTO(unsigned long call_site, const void *ptr,
- size_t bytes_req, size_t bytes_alloc,
+ struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
gfp_t gfp_flags, int node),

- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
);

TRACE_EVENT(kfree,
diff --git a/mm/slab.c b/mm/slab.c
index 0edb474edef1..e5802445c7d6 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3492,7 +3492,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
{
void *ret = slab_alloc(cachep, lru, flags, cachep->object_size, _RET_IP_);

- trace_kmem_cache_alloc(_RET_IP_, ret,
+ trace_kmem_cache_alloc(_RET_IP_, ret, cachep,
cachep->object_size, cachep->size, flags);

return ret;
@@ -3581,7 +3581,7 @@ kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
ret = slab_alloc(cachep, NULL, flags, size, _RET_IP_);

ret = kasan_kmalloc(cachep, ret, size, flags);
- trace_kmalloc(_RET_IP_, ret,
+ trace_kmalloc(_RET_IP_, ret, cachep,
size, cachep->size, flags);
return ret;
}
@@ -3606,7 +3606,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
void *ret = slab_alloc_node(cachep, flags, nodeid, cachep->object_size, _RET_IP_);

- trace_kmem_cache_alloc_node(_RET_IP_, ret,
+ trace_kmem_cache_alloc_node(_RET_IP_, ret, cachep,
cachep->object_size, cachep->size,
flags, nodeid);

@@ -3625,7 +3625,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
ret = slab_alloc_node(cachep, flags, nodeid, size, _RET_IP_);

ret = kasan_kmalloc(cachep, ret, size, flags);
- trace_kmalloc_node(_RET_IP_, ret,
+ trace_kmalloc_node(_RET_IP_, ret, cachep,
size, cachep->size,
flags, nodeid);
return ret;
@@ -3708,7 +3708,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
ret = slab_alloc(cachep, NULL, flags, size, caller);

ret = kasan_kmalloc(cachep, ret, size, flags);
- trace_kmalloc(caller, ret,
+ trace_kmalloc(caller, ret, cachep,
size, cachep->size, flags);

return ret;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 2b3206a2c3b5..a345e8600e00 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -25,13 +25,12 @@
#include <asm/page.h>
#include <linux/memcontrol.h>

-#define CREATE_TRACE_POINTS
-#include <trace/events/kmem.h>
-
#include "internal.h"
-
#include "slab.h"

+#define CREATE_TRACE_POINTS
+#include <trace/events/kmem.h>
+
enum slab_state slab_state;
LIST_HEAD(slab_caches);
DEFINE_MUTEX(slab_mutex);
@@ -967,7 +966,7 @@ EXPORT_SYMBOL(kmalloc_order);
void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
{
void *ret = kmalloc_order(size, flags, order);
- trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
+ trace_kmalloc(_RET_IP_, ret, NULL, size, PAGE_SIZE << order, flags);
return ret;
}
EXPORT_SYMBOL(kmalloc_order_trace);
diff --git a/mm/slob.c b/mm/slob.c
index 40ea6e2d4ccd..dbefa0da0dfc 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -505,7 +505,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
*m = size;
ret = (void *)m + minalign;

- trace_kmalloc_node(caller, ret,
+ trace_kmalloc_node(caller, ret, NULL,
size, size + minalign, gfp, node);
} else {
unsigned int order = get_order(size);
@@ -514,7 +514,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
gfp |= __GFP_COMP;
ret = slob_new_pages(gfp, order, node);

- trace_kmalloc_node(caller, ret,
+ trace_kmalloc_node(caller, ret, NULL,
size, PAGE_SIZE << order, gfp, node);
}

@@ -610,12 +610,12 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)

if (c->size < PAGE_SIZE) {
b = slob_alloc(c->size, flags, c->align, node, 0);
- trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
+ trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
SLOB_UNITS(c->size) * SLOB_UNIT,
flags, node);
} else {
b = slob_new_pages(flags, get_order(c->size), node);
- trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
+ trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
PAGE_SIZE << get_order(c->size),
flags, node);
}
diff --git a/mm/slub.c b/mm/slub.c
index ed5c2c03a47a..9b10591646dd 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3231,7 +3231,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
{
void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, s->object_size);

- trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
+ trace_kmem_cache_alloc(_RET_IP_, ret, s, s->object_size,
s->size, gfpflags);

return ret;
@@ -3254,7 +3254,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_lru);
void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
{
void *ret = slab_alloc(s, NULL, gfpflags, _RET_IP_, size);
- trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
+ trace_kmalloc(_RET_IP_, ret, s, size, s->size, gfpflags);
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
@@ -3266,7 +3266,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
{
void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);

- trace_kmem_cache_alloc_node(_RET_IP_, ret,
+ trace_kmem_cache_alloc_node(_RET_IP_, ret, s,
s->object_size, s->size, gfpflags, node);

return ret;
@@ -3280,7 +3280,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
{
void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);

- trace_kmalloc_node(_RET_IP_, ret,
+ trace_kmalloc_node(_RET_IP_, ret, s,
size, s->size, gfpflags, node);

ret = kasan_kmalloc(s, ret, size, gfpflags);
@@ -4409,7 +4409,7 @@ void *__kmalloc(size_t size, gfp_t flags)

ret = slab_alloc(s, NULL, flags, _RET_IP_, size);

- trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
+ trace_kmalloc(_RET_IP_, ret, s, size, s->size, flags);

ret = kasan_kmalloc(s, ret, size, flags);

@@ -4443,7 +4443,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
ret = kmalloc_large_node(size, flags, node);

- trace_kmalloc_node(_RET_IP_, ret,
+ trace_kmalloc_node(_RET_IP_, ret, NULL,
size, PAGE_SIZE << get_order(size),
flags, node);

@@ -4457,7 +4457,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)

ret = slab_alloc_node(s, NULL, flags, node, _RET_IP_, size);

- trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
+ trace_kmalloc_node(_RET_IP_, ret, s, size, s->size, flags, node);

ret = kasan_kmalloc(s, ret, size, flags);

@@ -4916,7 +4916,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
ret = slab_alloc(s, NULL, gfpflags, caller, size);

/* Honor the call site pointer we received. */
- trace_kmalloc(caller, ret, size, s->size, gfpflags);
+ trace_kmalloc(caller, ret, s, size, s->size, gfpflags);

return ret;
}
@@ -4932,7 +4932,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
ret = kmalloc_large_node(size, gfpflags, node);

- trace_kmalloc_node(caller, ret,
+ trace_kmalloc_node(caller, ret, NULL,
size, PAGE_SIZE << get_order(size),
gfpflags, node);

@@ -4947,7 +4947,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
ret = slab_alloc_node(s, NULL, gfpflags, node, caller, size);

/* Honor the call site pointer we received. */
- trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
+ trace_kmalloc_node(caller, ret, s, size, s->size, gfpflags, node);

return ret;
}
--
2.31.1



2022-05-18 15:10:23

by Shakeel Butt

Subject: Re: [PATCH v3] tracing: add 'accounted' entry into output of allocation tracepoints

On Tue, May 17, 2022 at 11:24 PM Vasily Averin <[email protected]> wrote:
>
> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
>
> This patch adds boolean "allocated" entry into trace output,
> and set it to 'true' for calls used __GFP_ACCOUNT flag and
> for allocations from caches marked with SLAB_ACCOUNT.
>
> Signed-off-by: Vasily Averin <[email protected]>

Acked-by: Shakeel Butt <[email protected]>

2022-05-18 15:48:56

by Vasily Averin

Subject: Re: [PATCH v3] tracing: add 'accounted' entry into output of allocation tracepoints

On 5/18/22 09:24, Vasily Averin wrote:
> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
>
> This patch adds boolean "allocated" entry into trace output,

not "allocated" but "accounted"

> and set it to 'true' for calls used __GFP_ACCOUNT flag and
> for allocations from caches marked with SLAB_ACCOUNT.
>
> Signed-off-by: Vasily Averin <[email protected]>

2022-05-18 20:05:02

by Steven Rostedt

Subject: Re: [PATCH v3] tracing: add 'accounted' entry into output of allocation tracepoints

On Wed, 18 May 2022 09:24:51 +0300
Vasily Averin <[email protected]> wrote:

FYI, the subject should be something like: mm/tracing:

Because "tracing:" is reserved for tracing infrastructure updates.

> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
>
> This patch adds boolean "allocated" entry into trace output,
> and set it to 'true' for calls used __GFP_ACCOUNT flag and
> for allocations from caches marked with SLAB_ACCOUNT.
>
> Signed-off-by: Vasily Averin <[email protected]>
> ---
> v3:
> 1) rework kmem_cache_alloc* tracepoints once again,
> added struct kmem_cache argument into existing templates,
> thanks to Matthew Wilcox
> 2) updated the corresponding ding trace_* calls
> 3) added boolean "allocated" entry into trace output,
> thanks to Roman
> 4) updated patch subject and description
>
> v2:
> 1) handle kmem_cache_alloc_node(), thanks to Shakeel
> 2) rework kmem_cache_alloc* tracepoints to use cachep instead
> of current cachep->*size parameters.
> NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
> and therefore it was replaced by kmalloc_node.
> ---
> Now kmem tracing output looks like this:
>
> kmem_cache_alloc: (getname_flags.part.0+0x2c) call_site=getname_flags.part.0+0x2c ptr=0xffff8fff022e9000 bytes_req=4096 bytes_alloc=4096 gfp_flags=GFP_KERNEL accounted=false
> kmalloc: (alloc_bprm+0x32) call_site=alloc_bprm+0x32 ptr=0xffff8fff2b408a00 bytes_req=416 bytes_alloc=512 gfp_flags=GFP_KERNEL|__GFP_ZERO accounted=false
> kmem_cache_alloc: (mm_alloc+0x16) call_site=mm_alloc+0x16 ptr=0xffff8fff0894d500 bytes_req=1048 bytes_alloc=1088 gfp_flags=GFP_KERNEL accounted=true
> mm_page_alloc: page=0xffffffffa4ab8d42 pfn=0x12ad72 order=1 migratetype=0 gfp_flags=GFP_USER|__GFP_ZERO|__GFP_ACCOUNT
> kmem_cache_alloc: (vm_area_alloc+0x1a) call_site=vm_area_alloc+0x1a ptr=0xffff8fff2af27000 bytes_req=200 bytes_alloc=200 gfp_flags=GFP_KERNEL accounted=true
> ---
> include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
> mm/slab.c | 10 +++++-----
> mm/slab_common.c | 9 ++++-----
> mm/slob.c | 8 ++++----
> mm/slub.c | 20 +++++++++----------
> 5 files changed, 47 insertions(+), 38 deletions(-)
>
> diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
> index 71c141804222..5bfeb6f276f1 100644
> --- a/include/trace/events/kmem.h
> +++ b/include/trace/events/kmem.h
> @@ -13,11 +13,12 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>
> TP_PROTO(unsigned long call_site,
> const void *ptr,
> + struct kmem_cache *s,
> size_t bytes_req,
> size_t bytes_alloc,
> gfp_t gfp_flags),
>
> - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
> + TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags),
>
> TP_STRUCT__entry(
> __field( unsigned long, call_site )
> @@ -25,6 +26,7 @@ DECLARE_EVENT_CLASS(kmem_alloc,
> __field( size_t, bytes_req )
> __field( size_t, bytes_alloc )
> __field( unsigned long, gfp_flags )
> + __field( bool, accounted )
> ),
>
> TP_fast_assign(
> @@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
> __entry->bytes_req = bytes_req;
> __entry->bytes_alloc = bytes_alloc;
> __entry->gfp_flags = (__force unsigned long)gfp_flags;
> + __entry->accounted = (gfp_flags & __GFP_ACCOUNT) ||
> + (s && s->flags & SLAB_ACCOUNT);

Now you could make this even faster in the fast path and save just the
s->flags.

__entry->sflags = s ? s->flags : 0;

> ),
>
> - TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
> + TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
> (void *)__entry->call_site,
> __entry->ptr,
> __entry->bytes_req,
> __entry->bytes_alloc,
> - show_gfp_flags(__entry->gfp_flags))
> + show_gfp_flags(__entry->gfp_flags),
> + __entry->accounted ? "true" : "false")

And then have: "accounted=%s":

(__entry->gfp_flags & __GFP_ACCOUNT) ||
(__entry->sflags & SLAB_ACCOUNT) ? "true" : "false"


-- Steve

> );
>

2022-05-19 15:44:25

by Vasily Averin

Subject: Re: [PATCH v3] tracing: add 'accounted' entry into output of allocation tracepoints

On 5/18/22 23:04, Steven Rostedt wrote:
> On Wed, 18 May 2022 09:24:51 +0300
> Vasily Averin <[email protected]> wrote:
>
> FYI, the subject should be something like: mm/tracing:
> Because "tracing:" is reserved for tracing infrastructure updates.

Thank you for noticing.

>> @@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>> __entry->bytes_req = bytes_req;
>> __entry->bytes_alloc = bytes_alloc;
>> __entry->gfp_flags = (__force unsigned long)gfp_flags;
>> + __entry->accounted = (gfp_flags & __GFP_ACCOUNT) ||
>> + (s && s->flags & SLAB_ACCOUNT);
>
> Now you could make this even faster in the fast path and save just the
> s->flags.
>
> __entry->sflags = s ? s->flags : 0;
>
>> ),
>>
>> - TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
>> + TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
>> (void *)__entry->call_site,
>> __entry->ptr,
>> __entry->bytes_req,
>> __entry->bytes_alloc,
>> - show_gfp_flags(__entry->gfp_flags))
>> + show_gfp_flags(__entry->gfp_flags),
>> + __entry->accounted ? "true" : "false")
>
> And then have: "accounted=%s":
>
> (__entry->gfp_flags & __GFP_ACCOUNT) ||
> (__entry->sflags & SLAB_ACCOUNT) ? "true" : "false"

Unfortunately this brings back the sparse warnings about bitwise gfp_t and slab_flags_t casts.
Could you please explain why your variant is faster?

Thank you,
Vasily Averin

2022-05-19 21:54:02

by Steven Rostedt

Subject: Re: [PATCH v3] tracing: add 'accounted' entry into output of allocation tracepoints

On Thu, 19 May 2022 14:35:46 +0300
Vasily Averin <[email protected]> wrote:

> >> @@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
> >> __entry->bytes_req = bytes_req;
> >> __entry->bytes_alloc = bytes_alloc;
> >> __entry->gfp_flags = (__force unsigned long)gfp_flags;
> >> + __entry->accounted = (gfp_flags & __GFP_ACCOUNT) ||
> >> + (s && s->flags & SLAB_ACCOUNT);
> >
> > Now you could make this even faster in the fast path and save just the
> > s->flags.
> >
> > __entry->sflags = s ? s->flags : 0;
> >
> >> ),
> >>
> >> - TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
> >> + TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
> >> (void *)__entry->call_site,
> >> __entry->ptr,
> >> __entry->bytes_req,
> >> __entry->bytes_alloc,
> >> - show_gfp_flags(__entry->gfp_flags))
> >> + show_gfp_flags(__entry->gfp_flags),
> >> + __entry->accounted ? "true" : "false")
> >
> > And then have: "accounted=%s":
> >
> > (__entry->gfp_flags & __GFP_ACCOUNT) ||
> > (__entry->sflags & SLAB_ACCOUNT) ? "true" : "false"
>
> Unfortunately this returns back sparse warnings about bitwise gfp_t and slab_flags_t casts.
> Could you please explain why your variant is faster?

Micro-optimization, grant you, but it is faster because it moves some of
the logic into the slow path (the read side), and takes it out of the fast
path (the write side).
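
That is, the difference (fragments copied from above, purely illustrative) is
between doing the test at record time:

	__entry->accounted = (gfp_flags & __GFP_ACCOUNT) ||
			     (s && s->flags & SLAB_ACCOUNT);

and only saving the raw flags at record time, leaving the test to whoever
reads the trace:

	__entry->sflags = s ? s->flags : 0;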

The idea of tracing is to squeeze out every cycle we can to keep the
tracing overhead down.

But it's really up to you if you need that. I'm not going to let this be a
blocker. This is more of an FYI than anything else.

-- Steve

2022-05-20 08:14:51

by Steven Rostedt

Subject: Re: [PATCH v3] tracing: add 'accounted' entry into output of allocation tracepoints

On Thu, 19 May 2022 19:29:36 +0300
Vasily Averin <[email protected]> wrote:

> Frankly speaking I vote for performance with both hands.
> However I'm still would like to avoid new sparse warnings.
> Christoph Hellwig just recently taught me, "never add '__force' before
> thinking hard about them", but in this case I would need to use it three times.
>
> I found that bitwise typecasts can be avoided by using translation unions.
>
> What do you think about following trick?

It's really up to you memory management folks. Although I may need to
update libtraceevent to handle the union case. That may be a bit tricky.

-- Steve

2022-05-23 05:43:01

by Vasily Averin

Subject: Re: [PATCH v3] tracing: add 'accounted' entry into output of allocation tracepoints

On 5/19/22 19:32, Steven Rostedt wrote:
> On Thu, 19 May 2022 19:29:36 +0300
> Vasily Averin <[email protected]> wrote:
>
>> Frankly speaking I vote for performance with both hands.
>> However I'm still would like to avoid new sparse warnings.
>> Christoph Hellwig just recently taught me, "never add '__force' before
>> thinking hard about them", but in this case I would need to use it three times.
>>
>> I found that bitwise typecasts can be avoided by using translation unions.
>>
>> What do you think about following trick?
>
> It's really up to you memory management folks. Although I may need to
> update libtraceevent to handle the union case. That may be a bit tricky.

In this case, I would like to keep the sub-optimal v3 patch version.
If necessary, we can improve the performance later, and right now
I will send v4 with minor patch description changes and ask
Andrew Morton to include it in the mm tree.

Thank you,
Vasily Averin


2022-05-23 05:48:16

by Vasily Averin

Subject: Re: [PATCH v3] tracing: add 'accounted' entry into output of allocation tracepoints

On 5/19/22 17:03, Steven Rostedt wrote:
> On Thu, 19 May 2022 14:35:46 +0300
> Vasily Averin <[email protected]> wrote:
>
>>>> @@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>>>> __entry->bytes_req = bytes_req;
>>>> __entry->bytes_alloc = bytes_alloc;
>>>> __entry->gfp_flags = (__force unsigned long)gfp_flags;
>>>> + __entry->accounted = (gfp_flags & __GFP_ACCOUNT) ||
>>>> + (s && s->flags & SLAB_ACCOUNT);
>>>
>>> Now you could make this even faster in the fast path and save just the
>>> s->flags.
>>>
>>> __entry->sflags = s ? s->flags : 0;
>>>
>>>> ),
>>>>
>>>> - TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
>>>> + TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
>>>> (void *)__entry->call_site,
>>>> __entry->ptr,
>>>> __entry->bytes_req,
>>>> __entry->bytes_alloc,
>>>> - show_gfp_flags(__entry->gfp_flags))
>>>> + show_gfp_flags(__entry->gfp_flags),
>>>> + __entry->accounted ? "true" : "false")
>>>
>>> And then have: "accounted=%s":
>>>
>>> (__entry->gfp_flags & __GFP_ACCOUNT) ||
>>> (__entry->sflags & SLAB_ACCOUNT) ? "true" : "false"
>>
>> Unfortunately this returns back sparse warnings about bitwise gfp_t and slab_flags_t casts.
>> Could you please explain why your variant is faster?
>
> Micro-optimization, grant you, but it is faster because it moves some of
> the logic into the slow path (the read side), and takes it out of the fast
> path (the write side).
>
> The idea of tracing is to squeeze out every cycle we can to keep the
> tracing overhead down.
>
> But it's really up to you if you need that. I'm not going to let this be a
> blocker. This is more of an FYI than anything else.

Frankly speaking, I vote for performance with both hands.
However, I would still like to avoid new sparse warnings.
Christoph Hellwig just recently taught me, "never add '__force' before
thinking hard about them", but in this case I would need to use it three times.

I found that bitwise typecasts can be avoided by using translation unions.

What do you think about following trick?

diff --git a/mm/slab.h b/mm/slab.h
index 95eb34174c1b..f676612ca40f 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -882,4 +882,14 @@ void __check_heap_object(const void *ptr, unsigned long n,
}
#endif

+union gfp_flags_u {
+ unsigned long ulong;
+ gfp_t flags;
+};
+
+union slab_flags_u {
+ unsigned int uint;
+ slab_flags_t sflags;
+};
+
#endif /* MM_SLAB_H */

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 71c141804222..91632a61e16d 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -13,18 +13,20 @@ DECLARE_EVENT_CLASS(kmem_alloc,

TP_PROTO(unsigned long call_site,
const void *ptr,
+ struct kmem_cache *s,
size_t bytes_req,
size_t bytes_alloc,
gfp_t gfp_flags),

- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags),

TP_STRUCT__entry(
__field( unsigned long, call_site )
__field( const void *, ptr )
__field( size_t, bytes_req )
__field( size_t, bytes_alloc )
- __field( unsigned long, gfp_flags )
+ __field_struct( union gfp_flags_u, gfp )
+ __field_struct( union slab_flags_u, s )
),

TP_fast_assign(
@@ -32,51 +34,57 @@ DECLARE_EVENT_CLASS(kmem_alloc,
__entry->ptr = ptr;
__entry->bytes_req = bytes_req;
__entry->bytes_alloc = bytes_alloc;
- __entry->gfp_flags = (__force unsigned long)gfp_flags;
+ __entry->gfp.flags = gfp_flags;
+ __entry->s.sflags = s ? s->flags : 0;
),

- TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
+ TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
(void *)__entry->call_site,
__entry->ptr,
__entry->bytes_req,
__entry->bytes_alloc,
- show_gfp_flags(__entry->gfp_flags))
+ show_gfp_flags(__entry->gfp.ulong),
+ ((__entry->gfp.flags & __GFP_ACCOUNT) ||
+ (__entry->s.sflags & SLAB_ACCOUNT)) ? "true" : "false")
);

Thank you,
Vasily Averin

2022-05-23 06:28:37

by Vasily Averin

Subject: [PATCH v4] tracing: add 'accounted' entry into output of allocation tracepoints

Slab caches marked with SLAB_ACCOUNT force accounting for every
allocation from such a cache even if the __GFP_ACCOUNT flag is not passed.
Unfortunately, at the moment this flag is not visible in the ftrace output,
and this makes it difficult to analyze the accounted allocations.

This patch adds a boolean "accounted" entry into the trace output,
and sets it to 'true' for calls that used the __GFP_ACCOUNT flag and
for allocations from caches marked with SLAB_ACCOUNT.

Signed-off-by: Vasily Averin <[email protected]>
Acked-by: Shakeel Butt <[email protected]>
---
v4:
1) replaced in the patch description: "accounted" instead of "allocated"
2) added "Acked-by" from Shakeel,
3) re-addressed to akpm@

v3:
1) rework kmem_cache_alloc* tracepoints once again,
added struct kmem_cache argument into existing templates,
thanks to Matthew Wilcox
2) updated the corresponding trace_* calls
3) added boolean "allocated" entry into trace output,
thanks to Roman
4) updated patch subject and description

v2:
1) handle kmem_cache_alloc_node(), thanks to Shakeel
2) rework kmem_cache_alloc* tracepoints to use cachep instead
of current cachep->*size parameters.
NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
and therefore it was replaced by kmalloc_node.
---
include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
mm/slab.c | 10 +++++-----
mm/slab_common.c | 9 ++++-----
mm/slob.c | 8 ++++----
mm/slub.c | 20 +++++++++----------
5 files changed, 47 insertions(+), 38 deletions(-)

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 71c141804222..5bfeb6f276f1 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -13,11 +13,12 @@ DECLARE_EVENT_CLASS(kmem_alloc,

TP_PROTO(unsigned long call_site,
const void *ptr,
+ struct kmem_cache *s,
size_t bytes_req,
size_t bytes_alloc,
gfp_t gfp_flags),

- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags),

TP_STRUCT__entry(
__field( unsigned long, call_site )
@@ -25,6 +26,7 @@ DECLARE_EVENT_CLASS(kmem_alloc,
__field( size_t, bytes_req )
__field( size_t, bytes_alloc )
__field( unsigned long, gfp_flags )
+ __field( bool, accounted )
),

TP_fast_assign(
@@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
__entry->bytes_req = bytes_req;
__entry->bytes_alloc = bytes_alloc;
__entry->gfp_flags = (__force unsigned long)gfp_flags;
+ __entry->accounted = (gfp_flags & __GFP_ACCOUNT) ||
+ (s && s->flags & SLAB_ACCOUNT);
),

- TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
+ TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
(void *)__entry->call_site,
__entry->ptr,
__entry->bytes_req,
__entry->bytes_alloc,
- show_gfp_flags(__entry->gfp_flags))
+ show_gfp_flags(__entry->gfp_flags),
+ __entry->accounted ? "true" : "false")
);

DEFINE_EVENT(kmem_alloc, kmalloc,

- TP_PROTO(unsigned long call_site, const void *ptr,
+ TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),

- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
);

DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,

- TP_PROTO(unsigned long call_site, const void *ptr,
+ TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),

- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
);

DECLARE_EVENT_CLASS(kmem_alloc_node,

TP_PROTO(unsigned long call_site,
const void *ptr,
+ struct kmem_cache *s,
size_t bytes_req,
size_t bytes_alloc,
gfp_t gfp_flags,
int node),

- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node),

TP_STRUCT__entry(
__field( unsigned long, call_site )
@@ -77,6 +83,7 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
__field( size_t, bytes_alloc )
__field( unsigned long, gfp_flags )
__field( int, node )
+ __field( bool, accounted )
),

TP_fast_assign(
@@ -86,33 +93,36 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
__entry->bytes_alloc = bytes_alloc;
__entry->gfp_flags = (__force unsigned long)gfp_flags;
__entry->node = node;
+ __entry->accounted = (gfp_flags & __GFP_ACCOUNT) ||
+ (s && s->flags & SLAB_ACCOUNT);
),

- TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
+ TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d accounted=%s",
(void *)__entry->call_site,
__entry->ptr,
__entry->bytes_req,
__entry->bytes_alloc,
show_gfp_flags(__entry->gfp_flags),
- __entry->node)
+ __entry->node,
+ __entry->accounted ? "true" : "false")
);

DEFINE_EVENT(kmem_alloc_node, kmalloc_node,

TP_PROTO(unsigned long call_site, const void *ptr,
- size_t bytes_req, size_t bytes_alloc,
+ struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
gfp_t gfp_flags, int node),

- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
);

DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node,

TP_PROTO(unsigned long call_site, const void *ptr,
- size_t bytes_req, size_t bytes_alloc,
+ struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
gfp_t gfp_flags, int node),

- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
);

TRACE_EVENT(kfree,
diff --git a/mm/slab.c b/mm/slab.c
index 0edb474edef1..e5802445c7d6 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3492,7 +3492,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
{
void *ret = slab_alloc(cachep, lru, flags, cachep->object_size, _RET_IP_);

- trace_kmem_cache_alloc(_RET_IP_, ret,
+ trace_kmem_cache_alloc(_RET_IP_, ret, cachep,
cachep->object_size, cachep->size, flags);

return ret;
@@ -3581,7 +3581,7 @@ kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
ret = slab_alloc(cachep, NULL, flags, size, _RET_IP_);

ret = kasan_kmalloc(cachep, ret, size, flags);
- trace_kmalloc(_RET_IP_, ret,
+ trace_kmalloc(_RET_IP_, ret, cachep,
size, cachep->size, flags);
return ret;
}
@@ -3606,7 +3606,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
void *ret = slab_alloc_node(cachep, flags, nodeid, cachep->object_size, _RET_IP_);

- trace_kmem_cache_alloc_node(_RET_IP_, ret,
+ trace_kmem_cache_alloc_node(_RET_IP_, ret, cachep,
cachep->object_size, cachep->size,
flags, nodeid);

@@ -3625,7 +3625,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
ret = slab_alloc_node(cachep, flags, nodeid, size, _RET_IP_);

ret = kasan_kmalloc(cachep, ret, size, flags);
- trace_kmalloc_node(_RET_IP_, ret,
+ trace_kmalloc_node(_RET_IP_, ret, cachep,
size, cachep->size,
flags, nodeid);
return ret;
@@ -3708,7 +3708,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
ret = slab_alloc(cachep, NULL, flags, size, caller);

ret = kasan_kmalloc(cachep, ret, size, flags);
- trace_kmalloc(caller, ret,
+ trace_kmalloc(caller, ret, cachep,
size, cachep->size, flags);

return ret;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 2b3206a2c3b5..a345e8600e00 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -25,13 +25,12 @@
#include <asm/page.h>
#include <linux/memcontrol.h>

-#define CREATE_TRACE_POINTS
-#include <trace/events/kmem.h>
-
#include "internal.h"
-
#include "slab.h"

+#define CREATE_TRACE_POINTS
+#include <trace/events/kmem.h>
+
enum slab_state slab_state;
LIST_HEAD(slab_caches);
DEFINE_MUTEX(slab_mutex);
@@ -967,7 +966,7 @@ EXPORT_SYMBOL(kmalloc_order);
void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
{
void *ret = kmalloc_order(size, flags, order);
- trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
+ trace_kmalloc(_RET_IP_, ret, NULL, size, PAGE_SIZE << order, flags);
return ret;
}
EXPORT_SYMBOL(kmalloc_order_trace);
diff --git a/mm/slob.c b/mm/slob.c
index 40ea6e2d4ccd..dbefa0da0dfc 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -505,7 +505,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
*m = size;
ret = (void *)m + minalign;

- trace_kmalloc_node(caller, ret,
+ trace_kmalloc_node(caller, ret, NULL,
size, size + minalign, gfp, node);
} else {
unsigned int order = get_order(size);
@@ -514,7 +514,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
gfp |= __GFP_COMP;
ret = slob_new_pages(gfp, order, node);

- trace_kmalloc_node(caller, ret,
+ trace_kmalloc_node(caller, ret, NULL,
size, PAGE_SIZE << order, gfp, node);
}

@@ -610,12 +610,12 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)

if (c->size < PAGE_SIZE) {
b = slob_alloc(c->size, flags, c->align, node, 0);
- trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
+ trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
SLOB_UNITS(c->size) * SLOB_UNIT,
flags, node);
} else {
b = slob_new_pages(flags, get_order(c->size), node);
- trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
+ trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
PAGE_SIZE << get_order(c->size),
flags, node);
}
diff --git a/mm/slub.c b/mm/slub.c
index ed5c2c03a47a..9b10591646dd 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3231,7 +3231,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
{
void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, s->object_size);

- trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
+ trace_kmem_cache_alloc(_RET_IP_, ret, s, s->object_size,
s->size, gfpflags);

return ret;
@@ -3254,7 +3254,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_lru);
void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
{
void *ret = slab_alloc(s, NULL, gfpflags, _RET_IP_, size);
- trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
+ trace_kmalloc(_RET_IP_, ret, s, size, s->size, gfpflags);
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
@@ -3266,7 +3266,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
{
void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);

- trace_kmem_cache_alloc_node(_RET_IP_, ret,
+ trace_kmem_cache_alloc_node(_RET_IP_, ret, s,
s->object_size, s->size, gfpflags, node);

return ret;
@@ -3280,7 +3280,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
{
void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);

- trace_kmalloc_node(_RET_IP_, ret,
+ trace_kmalloc_node(_RET_IP_, ret, s,
size, s->size, gfpflags, node);

ret = kasan_kmalloc(s, ret, size, gfpflags);
@@ -4409,7 +4409,7 @@ void *__kmalloc(size_t size, gfp_t flags)

ret = slab_alloc(s, NULL, flags, _RET_IP_, size);

- trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
+ trace_kmalloc(_RET_IP_, ret, s, size, s->size, flags);

ret = kasan_kmalloc(s, ret, size, flags);

@@ -4443,7 +4443,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
ret = kmalloc_large_node(size, flags, node);

- trace_kmalloc_node(_RET_IP_, ret,
+ trace_kmalloc_node(_RET_IP_, ret, NULL,
size, PAGE_SIZE << get_order(size),
flags, node);

@@ -4457,7 +4457,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)

ret = slab_alloc_node(s, NULL, flags, node, _RET_IP_, size);

- trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
+ trace_kmalloc_node(_RET_IP_, ret, s, size, s->size, flags, node);

ret = kasan_kmalloc(s, ret, size, flags);

@@ -4916,7 +4916,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
ret = slab_alloc(s, NULL, gfpflags, caller, size);

/* Honor the call site pointer we received. */
- trace_kmalloc(caller, ret, size, s->size, gfpflags);
+ trace_kmalloc(caller, ret, s, size, s->size, gfpflags);

return ret;
}
@@ -4932,7 +4932,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
ret = kmalloc_large_node(size, gfpflags, node);

- trace_kmalloc_node(caller, ret,
+ trace_kmalloc_node(caller, ret, NULL,
size, PAGE_SIZE << get_order(size),
gfpflags, node);

@@ -4947,7 +4947,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
ret = slab_alloc_node(s, NULL, gfpflags, node, caller, size);

/* Honor the call site pointer we received. */
- trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
+ trace_kmalloc_node(caller, ret, s, size, s->size, gfpflags, node);

return ret;
}
--
2.31.1


2022-05-23 06:50:45

by Hyeonggon Yoo

Subject: Re: [PATCH v4] tracing: add 'accounted' entry into output of allocation tracepoints

On Sat, May 21, 2022 at 09:36:54PM +0300, Vasily Averin wrote:
> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
>
> This patch adds boolean "accounted" entry into trace output,
> and set it to 'true' for calls used __GFP_ACCOUNT flag and
> for allocations from caches marked with SLAB_ACCOUNT.
>
> Signed-off-by: Vasily Averin <[email protected]>
> Acked-by: Shakeel Butt <[email protected]>

May I ask what information you want to collect
using this patch?

I pointed this out in another thread, but I'm not sure
printing SLAB_* flags in these tracepoints is a good idea :(

If we decide to do that, it would be better to print
something like:
slab_flags=SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT|SLAB_STORE_USER
instead of just printing 'accounted=true/false'. This patch is too
specific to SLAB_ACCOUNT.
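
For example, something along these lines, reusing the __print_flags() helper
that show_gfp_flags() is built on (just a rough sketch; it assumes a stored
slab_flags field and the __force casts the __bitwise type would need):

	TP_printk("... slab_flags=%s",
		  ...,
		  __print_flags(__entry->slab_flags, "|",
			{(__force unsigned long)SLAB_RECLAIM_ACCOUNT, "SLAB_RECLAIM_ACCOUNT"},
			{(__force unsigned long)SLAB_ACCOUNT, "SLAB_ACCOUNT"},
			{(__force unsigned long)SLAB_STORE_USER, "SLAB_STORE_USER"}))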

And if what you want to know is just the total amount of accounted slab memory,
what about adding something like SlabAccounted to /proc/meminfo?

Thanks.

> ---
> v4:
> 1) replaced in patch descripion: "accounted" instead of "allocated"
> 2) added "Acked-by" from Shakeel,
> 3) re-addressed to akpm@
>
> v3:
> 1) rework kmem_cache_alloc* tracepoints once again,
> added struct kmem_cache argument into existing templates,
> thanks to Matthew Wilcox
> 2) updated according trace_* calls
> 3) added boolean "allocated" entry into trace output,
> thanks to Roman
> 4) updated patch subject and description
>
> v2:
> 1) handle kmem_cache_alloc_node(), thanks to Shakeel
> 2) rework kmem_cache_alloc* tracepoints to use cachep instead
> of current cachep->*size parameters.
> NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
> and therefore it was replaced by kmalloc_node.
> ---
> include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
> mm/slab.c | 10 +++++-----
> mm/slab_common.c | 9 ++++-----
> mm/slob.c | 8 ++++----
> mm/slub.c | 20 +++++++++----------
> 5 files changed, 47 insertions(+), 38 deletions(-)
>
> diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
> index 71c141804222..5bfeb6f276f1 100644
> --- a/include/trace/events/kmem.h
> +++ b/include/trace/events/kmem.h
> @@ -13,11 +13,12 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>
> TP_PROTO(unsigned long call_site,
> const void *ptr,
> + struct kmem_cache *s,
> size_t bytes_req,
> size_t bytes_alloc,
> gfp_t gfp_flags),
>
> - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
> + TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags),
>
> TP_STRUCT__entry(
> __field( unsigned long, call_site )
> @@ -25,6 +26,7 @@ DECLARE_EVENT_CLASS(kmem_alloc,
> __field( size_t, bytes_req )
> __field( size_t, bytes_alloc )
> __field( unsigned long, gfp_flags )
> + __field( bool, accounted )
> ),
>
> TP_fast_assign(
> @@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
> __entry->bytes_req = bytes_req;
> __entry->bytes_alloc = bytes_alloc;
> __entry->gfp_flags = (__force unsigned long)gfp_flags;
> + __entry->accounted = (gfp_flags & __GFP_ACCOUNT) ||

I doubt someone will pass __GFP_ACCOUNT in gfpflags
when calling kmem_cache_alloc*().


> + (s && s->flags & SLAB_ACCOUNT);
> ),

2022-05-23 07:40:51

by Vasily Averin

Subject: Re: [PATCH v4] tracing: add 'accounted' entry into output of allocation tracepoints

On 5/22/22 08:19, Hyeonggon Yoo wrote:
> On Sun, May 22, 2022 at 07:33:08AM +0300, Vasily Averin wrote:
>> On 5/22/22 06:51, Hyeonggon Yoo wrote:
>>> If we decide to do that, it would be better to print
>>> something like:
>>> slab_flags=SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT|SLAB_STORE_USER
>>> instead of just printing 'accounted=true/false'. This patch is too
>>> specific to SLAB_ACCOUNT.
>>
>> Any extra output degrades performance.
>
> No strong opinion, but just a concern that maybe later someone will want to add
> something similar like 'reclaimable=true/false', 'dma=true/false', ...
> and I would prefer a more general solution (especially since we won't be able to
> change tracepoints after release because of backward compatibility).

I would like to quote Steven Rostedt:
https://lore.kernel.org/all/[email protected]/
"
> Trace events are not ABI, but if we don't have a strong reason to break it,
> I'd preserve the old order.

Ideally everyone should be using libtraceevent which will parse the format
file for the needed entries.

Nothing (important) should be parsing the raw ascii from the trace files.
It's slow and unreliable. The raw format (trace_pipe_raw) files, along with
libtraceevent will handle fining the fields you are looking for, even if
the fields move around (internally or externally).

Then there's trace-cruncher (a python script that uses libtracefs and
libtraceevent) that will work too.

https://github.com/vmware/trace-cruncher
"

Thank you,
Vasily Averin
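As a side note to the discussion above: if the more general "slab_flags=" output
were ever wanted, it could reuse the tracepoint __print_flags() helper, mirroring
how gfp_flags is decoded today. The fragment below is only a sketch of the pieces
that would change inside the event template -- it is not part of any posted patch,
and the field name and flag list are purely illustrative:

	TP_STRUCT__entry(
		__field( unsigned long, slab_flags )
	),

	TP_fast_assign(
		/* record the cache flags instead of a single derived boolean */
		__entry->slab_flags = s ? (__force unsigned long)s->flags : 0;
	),

	TP_printk("slab_flags=%s",
		  __print_flags(__entry->slab_flags, "|",
				{ (__force unsigned long)SLAB_ACCOUNT,		"SLAB_ACCOUNT" },
				{ (__force unsigned long)SLAB_RECLAIM_ACCOUNT,	"SLAB_RECLAIM_ACCOUNT" },
				{ (__force unsigned long)SLAB_STORE_USER,	"SLAB_STORE_USER" }))

As noted above, though, every extra decoded field has a cost, and serious consumers
are expected to read the raw fields through libtraceevent rather than parse the
printed text.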

2022-05-23 13:14:32

by Vlastimil Babka

[permalink] [raw]
Subject: Re: [PATCH v4] tracing: add 'accounted' entry into output of allocation tracepoints

On 5/21/22 20:36, Vasily Averin wrote:
> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
>
> This patch adds a boolean "accounted" entry into the trace output,
> and sets it to 'true' for calls that use the __GFP_ACCOUNT flag and
> for allocations from caches marked with SLAB_ACCOUNT.
>
> Signed-off-by: Vasily Averin <[email protected]>
> Acked-by: Shakeel Butt <[email protected]>
> ---
> v4:
> 1) replaced in patch description: "accounted" instead of "allocated"
> 2) added "Acked-by" from Shakeel,
> 3) re-addressed to akpm@
>
> v3:
> 1) rework kmem_cache_alloc* tracepoints once again,
> added struct kmem_cache argument into existing templates,
> thanks to Matthew Wilcox
> 2) updated the corresponding trace_* calls
> 3) added boolean "allocated" entry into trace output,
> thanks to Roman
> 4) updated patch subject and description
>
> v2:
> 1) handle kmem_cache_alloc_node(), thanks to Shakeel
> 2) rework kmem_cache_alloc* tracepoints to use cachep instead
> of current cachep->*size parameters.
> NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
> and therefore it was replaced by kmalloc_node.
> ---
> include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
> mm/slab.c | 10 +++++-----
> mm/slab_common.c | 9 ++++-----
> mm/slob.c | 8 ++++----
> mm/slub.c | 20 +++++++++----------
> 5 files changed, 47 insertions(+), 38 deletions(-)

I'd prefer the slab tree, given the files it touches and the expected conflict
with v3 of Hyeonggon Yoo's kmalloc unification series. BTW the suggested
split of kmalloc/kmem_cache_alloc tracepoints [1] will be useful if
implemented, as we won't have to pass NULL kmem_cache pointers from some of
the kmalloc callers.
I would add it there after 5.19-rc1 to avoid conflicts with the kmem.h changes in
mm-stable, which should be part of mainline before rc1.

Thanks!

[1] https://lore.kernel.org/all/[email protected]/
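The split referenced in [1] would give kmalloc and kmem_cache_alloc separate event
templates. A rough sketch of what the kmalloc-only side could look like is below;
the name and layout are hypothetical and not taken from that series. The point is
that without a kmem_cache argument in the prototype, the accounted decision could
collapse to the gfp flag alone, and the kmalloc callers would no longer need to
pass NULL:

	/* hypothetical kmalloc-only template, for illustration only */
	DECLARE_EVENT_CLASS(kmalloc_only,

		TP_PROTO(unsigned long call_site, const void *ptr,
			 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),

		TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),

		TP_STRUCT__entry(
			__field( unsigned long,	call_site )
			__field( const void *,	ptr )
			__field( size_t,	bytes_req )
			__field( size_t,	bytes_alloc )
			__field( unsigned long,	gfp_flags )
			__field( bool,		accounted )
		),

		TP_fast_assign(
			__entry->call_site	= call_site;
			__entry->ptr		= ptr;
			__entry->bytes_req	= bytes_req;
			__entry->bytes_alloc	= bytes_alloc;
			__entry->gfp_flags	= (__force unsigned long)gfp_flags;
			/* no cache pointer here: only __GFP_ACCOUNT can mark it */
			__entry->accounted	= gfp_flags & __GFP_ACCOUNT;
		),

		TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
			  (void *)__entry->call_site, __entry->ptr,
			  __entry->bytes_req, __entry->bytes_alloc,
			  show_gfp_flags(__entry->gfp_flags),
			  __entry->accounted ? "true" : "false")
	);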

> diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
> index 71c141804222..5bfeb6f276f1 100644
> --- a/include/trace/events/kmem.h
> +++ b/include/trace/events/kmem.h
> @@ -13,11 +13,12 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>
> TP_PROTO(unsigned long call_site,
> const void *ptr,
> + struct kmem_cache *s,
> size_t bytes_req,
> size_t bytes_alloc,
> gfp_t gfp_flags),
>
> - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
> + TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags),
>
> TP_STRUCT__entry(
> __field( unsigned long, call_site )
> @@ -25,6 +26,7 @@ DECLARE_EVENT_CLASS(kmem_alloc,
> __field( size_t, bytes_req )
> __field( size_t, bytes_alloc )
> __field( unsigned long, gfp_flags )
> + __field( bool, accounted )
> ),
>
> TP_fast_assign(
> @@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
> __entry->bytes_req = bytes_req;
> __entry->bytes_alloc = bytes_alloc;
> __entry->gfp_flags = (__force unsigned long)gfp_flags;
> + __entry->accounted = (gfp_flags & __GFP_ACCOUNT) ||
> + (s && s->flags & SLAB_ACCOUNT);
> ),
>
> - TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
> + TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
> (void *)__entry->call_site,
> __entry->ptr,
> __entry->bytes_req,
> __entry->bytes_alloc,
> - show_gfp_flags(__entry->gfp_flags))
> + show_gfp_flags(__entry->gfp_flags),
> + __entry->accounted ? "true" : "false")
> );
>
> DEFINE_EVENT(kmem_alloc, kmalloc,
>
> - TP_PROTO(unsigned long call_site, const void *ptr,
> + TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
> size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
>
> - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
> + TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
> );
>
> DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,
>
> - TP_PROTO(unsigned long call_site, const void *ptr,
> + TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
> size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
>
> - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
> + TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
> );
>
> DECLARE_EVENT_CLASS(kmem_alloc_node,
>
> TP_PROTO(unsigned long call_site,
> const void *ptr,
> + struct kmem_cache *s,
> size_t bytes_req,
> size_t bytes_alloc,
> gfp_t gfp_flags,
> int node),
>
> - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
> + TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node),
>
> TP_STRUCT__entry(
> __field( unsigned long, call_site )
> @@ -77,6 +83,7 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
> __field( size_t, bytes_alloc )
> __field( unsigned long, gfp_flags )
> __field( int, node )
> + __field( bool, accounted )
> ),
>
> TP_fast_assign(
> @@ -86,33 +93,36 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
> __entry->bytes_alloc = bytes_alloc;
> __entry->gfp_flags = (__force unsigned long)gfp_flags;
> __entry->node = node;
> + __entry->accounted = (gfp_flags & __GFP_ACCOUNT) ||
> + (s && s->flags & SLAB_ACCOUNT);
> ),
>
> - TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
> + TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d accounted=%s",
> (void *)__entry->call_site,
> __entry->ptr,
> __entry->bytes_req,
> __entry->bytes_alloc,
> show_gfp_flags(__entry->gfp_flags),
> - __entry->node)
> + __entry->node,
> + __entry->accounted ? "true" : "false")
> );
>
> DEFINE_EVENT(kmem_alloc_node, kmalloc_node,
>
> TP_PROTO(unsigned long call_site, const void *ptr,
> - size_t bytes_req, size_t bytes_alloc,
> + struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
> gfp_t gfp_flags, int node),
>
> - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
> + TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
> );
>
> DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node,
>
> TP_PROTO(unsigned long call_site, const void *ptr,
> - size_t bytes_req, size_t bytes_alloc,
> + struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
> gfp_t gfp_flags, int node),
>
> - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
> + TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
> );
>
> TRACE_EVENT(kfree,
> diff --git a/mm/slab.c b/mm/slab.c
> index 0edb474edef1..e5802445c7d6 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -3492,7 +3492,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
> {
> void *ret = slab_alloc(cachep, lru, flags, cachep->object_size, _RET_IP_);
>
> - trace_kmem_cache_alloc(_RET_IP_, ret,
> + trace_kmem_cache_alloc(_RET_IP_, ret, cachep,
> cachep->object_size, cachep->size, flags);
>
> return ret;
> @@ -3581,7 +3581,7 @@ kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
> ret = slab_alloc(cachep, NULL, flags, size, _RET_IP_);
>
> ret = kasan_kmalloc(cachep, ret, size, flags);
> - trace_kmalloc(_RET_IP_, ret,
> + trace_kmalloc(_RET_IP_, ret, cachep,
> size, cachep->size, flags);
> return ret;
> }
> @@ -3606,7 +3606,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
> {
> void *ret = slab_alloc_node(cachep, flags, nodeid, cachep->object_size, _RET_IP_);
>
> - trace_kmem_cache_alloc_node(_RET_IP_, ret,
> + trace_kmem_cache_alloc_node(_RET_IP_, ret, cachep,
> cachep->object_size, cachep->size,
> flags, nodeid);
>
> @@ -3625,7 +3625,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
> ret = slab_alloc_node(cachep, flags, nodeid, size, _RET_IP_);
>
> ret = kasan_kmalloc(cachep, ret, size, flags);
> - trace_kmalloc_node(_RET_IP_, ret,
> + trace_kmalloc_node(_RET_IP_, ret, cachep,
> size, cachep->size,
> flags, nodeid);
> return ret;
> @@ -3708,7 +3708,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
> ret = slab_alloc(cachep, NULL, flags, size, caller);
>
> ret = kasan_kmalloc(cachep, ret, size, flags);
> - trace_kmalloc(caller, ret,
> + trace_kmalloc(caller, ret, cachep,
> size, cachep->size, flags);
>
> return ret;
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index 2b3206a2c3b5..a345e8600e00 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -25,13 +25,12 @@
> #include <asm/page.h>
> #include <linux/memcontrol.h>
>
> -#define CREATE_TRACE_POINTS
> -#include <trace/events/kmem.h>
> -
> #include "internal.h"
> -
> #include "slab.h"
>
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/kmem.h>
> +
> enum slab_state slab_state;
> LIST_HEAD(slab_caches);
> DEFINE_MUTEX(slab_mutex);
> @@ -967,7 +966,7 @@ EXPORT_SYMBOL(kmalloc_order);
> void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
> {
> void *ret = kmalloc_order(size, flags, order);
> - trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
> + trace_kmalloc(_RET_IP_, ret, NULL, size, PAGE_SIZE << order, flags);
> return ret;
> }
> EXPORT_SYMBOL(kmalloc_order_trace);
> diff --git a/mm/slob.c b/mm/slob.c
> index 40ea6e2d4ccd..dbefa0da0dfc 100644
> --- a/mm/slob.c
> +++ b/mm/slob.c
> @@ -505,7 +505,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
> *m = size;
> ret = (void *)m + minalign;
>
> - trace_kmalloc_node(caller, ret,
> + trace_kmalloc_node(caller, ret, NULL,
> size, size + minalign, gfp, node);
> } else {
> unsigned int order = get_order(size);
> @@ -514,7 +514,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
> gfp |= __GFP_COMP;
> ret = slob_new_pages(gfp, order, node);
>
> - trace_kmalloc_node(caller, ret,
> + trace_kmalloc_node(caller, ret, NULL,
> size, PAGE_SIZE << order, gfp, node);
> }
>
> @@ -610,12 +610,12 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
>
> if (c->size < PAGE_SIZE) {
> b = slob_alloc(c->size, flags, c->align, node, 0);
> - trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
> + trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
> SLOB_UNITS(c->size) * SLOB_UNIT,
> flags, node);
> } else {
> b = slob_new_pages(flags, get_order(c->size), node);
> - trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
> + trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
> PAGE_SIZE << get_order(c->size),
> flags, node);
> }
> diff --git a/mm/slub.c b/mm/slub.c
> index ed5c2c03a47a..9b10591646dd 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -3231,7 +3231,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
> {
> void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, s->object_size);
>
> - trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
> + trace_kmem_cache_alloc(_RET_IP_, ret, s, s->object_size,
> s->size, gfpflags);
>
> return ret;
> @@ -3254,7 +3254,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_lru);
> void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
> {
> void *ret = slab_alloc(s, NULL, gfpflags, _RET_IP_, size);
> - trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
> + trace_kmalloc(_RET_IP_, ret, s, size, s->size, gfpflags);
> ret = kasan_kmalloc(s, ret, size, gfpflags);
> return ret;
> }
> @@ -3266,7 +3266,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
> {
> void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);
>
> - trace_kmem_cache_alloc_node(_RET_IP_, ret,
> + trace_kmem_cache_alloc_node(_RET_IP_, ret, s,
> s->object_size, s->size, gfpflags, node);
>
> return ret;
> @@ -3280,7 +3280,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
> {
> void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);
>
> - trace_kmalloc_node(_RET_IP_, ret,
> + trace_kmalloc_node(_RET_IP_, ret, s,
> size, s->size, gfpflags, node);
>
> ret = kasan_kmalloc(s, ret, size, gfpflags);
> @@ -4409,7 +4409,7 @@ void *__kmalloc(size_t size, gfp_t flags)
>
> ret = slab_alloc(s, NULL, flags, _RET_IP_, size);
>
> - trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
> + trace_kmalloc(_RET_IP_, ret, s, size, s->size, flags);
>
> ret = kasan_kmalloc(s, ret, size, flags);
>
> @@ -4443,7 +4443,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
> if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
> ret = kmalloc_large_node(size, flags, node);
>
> - trace_kmalloc_node(_RET_IP_, ret,
> + trace_kmalloc_node(_RET_IP_, ret, NULL,
> size, PAGE_SIZE << get_order(size),
> flags, node);
>
> @@ -4457,7 +4457,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
>
> ret = slab_alloc_node(s, NULL, flags, node, _RET_IP_, size);
>
> - trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
> + trace_kmalloc_node(_RET_IP_, ret, s, size, s->size, flags, node);
>
> ret = kasan_kmalloc(s, ret, size, flags);
>
> @@ -4916,7 +4916,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
> ret = slab_alloc(s, NULL, gfpflags, caller, size);
>
> /* Honor the call site pointer we received. */
> - trace_kmalloc(caller, ret, size, s->size, gfpflags);
> + trace_kmalloc(caller, ret, s, size, s->size, gfpflags);
>
> return ret;
> }
> @@ -4932,7 +4932,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
> if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
> ret = kmalloc_large_node(size, gfpflags, node);
>
> - trace_kmalloc_node(caller, ret,
> + trace_kmalloc_node(caller, ret, NULL,
> size, PAGE_SIZE << get_order(size),
> gfpflags, node);
>
> @@ -4947,7 +4947,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
> ret = slab_alloc_node(s, NULL, gfpflags, node, caller, size);
>
> /* Honor the call site pointer we received. */
> - trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
> + trace_kmalloc_node(caller, ret, s, size, s->size, gfpflags, node);
>
> return ret;
> }


2022-05-25 11:57:05

by Vasily Averin

[permalink] [raw]
Subject: Re: [PATCH v4] tracing: add 'accounted' entry into output of allocation tracepoints

On 5/25/22 10:33, Hyeonggon Yoo wrote:
> On Sat, May 21, 2022 at 09:36:54PM +0300, Vasily Averin wrote:
>> Slab caches marked with SLAB_ACCOUNT force accounting for every
>> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
>> Unfortunately, at the moment this flag is not visible in ftrace output,
>> and this makes it difficult to analyze the accounted allocations.
>>
>> This patch adds a boolean "accounted" entry into the trace output,
>> and sets it to 'true' for calls that use the __GFP_ACCOUNT flag and
>> for allocations from caches marked with SLAB_ACCOUNT.
>>
>> Signed-off-by: Vasily Averin <[email protected]>
>> Acked-by: Shakeel Butt <[email protected]>
>> ---
>
>
> Maybe I worried a bit much...
>
> I failed to apply it on slab/for-next and v5.18. What tree is it based on?

Its context depends on changes in my other patches already included in mm-unstable.

>> v4:
>> 1) replaced in patch description: "accounted" instead of "allocated"
>> 2) added "Acked-by" from Shakeel,
>> 3) re-addressed to akpm@
>>
>> v3:
>> 1) rework kmem_cache_alloc* tracepoints once again,
>> added struct kmem_cache argument into existing templates,
>> thanks to Matthew Wilcox
>> 2) updated the corresponding trace_* calls
>> 3) added boolean "allocated" entry into trace output,
>> thanks to Roman
>> 4) updated patch subject and description
>>
>> v2:
>> 1) handle kmem_cache_alloc_node(), thanks to Shakeel
>> 2) rework kmem_cache_alloc* tracepoints to use cachep instead
>> of current cachep->*size parameters.
>> NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
>> and therefore it was replaced by kmalloc_node.
>> ---
>> include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
>> mm/slab.c | 10 +++++-----
>> mm/slab_common.c | 9 ++++-----
>> mm/slob.c | 8 ++++----
>> mm/slub.c | 20 +++++++++----------
>> 5 files changed, 47 insertions(+), 38 deletions(-)


2022-05-25 17:37:41

by Hyeonggon Yoo

[permalink] [raw]
Subject: Re: [PATCH v4] tracing: add 'accounted' entry into output of allocation tracepoints

On Sat, May 21, 2022 at 09:36:54PM +0300, Vasily Averin wrote:
> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
>
> This patch adds a boolean "accounted" entry into the trace output,
> and sets it to 'true' for calls that use the __GFP_ACCOUNT flag and
> for allocations from caches marked with SLAB_ACCOUNT.
>
> Signed-off-by: Vasily Averin <[email protected]>
> Acked-by: Shakeel Butt <[email protected]>
> ---


Maybe I worried a bit much...

I failed to apply it on slab/for-next and v5.18. What tree is it based on?

> v4:
> 1) replaced in patch description: "accounted" instead of "allocated"
> 2) added "Acked-by" from Shakeel,
> 3) re-addressed to akpm@
>
> v3:
> 1) rework kmem_cache_alloc* tracepoints once again,
> added struct kmem_cache argument into existing templates,
> thanks to Matthew Wilcox
> 2) updated the corresponding trace_* calls
> 3) added boolean "allocated" entry into trace output,
> thanks to Roman
> 4) updated patch subject and description
>
> v2:
> 1) handle kmem_cache_alloc_node(), thanks to Shakeel
> 2) rework kmem_cache_alloc* tracepoints to use cachep instead
> of current cachep->*size parameters.
> NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
> and therefore it was replaced by kmalloc_node.
> ---
> include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
> mm/slab.c | 10 +++++-----
> mm/slab_common.c | 9 ++++-----
> mm/slob.c | 8 ++++----
> mm/slub.c | 20 +++++++++----------
> 5 files changed, 47 insertions(+), 38 deletions(-)

2022-05-28 19:18:50

by Roman Gushchin

[permalink] [raw]
Subject: Re: [PATCH v4] tracing: add 'accounted' entry into output of allocation tracepoints

On Sat, May 21, 2022 at 09:36:54PM +0300, Vasily Averin wrote:
> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
>
> This patch adds a boolean "accounted" entry into the trace output,
> and sets it to 'true' for calls that use the __GFP_ACCOUNT flag and
> for allocations from caches marked with SLAB_ACCOUNT.
>
> Signed-off-by: Vasily Averin <[email protected]>
> Acked-by: Shakeel Butt <[email protected]>
> ---
> v4:
> 1) replaced in patch description: "accounted" instead of "allocated"
> 2) added "Acked-by" from Shakeel,
> 3) re-addressed to akpm@
>
> v3:
> 1) rework kmem_cache_alloc* tracepoints once again,
> added struct kmem_cache argument into existing templates,
> thanks to Matthew Wilcox
> 2) updated the corresponding trace_* calls
> 3) added boolean "allocated" entry into trace output,
> thanks to Roman
> 4) updated patch subject and description
>
> v2:
> 1) handle kmem_cache_alloc_node(), thanks to Shakeel
> 2) rework kmem_cache_alloc* tracepoints to use cachep instead
> of current cachep->*size parameters.
> NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
> and therefore it was replaced by kmalloc_node.
> ---
> include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
> mm/slab.c | 10 +++++-----
> mm/slab_common.c | 9 ++++-----
> mm/slob.c | 8 ++++----
> mm/slub.c | 20 +++++++++----------
> 5 files changed, 47 insertions(+), 38 deletions(-)

LGTM

Acked-by: Roman Gushchin <[email protected]>

Thanks!

2022-05-30 11:15:45

by Vasily Averin

[permalink] [raw]
Subject: [PATCH v5] tracing: add 'accounted' entry into output of allocation tracepoints

Slab caches marked with SLAB_ACCOUNT force accounting for every
allocation from this cache even if __GFP_ACCOUNT flag is not passed.
Unfortunately, at the moment this flag is not visible in ftrace output,
and this makes it difficult to analyze the accounted allocations.

This patch adds a boolean "accounted" entry into the trace output,
and sets it to 'true' for calls that use the __GFP_ACCOUNT flag and
for allocations from caches marked with SLAB_ACCOUNT.

Signed-off-by: Vasily Averin <[email protected]>
Acked-by: Shakeel Butt <[email protected]>
Acked-by: Roman Gushchin <[email protected]>

---
v5:
1) re-based to current upstream (v5.18-11267-gb00ed48bb0a7)
2) added "Acked-by" from Roman
3) re-addressed to slab maintainers

v4:
1) replaced in patch description: "accounted" instead of "allocated"
2) added "Acked-by" from Shakeel,
3) re-addressed to akpm@

v3:
1) rework kmem_cache_alloc* tracepoints once again,
added struct kmem_cache argument into existing templates,
thanks to Matthew Wilcox
2) updated the corresponding trace_* calls
3) added boolean "allocated" entry into trace output,
thanks to Roman
4) updated patch subject and description

v2:
1) handle kmem_cache_alloc_node(), thanks to Shakeel
2) rework kmem_cache_alloc* tracepoints to use cachep instead
of current cachep->*size parameters.
NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
and therefore it was replaced by kmalloc_node.
---
include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
mm/slab.c | 10 +++++-----
mm/slab_common.c | 9 ++++-----
mm/slob.c | 8 ++++----
mm/slub.c | 20 +++++++++----------
5 files changed, 47 insertions(+), 38 deletions(-)

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index f76668305ac5..8d8736a5b654 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -13,11 +13,12 @@ DECLARE_EVENT_CLASS(kmem_alloc,

TP_PROTO(unsigned long call_site,
const void *ptr,
+ struct kmem_cache *s,
size_t bytes_req,
size_t bytes_alloc,
gfp_t gfp_flags),

- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags),

TP_STRUCT__entry(
__field( unsigned long, call_site )
@@ -25,6 +26,7 @@ DECLARE_EVENT_CLASS(kmem_alloc,
__field( size_t, bytes_req )
__field( size_t, bytes_alloc )
__field( unsigned long, gfp_flags )
+ __field( bool, accounted )
),

TP_fast_assign(
@@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
__entry->bytes_req = bytes_req;
__entry->bytes_alloc = bytes_alloc;
__entry->gfp_flags = (__force unsigned long)gfp_flags;
+ __entry->accounted = (gfp_flags & __GFP_ACCOUNT) ||
+ (s && s->flags & SLAB_ACCOUNT);
),

- TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
+ TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
(void *)__entry->call_site,
__entry->ptr,
__entry->bytes_req,
__entry->bytes_alloc,
- show_gfp_flags(__entry->gfp_flags))
+ show_gfp_flags(__entry->gfp_flags),
+ __entry->accounted ? "true" : "false")
);

DEFINE_EVENT(kmem_alloc, kmalloc,

- TP_PROTO(unsigned long call_site, const void *ptr,
+ TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),

- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
);

DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,

- TP_PROTO(unsigned long call_site, const void *ptr,
+ TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),

- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
);

DECLARE_EVENT_CLASS(kmem_alloc_node,

TP_PROTO(unsigned long call_site,
const void *ptr,
+ struct kmem_cache *s,
size_t bytes_req,
size_t bytes_alloc,
gfp_t gfp_flags,
int node),

- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node),

TP_STRUCT__entry(
__field( unsigned long, call_site )
@@ -77,6 +83,7 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
__field( size_t, bytes_alloc )
__field( unsigned long, gfp_flags )
__field( int, node )
+ __field( bool, accounted )
),

TP_fast_assign(
@@ -86,33 +93,36 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
__entry->bytes_alloc = bytes_alloc;
__entry->gfp_flags = (__force unsigned long)gfp_flags;
__entry->node = node;
+ __entry->accounted = (gfp_flags & __GFP_ACCOUNT) ||
+ (s && s->flags & SLAB_ACCOUNT);
),

- TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
+ TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d accounted=%s",
(void *)__entry->call_site,
__entry->ptr,
__entry->bytes_req,
__entry->bytes_alloc,
show_gfp_flags(__entry->gfp_flags),
- __entry->node)
+ __entry->node,
+ __entry->accounted ? "true" : "false")
);

DEFINE_EVENT(kmem_alloc_node, kmalloc_node,

TP_PROTO(unsigned long call_site, const void *ptr,
- size_t bytes_req, size_t bytes_alloc,
+ struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
gfp_t gfp_flags, int node),

- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
);

DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node,

TP_PROTO(unsigned long call_site, const void *ptr,
- size_t bytes_req, size_t bytes_alloc,
+ struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
gfp_t gfp_flags, int node),

- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
);

TRACE_EVENT(kfree,
diff --git a/mm/slab.c b/mm/slab.c
index f8cd00f4ba13..1f7864ff41e3 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3478,7 +3478,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
{
void *ret = slab_alloc(cachep, lru, flags, cachep->object_size, _RET_IP_);

- trace_kmem_cache_alloc(_RET_IP_, ret,
+ trace_kmem_cache_alloc(_RET_IP_, ret, cachep,
cachep->object_size, cachep->size, flags);

return ret;
@@ -3567,7 +3567,7 @@ kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
ret = slab_alloc(cachep, NULL, flags, size, _RET_IP_);

ret = kasan_kmalloc(cachep, ret, size, flags);
- trace_kmalloc(_RET_IP_, ret,
+ trace_kmalloc(_RET_IP_, ret, cachep,
size, cachep->size, flags);
return ret;
}
@@ -3592,7 +3592,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
void *ret = slab_alloc_node(cachep, flags, nodeid, cachep->object_size, _RET_IP_);

- trace_kmem_cache_alloc_node(_RET_IP_, ret,
+ trace_kmem_cache_alloc_node(_RET_IP_, ret, cachep,
cachep->object_size, cachep->size,
flags, nodeid);

@@ -3611,7 +3611,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
ret = slab_alloc_node(cachep, flags, nodeid, size, _RET_IP_);

ret = kasan_kmalloc(cachep, ret, size, flags);
- trace_kmalloc_node(_RET_IP_, ret,
+ trace_kmalloc_node(_RET_IP_, ret, cachep,
size, cachep->size,
flags, nodeid);
return ret;
@@ -3694,7 +3694,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
ret = slab_alloc(cachep, NULL, flags, size, caller);

ret = kasan_kmalloc(cachep, ret, size, flags);
- trace_kmalloc(caller, ret,
+ trace_kmalloc(caller, ret, cachep,
size, cachep->size, flags);

return ret;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 77c3adf40e50..6c9aac5d8f4a 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -26,13 +26,12 @@
#include <linux/memcontrol.h>
#include <linux/stackdepot.h>

-#define CREATE_TRACE_POINTS
-#include <trace/events/kmem.h>
-
#include "internal.h"
-
#include "slab.h"

+#define CREATE_TRACE_POINTS
+#include <trace/events/kmem.h>
+
enum slab_state slab_state;
LIST_HEAD(slab_caches);
DEFINE_MUTEX(slab_mutex);
@@ -959,7 +958,7 @@ EXPORT_SYMBOL(kmalloc_order);
void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
{
void *ret = kmalloc_order(size, flags, order);
- trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
+ trace_kmalloc(_RET_IP_, ret, NULL, size, PAGE_SIZE << order, flags);
return ret;
}
EXPORT_SYMBOL(kmalloc_order_trace);
diff --git a/mm/slob.c b/mm/slob.c
index f47811f09aca..56421fe461c4 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -507,7 +507,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
*m = size;
ret = (void *)m + minalign;

- trace_kmalloc_node(caller, ret,
+ trace_kmalloc_node(caller, ret, NULL,
size, size + minalign, gfp, node);
} else {
unsigned int order = get_order(size);
@@ -516,7 +516,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
gfp |= __GFP_COMP;
ret = slob_new_pages(gfp, order, node);

- trace_kmalloc_node(caller, ret,
+ trace_kmalloc_node(caller, ret, NULL,
size, PAGE_SIZE << order, gfp, node);
}

@@ -616,12 +616,12 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)

if (c->size < PAGE_SIZE) {
b = slob_alloc(c->size, flags, c->align, node, 0);
- trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
+ trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
SLOB_UNITS(c->size) * SLOB_UNIT,
flags, node);
} else {
b = slob_new_pages(flags, get_order(c->size), node);
- trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
+ trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
PAGE_SIZE << get_order(c->size),
flags, node);
}
diff --git a/mm/slub.c b/mm/slub.c
index e5535020e0fd..c323dd916b08 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3228,7 +3228,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
{
void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, s->object_size);

- trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
+ trace_kmem_cache_alloc(_RET_IP_, ret, s, s->object_size,
s->size, gfpflags);

return ret;
@@ -3251,7 +3251,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_lru);
void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
{
void *ret = slab_alloc(s, NULL, gfpflags, _RET_IP_, size);
- trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
+ trace_kmalloc(_RET_IP_, ret, s, size, s->size, gfpflags);
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
@@ -3263,7 +3263,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
{
void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);

- trace_kmem_cache_alloc_node(_RET_IP_, ret,
+ trace_kmem_cache_alloc_node(_RET_IP_, ret, s,
s->object_size, s->size, gfpflags, node);

return ret;
@@ -3277,7 +3277,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
{
void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);

- trace_kmalloc_node(_RET_IP_, ret,
+ trace_kmalloc_node(_RET_IP_, ret, s,
size, s->size, gfpflags, node);

ret = kasan_kmalloc(s, ret, size, gfpflags);
@@ -4412,7 +4412,7 @@ void *__kmalloc(size_t size, gfp_t flags)

ret = slab_alloc(s, NULL, flags, _RET_IP_, size);

- trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
+ trace_kmalloc(_RET_IP_, ret, s, size, s->size, flags);

ret = kasan_kmalloc(s, ret, size, flags);

@@ -4446,7 +4446,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
ret = kmalloc_large_node(size, flags, node);

- trace_kmalloc_node(_RET_IP_, ret,
+ trace_kmalloc_node(_RET_IP_, ret, NULL,
size, PAGE_SIZE << get_order(size),
flags, node);

@@ -4460,7 +4460,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)

ret = slab_alloc_node(s, NULL, flags, node, _RET_IP_, size);

- trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
+ trace_kmalloc_node(_RET_IP_, ret, s, size, s->size, flags, node);

ret = kasan_kmalloc(s, ret, size, flags);

@@ -4919,7 +4919,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
ret = slab_alloc(s, NULL, gfpflags, caller, size);

/* Honor the call site pointer we received. */
- trace_kmalloc(caller, ret, size, s->size, gfpflags);
+ trace_kmalloc(caller, ret, s, size, s->size, gfpflags);

return ret;
}
@@ -4935,7 +4935,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
ret = kmalloc_large_node(size, gfpflags, node);

- trace_kmalloc_node(caller, ret,
+ trace_kmalloc_node(caller, ret, NULL,
size, PAGE_SIZE << get_order(size),
gfpflags, node);

@@ -4950,7 +4950,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
ret = slab_alloc_node(s, NULL, gfpflags, node, caller, size);

/* Honor the call site pointer we received. */
- trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
+ trace_kmalloc_node(caller, ret, s, size, s->size, gfpflags, node);

return ret;
}
--
2.31.1


2022-05-31 13:51:46

by Muchun Song

[permalink] [raw]
Subject: Re: [PATCH v5] tracing: add 'accounted' entry into output of allocation tracepoints

On Mon, May 30, 2022 at 10:47:26AM +0300, Vasily Averin wrote:
> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
>
> This patch adds a boolean "accounted" entry into the trace output,
> and sets it to 'true' for calls that use the __GFP_ACCOUNT flag and
> for allocations from caches marked with SLAB_ACCOUNT.
>
> Signed-off-by: Vasily Averin <[email protected]>

Acked-by: Muchun Song <[email protected]>

Thanks.

2022-05-31 13:52:17

by Hyeonggon Yoo

[permalink] [raw]
Subject: Re: [PATCH v5] tracing: add 'accounted' entry into output of allocation tracepoints

On Mon, May 30, 2022 at 10:47:26AM +0300, Vasily Averin wrote:
> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
>
> This patch adds a boolean "accounted" entry into the trace output,
> and sets it to 'true' for calls that use the __GFP_ACCOUNT flag and
> for allocations from caches marked with SLAB_ACCOUNT.
>
> Signed-off-by: Vasily Averin <[email protected]>
> Acked-by: Shakeel Butt <[email protected]>
> Acked-by: Roman Gushchin <[email protected]>
>
> ---
> v5:
> 1) re-based to current upstream (v5.18-11267-gb00ed48bb0a7)
> 2) added "Acked-by" from Roman
>> 3) re-addressed to slab maintainers
>
> v4:
>> 1) replaced in patch description: "accounted" instead of "allocated"
> 2) added "Acked-by" from Shakeel,
> 3) re-addressed to akpm@
>
> v3:
> 1) rework kmem_cache_alloc* tracepoints once again,
> added struct kmem_cache argument into existing templates,
> thanks to Matthew Wilcox
>> 2) updated the corresponding trace_* calls
> 3) added boolean "allocated" entry into trace output,
> thanks to Roman
> 4) updated patch subject and description
>
> v2:
> 1) handle kmem_cache_alloc_node(), thanks to Shakeel
> 2) rework kmem_cache_alloc* tracepoints to use cachep instead
> of current cachep->*size parameters.
> NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
> and therefore it was replaced by kmalloc_node.
> ---
> include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
> mm/slab.c | 10 +++++-----
> mm/slab_common.c | 9 ++++-----
> mm/slob.c | 8 ++++----
> mm/slub.c | 20 +++++++++----------
> 5 files changed, 47 insertions(+), 38 deletions(-)
>

Looks good to me.
Reviewed-by: Hyeonggon Yoo <[email protected]>

a small comment:
>
> TP_fast_assign(
> @@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
> __entry->bytes_req = bytes_req;
> __entry->bytes_alloc = bytes_alloc;
> __entry->gfp_flags = (__force unsigned long)gfp_flags;
> + __entry->accounted = (gfp_flags & __GFP_ACCOUNT) ||
> + (s && s->flags & SLAB_ACCOUNT);
> ),
>

It doesn't make sense for SLOB to print accounted=true because SLOB does
not support object accounting.

> DEFINE_EVENT(kmem_alloc, kmalloc,
>
> - TP_PROTO(unsigned long call_site, const void *ptr,
> + TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
> size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
>
> - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
> + TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
> );
>
> DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,
>
> - TP_PROTO(unsigned long call_site, const void *ptr,
> + TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
> size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
>
> - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
> + TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
> );
>
> DECLARE_EVENT_CLASS(kmem_alloc_node,
>
> TP_PROTO(unsigned long call_site,
> const void *ptr,
> + struct kmem_cache *s,
> size_t bytes_req,
> size_t bytes_alloc,
> gfp_t gfp_flags,
> int node),
>
> - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
> + TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node),
>
> TP_STRUCT__entry(
> __field( unsigned long, call_site )
> @@ -77,6 +83,7 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
> __field( size_t, bytes_alloc )
> __field( unsigned long, gfp_flags )
> __field( int, node )
> + __field( bool, accounted )
> ),
>
> TP_fast_assign(
> @@ -86,33 +93,36 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
> __entry->bytes_alloc = bytes_alloc;
> __entry->gfp_flags = (__force unsigned long)gfp_flags;
> __entry->node = node;
> + __entry->accounted = (gfp_flags & __GFP_ACCOUNT) ||
> + (s && s->flags & SLAB_ACCOUNT);
> ),

2022-06-01 20:00:17

by Vasily Averin

[permalink] [raw]
Subject: Re: [PATCH v5] tracing: add 'accounted' entry into output of allocation tracepoints

On 5/31/22 14:46, Hyeonggon Yoo wrote:
> On Mon, May 30, 2022 at 10:47:26AM +0300, Vasily Averin wrote:
> Looks good to me.
> Reviewed-by: Hyeonggon Yoo <[email protected]>
>
> a small comment:
>>
>> TP_fast_assign(
>> @@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>> __entry->bytes_req = bytes_req;
>> __entry->bytes_alloc = bytes_alloc;
>> __entry->gfp_flags = (__force unsigned long)gfp_flags;
>> + __entry->accounted = (gfp_flags & __GFP_ACCOUNT) ||
>> + (s && s->flags & SLAB_ACCOUNT);
>> ),
>>
>
> It doesn't make sense for SLOB to print accounted=true because SLOB does
> not support object accounting.

Thank you very much for this comment.
SLAB_ACCOUNT is not defined for SLOB, but __GFP_ACCOUNT really can incorrectly
set this field to true.
I'll think about how to handle this correctly.

Thank you,
Vasily Averin
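One way to handle that, and the approach the v6 patch below ends up taking inside
the event templates, is to gate the whole computation on CONFIG_MEMCG_KMEM (which
cannot be selected together with SLOB). Expressed as a standalone sketch -- the
helper name is illustrative, the actual patch open-codes the expression:

	static inline bool kmem_alloc_accounted(struct kmem_cache *s, gfp_t gfp_flags)
	{
		/* no kmem accounting at all in this config (this also covers SLOB) */
		if (!IS_ENABLED(CONFIG_MEMCG_KMEM))
			return false;

		return (gfp_flags & __GFP_ACCOUNT) || (s && (s->flags & SLAB_ACCOUNT));
	}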

2022-06-06 03:45:58

by Vasily Averin

[permalink] [raw]
Subject: [PATCH mm v6] mm/tracing: add 'accounted' entry into output of allocation tracepoints

Slab caches marked with SLAB_ACCOUNT force accounting for every
allocation from this cache even if __GFP_ACCOUNT flag is not passed.
Unfortunately, at the moment this flag is not visible in ftrace output,
and this makes it difficult to analyze the accounted allocations.

This patch adds a boolean "accounted" entry into the trace output,
and sets it to 'true' for calls that use the __GFP_ACCOUNT flag and
for allocations from caches marked with SLAB_ACCOUNT.
The entry is set to 'false' if accounting is disabled in the kernel config.

Signed-off-by: Vasily Averin <[email protected]>
Acked-by: Shakeel Butt <[email protected]>
Acked-by: Roman Gushchin <[email protected]>
Acked-by: Muchun Song <[email protected]>
Reviewed-by: Hyeonggon Yoo <[email protected]>

---
v6:
1) subject changed from "tracing:" to "mm/tracing:"
2) set flag to 'false' if accounting is disabled in configs
3) added approvals from Muchun Song and Hyeonggon Yoo

v5:
1) re-based to current upstream (v5.18-11267-gb00ed48bb0a7)
2) added "Acked-by" from Roman
3) re-addressed to slab maintainers

v4:
1) replaced in patch description: "accounted" instead of "allocated"
2) added "Acked-by" from Shakeel,
3) re-addressed to akpm@

v3:
1) rework kmem_cache_alloc* tracepoints once again,
added struct kmem_cache argument into existing templates,
thanks to Matthew Wilcox
2) updated the corresponding trace_* calls
3) added boolean "allocated" entry into trace output,
thanks to Roman
4) updated patch subject and description

v2:
1) handle kmem_cache_alloc_node(), thanks to Shakeel
2) rework kmem_cache_alloc* tracepoints to use cachep instead
of current cachep->*size parameters.
NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
and therefore it was replaced by kmalloc_node.
---
include/trace/events/kmem.h | 39 ++++++++++++++++++++++++-------------
mm/slab.c | 10 +++++-----
mm/slab_common.c | 9 ++++-----
mm/slob.c | 8 ++++----
mm/slub.c | 20 +++++++++----------
5 files changed, 48 insertions(+), 38 deletions(-)

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index f76668305ac5..97f60ad82453 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -13,11 +13,12 @@ DECLARE_EVENT_CLASS(kmem_alloc,

TP_PROTO(unsigned long call_site,
const void *ptr,
+ struct kmem_cache *s,
size_t bytes_req,
size_t bytes_alloc,
gfp_t gfp_flags),

- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags),

TP_STRUCT__entry(
__field( unsigned long, call_site )
@@ -25,6 +26,7 @@ DECLARE_EVENT_CLASS(kmem_alloc,
__field( size_t, bytes_req )
__field( size_t, bytes_alloc )
__field( unsigned long, gfp_flags )
+ __field( bool, accounted )
),

TP_fast_assign(
@@ -33,42 +35,47 @@ DECLARE_EVENT_CLASS(kmem_alloc,
__entry->bytes_req = bytes_req;
__entry->bytes_alloc = bytes_alloc;
__entry->gfp_flags = (__force unsigned long)gfp_flags;
+ __entry->accounted = IS_ENABLED(CONFIG_MEMCG_KMEM) ?
+ ((gfp_flags & __GFP_ACCOUNT) ||
+ (s && s->flags & SLAB_ACCOUNT)) : false;
),

- TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
+ TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
(void *)__entry->call_site,
__entry->ptr,
__entry->bytes_req,
__entry->bytes_alloc,
- show_gfp_flags(__entry->gfp_flags))
+ show_gfp_flags(__entry->gfp_flags),
+ __entry->accounted ? "true" : "false")
);

DEFINE_EVENT(kmem_alloc, kmalloc,

- TP_PROTO(unsigned long call_site, const void *ptr,
+ TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),

- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
);

DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,

- TP_PROTO(unsigned long call_site, const void *ptr,
+ TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),

- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
);

DECLARE_EVENT_CLASS(kmem_alloc_node,

TP_PROTO(unsigned long call_site,
const void *ptr,
+ struct kmem_cache *s,
size_t bytes_req,
size_t bytes_alloc,
gfp_t gfp_flags,
int node),

- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node),

TP_STRUCT__entry(
__field( unsigned long, call_site )
@@ -77,6 +84,7 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
__field( size_t, bytes_alloc )
__field( unsigned long, gfp_flags )
__field( int, node )
+ __field( bool, accounted )
),

TP_fast_assign(
@@ -86,33 +94,36 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
__entry->bytes_alloc = bytes_alloc;
__entry->gfp_flags = (__force unsigned long)gfp_flags;
__entry->node = node;
+ __entry->accounted = (gfp_flags & __GFP_ACCOUNT) ||
+ (s && s->flags & SLAB_ACCOUNT);
),

- TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
+ TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d accounted=%s",
(void *)__entry->call_site,
__entry->ptr,
__entry->bytes_req,
__entry->bytes_alloc,
show_gfp_flags(__entry->gfp_flags),
- __entry->node)
+ __entry->node,
+ __entry->accounted ? "true" : "false")
);

DEFINE_EVENT(kmem_alloc_node, kmalloc_node,

TP_PROTO(unsigned long call_site, const void *ptr,
- size_t bytes_req, size_t bytes_alloc,
+ struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
gfp_t gfp_flags, int node),

- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
);

DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node,

TP_PROTO(unsigned long call_site, const void *ptr,
- size_t bytes_req, size_t bytes_alloc,
+ struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
gfp_t gfp_flags, int node),

- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
);

TRACE_EVENT(kfree,
diff --git a/mm/slab.c b/mm/slab.c
index f8cd00f4ba13..1f7864ff41e3 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3478,7 +3478,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
{
void *ret = slab_alloc(cachep, lru, flags, cachep->object_size, _RET_IP_);

- trace_kmem_cache_alloc(_RET_IP_, ret,
+ trace_kmem_cache_alloc(_RET_IP_, ret, cachep,
cachep->object_size, cachep->size, flags);

return ret;
@@ -3567,7 +3567,7 @@ kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
ret = slab_alloc(cachep, NULL, flags, size, _RET_IP_);

ret = kasan_kmalloc(cachep, ret, size, flags);
- trace_kmalloc(_RET_IP_, ret,
+ trace_kmalloc(_RET_IP_, ret, cachep,
size, cachep->size, flags);
return ret;
}
@@ -3592,7 +3592,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
void *ret = slab_alloc_node(cachep, flags, nodeid, cachep->object_size, _RET_IP_);

- trace_kmem_cache_alloc_node(_RET_IP_, ret,
+ trace_kmem_cache_alloc_node(_RET_IP_, ret, cachep,
cachep->object_size, cachep->size,
flags, nodeid);

@@ -3611,7 +3611,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
ret = slab_alloc_node(cachep, flags, nodeid, size, _RET_IP_);

ret = kasan_kmalloc(cachep, ret, size, flags);
- trace_kmalloc_node(_RET_IP_, ret,
+ trace_kmalloc_node(_RET_IP_, ret, cachep,
size, cachep->size,
flags, nodeid);
return ret;
@@ -3694,7 +3694,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
ret = slab_alloc(cachep, NULL, flags, size, caller);

ret = kasan_kmalloc(cachep, ret, size, flags);
- trace_kmalloc(caller, ret,
+ trace_kmalloc(caller, ret, cachep,
size, cachep->size, flags);

return ret;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 77c3adf40e50..6c9aac5d8f4a 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -26,13 +26,12 @@
#include <linux/memcontrol.h>
#include <linux/stackdepot.h>

-#define CREATE_TRACE_POINTS
-#include <trace/events/kmem.h>
-
#include "internal.h"
-
#include "slab.h"

+#define CREATE_TRACE_POINTS
+#include <trace/events/kmem.h>
+
enum slab_state slab_state;
LIST_HEAD(slab_caches);
DEFINE_MUTEX(slab_mutex);
@@ -959,7 +958,7 @@ EXPORT_SYMBOL(kmalloc_order);
void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
{
void *ret = kmalloc_order(size, flags, order);
- trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
+ trace_kmalloc(_RET_IP_, ret, NULL, size, PAGE_SIZE << order, flags);
return ret;
}
EXPORT_SYMBOL(kmalloc_order_trace);
diff --git a/mm/slob.c b/mm/slob.c
index f47811f09aca..56421fe461c4 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -507,7 +507,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
*m = size;
ret = (void *)m + minalign;

- trace_kmalloc_node(caller, ret,
+ trace_kmalloc_node(caller, ret, NULL,
size, size + minalign, gfp, node);
} else {
unsigned int order = get_order(size);
@@ -516,7 +516,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
gfp |= __GFP_COMP;
ret = slob_new_pages(gfp, order, node);

- trace_kmalloc_node(caller, ret,
+ trace_kmalloc_node(caller, ret, NULL,
size, PAGE_SIZE << order, gfp, node);
}

@@ -616,12 +616,12 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)

if (c->size < PAGE_SIZE) {
b = slob_alloc(c->size, flags, c->align, node, 0);
- trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
+ trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
SLOB_UNITS(c->size) * SLOB_UNIT,
flags, node);
} else {
b = slob_new_pages(flags, get_order(c->size), node);
- trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
+ trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
PAGE_SIZE << get_order(c->size),
flags, node);
}
diff --git a/mm/slub.c b/mm/slub.c
index e5535020e0fd..c323dd916b08 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3228,7 +3228,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
{
void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, s->object_size);

- trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
+ trace_kmem_cache_alloc(_RET_IP_, ret, s, s->object_size,
s->size, gfpflags);

return ret;
@@ -3251,7 +3251,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_lru);
void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
{
void *ret = slab_alloc(s, NULL, gfpflags, _RET_IP_, size);
- trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
+ trace_kmalloc(_RET_IP_, ret, s, size, s->size, gfpflags);
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
@@ -3263,7 +3263,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
{
void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);

- trace_kmem_cache_alloc_node(_RET_IP_, ret,
+ trace_kmem_cache_alloc_node(_RET_IP_, ret, s,
s->object_size, s->size, gfpflags, node);

return ret;
@@ -3277,7 +3277,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
{
void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);

- trace_kmalloc_node(_RET_IP_, ret,
+ trace_kmalloc_node(_RET_IP_, ret, s,
size, s->size, gfpflags, node);

ret = kasan_kmalloc(s, ret, size, gfpflags);
@@ -4412,7 +4412,7 @@ void *__kmalloc(size_t size, gfp_t flags)

ret = slab_alloc(s, NULL, flags, _RET_IP_, size);

- trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
+ trace_kmalloc(_RET_IP_, ret, s, size, s->size, flags);

ret = kasan_kmalloc(s, ret, size, flags);

@@ -4446,7 +4446,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
ret = kmalloc_large_node(size, flags, node);

- trace_kmalloc_node(_RET_IP_, ret,
+ trace_kmalloc_node(_RET_IP_, ret, NULL,
size, PAGE_SIZE << get_order(size),
flags, node);

@@ -4460,7 +4460,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)

ret = slab_alloc_node(s, NULL, flags, node, _RET_IP_, size);

- trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
+ trace_kmalloc_node(_RET_IP_, ret, s, size, s->size, flags, node);

ret = kasan_kmalloc(s, ret, size, flags);

@@ -4919,7 +4919,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
ret = slab_alloc(s, NULL, gfpflags, caller, size);

/* Honor the call site pointer we received. */
- trace_kmalloc(caller, ret, size, s->size, gfpflags);
+ trace_kmalloc(caller, ret, s, size, s->size, gfpflags);

return ret;
}
@@ -4935,7 +4935,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
ret = kmalloc_large_node(size, gfpflags, node);

- trace_kmalloc_node(caller, ret,
+ trace_kmalloc_node(caller, ret, NULL,
size, PAGE_SIZE << get_order(size),
gfpflags, node);

@@ -4950,7 +4950,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
ret = slab_alloc_node(s, NULL, gfpflags, node, caller, size);

/* Honor the call site pointer we received. */
- trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
+ trace_kmalloc_node(caller, ret, s, size, s->size, gfpflags, node);

return ret;
}
--
2.36.1

2022-06-15 10:05:36

by Vlastimil Babka

[permalink] [raw]
Subject: Re: [PATCH mm v6] mm/tracing: add 'accounted' entry into output of allocation tracepoints

On 6/3/22 05:21, Vasily Averin wrote:
> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
>
> This patch adds a boolean "accounted" entry into the trace output,
> and sets it to 'true' for calls that use the __GFP_ACCOUNT flag and
> for allocations from caches marked with SLAB_ACCOUNT.
> The entry is set to 'false' if accounting is disabled in the kernel config.
>
> Signed-off-by: Vasily Averin <[email protected]>
> Acked-by: Shakeel Butt <[email protected]>
> Acked-by: Roman Gushchin <[email protected]>
> Acked-by: Muchun Song <[email protected]>
> Reviewed-by: Hyeonggon Yoo <[email protected]>

Thanks, added to slab/for-5.20/tracing

>
> ---
> v6:
> 1) subject changed from "tracing:" to "mm/tracing:"
> 2) set flag to 'false' if accounting is disabled in configs

Looks like you forgot the kmem_alloc_node variant, fixed up locally:

--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -94,8 +94,9 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
__entry->bytes_alloc = bytes_alloc;
__entry->gfp_flags = (__force unsigned long)gfp_flags;
__entry->node = node;
- __entry->accounted = (gfp_flags & __GFP_ACCOUNT) ||
- (s && s->flags & SLAB_ACCOUNT);
+ __entry->accounted = IS_ENABLED(CONFIG_MEMCG_KMEM) ?
+ ((gfp_flags & __GFP_ACCOUNT) ||
+ (s && s->flags & SLAB_ACCOUNT)) : false;
),

TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d accounted=%s",