From: Dmitry Rokosov <[email protected]>
The motivation behind this commit is to enhance the traceability and
understanding of memcg events. By integrating the function cgroup_name()
into the existing memcg tracepoints, this patch series introduces a new
tracepoint template for the begin() and end() events. It utilizes a
static __array() to store the cgroup name, enabling developers to easily
identify the cgroup associated with a specific memcg tracepoint event.
Additionally, this patch series introduces new shrink_memcg tracepoints
to facilitate non-direct memcg reclaim tracing and debugging.
Dmitry Rokosov (2):
mm: memcg: print out cgroup name in the memcg tracepoints
mm: memcg: introduce new event to trace shrink_memcg
include/trace/events/vmscan.h | 91 ++++++++++++++++++++++++++++++-----
mm/vmscan.c | 15 ++++--
2 files changed, 90 insertions(+), 16 deletions(-)
--
2.25.1
Sometimes it is necessary to understand in which memcg tracepoint event
occurred. The function cgroup_name() is a useful tool for this purpose.
To integrate cgroup_name() into the existing memcg tracepoints, this
patch introduces a new tracepoint template for the begin() and end()
events, utilizing static __array() to store the cgroup name.
Signed-off-by: Dmitry Rokosov <[email protected]>
---
include/trace/events/vmscan.h | 77 +++++++++++++++++++++++++++++------
mm/vmscan.c | 8 ++--
2 files changed, 69 insertions(+), 16 deletions(-)
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index d2123dd960d5..124bc22866c8 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -141,19 +141,47 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_direct_reclaim_b
);
#ifdef CONFIG_MEMCG
-DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_reclaim_begin,
- TP_PROTO(int order, gfp_t gfp_flags),
+DECLARE_EVENT_CLASS(mm_vmscan_memcg_reclaim_begin_template,
- TP_ARGS(order, gfp_flags)
+ TP_PROTO(const struct mem_cgroup *memcg, int order, gfp_t gfp_flags),
+
+ TP_ARGS(memcg, order, gfp_flags),
+
+ TP_STRUCT__entry(
+ __field(int, order)
+ __field(unsigned long, gfp_flags)
+ __array(char, name, NAME_MAX + 1)
+ ),
+
+ TP_fast_assign(
+ __entry->order = order;
+ __entry->gfp_flags = (__force unsigned long)gfp_flags;
+ cgroup_name(memcg->css.cgroup,
+ __entry->name,
+ sizeof(__entry->name));
+ ),
+
+ TP_printk("memcg=%s order=%d gfp_flags=%s",
+ __entry->name,
+ __entry->order,
+ show_gfp_flags(__entry->gfp_flags))
);
-DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_softlimit_reclaim_begin,
+DEFINE_EVENT(mm_vmscan_memcg_reclaim_begin_template, mm_vmscan_memcg_reclaim_begin,
- TP_PROTO(int order, gfp_t gfp_flags),
+ TP_PROTO(const struct mem_cgroup *memcg, int order, gfp_t gfp_flags),
- TP_ARGS(order, gfp_flags)
+ TP_ARGS(memcg, order, gfp_flags)
+);
+
+DEFINE_EVENT(mm_vmscan_memcg_reclaim_begin_template, mm_vmscan_memcg_softlimit_reclaim_begin,
+
+ TP_PROTO(const struct mem_cgroup *memcg, int order, gfp_t gfp_flags),
+
+ TP_ARGS(memcg, order, gfp_flags)
);
+
#endif /* CONFIG_MEMCG */
DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_end_template,
@@ -181,19 +209,44 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_direct_reclaim_end
);
#ifdef CONFIG_MEMCG
-DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_reclaim_end,
- TP_PROTO(unsigned long nr_reclaimed),
+DECLARE_EVENT_CLASS(mm_vmscan_memcg_reclaim_end_template,
- TP_ARGS(nr_reclaimed)
+ TP_PROTO(const struct mem_cgroup *memcg, unsigned long nr_reclaimed),
+
+ TP_ARGS(memcg, nr_reclaimed),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, nr_reclaimed)
+ __array(char, name, NAME_MAX + 1)
+ ),
+
+ TP_fast_assign(
+ __entry->nr_reclaimed = nr_reclaimed;
+ cgroup_name(memcg->css.cgroup,
+ __entry->name,
+ sizeof(__entry->name));
+ ),
+
+ TP_printk("memcg=%s nr_reclaimed=%lu",
+ __entry->name,
+ __entry->nr_reclaimed)
);
-DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_softlimit_reclaim_end,
+DEFINE_EVENT(mm_vmscan_memcg_reclaim_end_template, mm_vmscan_memcg_reclaim_end,
- TP_PROTO(unsigned long nr_reclaimed),
+ TP_PROTO(const struct mem_cgroup *memcg, unsigned long nr_reclaimed),
- TP_ARGS(nr_reclaimed)
+ TP_ARGS(memcg, nr_reclaimed)
);
+
+DEFINE_EVENT(mm_vmscan_memcg_reclaim_end_template, mm_vmscan_memcg_softlimit_reclaim_end,
+
+ TP_PROTO(const struct mem_cgroup *memcg, unsigned long nr_reclaimed),
+
+ TP_ARGS(memcg, nr_reclaimed)
+);
+
#endif /* CONFIG_MEMCG */
TRACE_EVENT(mm_shrink_slab_start,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1080209a568b..4309eaf188b4 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -7088,7 +7088,7 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
- trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order,
+ trace_mm_vmscan_memcg_softlimit_reclaim_begin(memcg, sc.order,
sc.gfp_mask);
/*
@@ -7100,7 +7100,7 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
*/
shrink_lruvec(lruvec, &sc);
- trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
+ trace_mm_vmscan_memcg_softlimit_reclaim_end(memcg, sc.nr_reclaimed);
*nr_scanned = sc.nr_scanned;
@@ -7134,13 +7134,13 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
set_task_reclaim_state(current, &sc.reclaim_state);
- trace_mm_vmscan_memcg_reclaim_begin(0, sc.gfp_mask);
+ trace_mm_vmscan_memcg_reclaim_begin(memcg, 0, sc.gfp_mask);
noreclaim_flag = memalloc_noreclaim_save();
nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
memalloc_noreclaim_restore(noreclaim_flag);
- trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
+ trace_mm_vmscan_memcg_reclaim_end(memcg, nr_reclaimed);
set_task_reclaim_state(current, NULL);
return nr_reclaimed;
--
2.25.1
The shrink_memcg flow plays a crucial role in memcg reclamation.
Currently, it is not possible to trace this point from non-direct
reclaim paths. However, direct reclaim has its own tracepoint, so there
is no issue there. In certain cases, when debugging memcg pressure,
developers may need to identify all potential requests for memcg
reclamation including kswapd(). The patchset introduces the tracepoints
mm_vmscan_memcg_shrink_{begin|end}() to address this problem.
Example of output in the kswapd context (non-direct reclaim):
kswapd0-39 [001] ..... 240.356378: mm_vmscan_memcg_shrink_begin: memcg=test order=0 gfp_flags=GFP_KERNEL
kswapd0-39 [001] ..... 240.356396: mm_vmscan_memcg_shrink_end: memcg=test nr_reclaimed=0
kswapd0-39 [001] ..... 240.356420: mm_vmscan_memcg_shrink_begin: memcg=test oorder=0 gfp_flags=GFP_KERNEL
kswapd0-39 [001] ..... 240.356454: mm_vmscan_memcg_shrink_end: memcg=test nr_reclaimed=1
kswapd0-39 [001] ..... 240.356479: mm_vmscan_memcg_shrink_begin: memcg=test oorder=0 gfp_flags=GFP_KERNEL
kswapd0-39 [001] ..... 240.356506: mm_vmscan_memcg_shrink_end: memcg=test nr_reclaimed=4
kswapd0-39 [001] ..... 240.356525: mm_vmscan_memcg_shrink_begin: memcg=test oorder=0 gfp_flags=GFP_KERNEL
kswapd0-39 [001] ..... 240.356593: mm_vmscan_memcg_shrink_end: memcg=test nr_reclaimed=11
kswapd0-39 [001] ..... 240.356614: mm_vmscan_memcg_shrink_begin: memcg=test oorder=0 gfp_flags=GFP_KERNEL
kswapd0-39 [001] ..... 240.356738: mm_vmscan_memcg_shrink_end: memcg=test nr_reclaimed=25
kswapd0-39 [001] ..... 240.356790: mm_vmscan_memcg_shrink_begin: memcg=test oorder=0 gfp_flags=GFP_KERNEL
kswapd0-39 [001] ..... 240.357125: mm_vmscan_memcg_shrink_end: memcg=test nr_reclaimed=53
Signed-off-by: Dmitry Rokosov <[email protected]>
---
include/trace/events/vmscan.h | 14 ++++++++++++++
mm/vmscan.c | 7 +++++++
2 files changed, 21 insertions(+)
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index 124bc22866c8..518e7232c9eb 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -182,6 +182,13 @@ DEFINE_EVENT(mm_vmscan_memcg_reclaim_begin_template, mm_vmscan_memcg_softlimit_r
TP_ARGS(memcg, order, gfp_flags)
);
+DEFINE_EVENT(mm_vmscan_memcg_reclaim_begin_template, mm_vmscan_memcg_shrink_begin,
+
+ TP_PROTO(const struct mem_cgroup *memcg, int order, gfp_t gfp_flags),
+
+ TP_ARGS(memcg, order, gfp_flags)
+);
+
#endif /* CONFIG_MEMCG */
DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_end_template,
@@ -247,6 +254,13 @@ DEFINE_EVENT(mm_vmscan_memcg_reclaim_end_template, mm_vmscan_memcg_softlimit_rec
TP_ARGS(memcg, nr_reclaimed)
);
+DEFINE_EVENT(mm_vmscan_memcg_reclaim_end_template, mm_vmscan_memcg_shrink_end,
+
+ TP_PROTO(const struct mem_cgroup *memcg, unsigned long nr_reclaimed),
+
+ TP_ARGS(memcg, nr_reclaimed)
+);
+
#endif /* CONFIG_MEMCG */
TRACE_EVENT(mm_shrink_slab_start,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4309eaf188b4..6b9619922dfb 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -6461,6 +6461,10 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
*/
cond_resched();
+ trace_mm_vmscan_memcg_shrink_begin(memcg,
+ sc->order,
+ sc->gfp_mask);
+
mem_cgroup_calculate_protection(target_memcg, memcg);
if (mem_cgroup_below_min(target_memcg, memcg)) {
@@ -6491,6 +6495,9 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
shrink_slab(sc->gfp_mask, pgdat->node_id, memcg,
sc->priority);
+ trace_mm_vmscan_memcg_shrink_end(memcg,
+ sc->nr_reclaimed - reclaimed);
+
/* Record the group's reclaim efficiency */
if (!sc->proactive)
vmpressure(sc->gfp_mask, memcg, false,
--
2.25.1
Hi Dmitry,
kernel test robot noticed the following build errors:
[auto build test ERROR on akpm-mm/mm-everything]
url: https://github.com/intel-lab-lkp/linux/commits/Dmitry-Rokosov/mm-memcg-print-out-cgroup-name-in-the-memcg-tracepoints/20231101-183040
base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link: https://lore.kernel.org/r/20231101102837.25205-3-ddrokosov%40salutedevices.com
patch subject: [PATCH v1 2/2] mm: memcg: introduce new event to trace shrink_memcg
config: um-allyesconfig (https://download.01.org/0day-ci/archive/20231101/[email protected]/config)
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project.git f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231101/[email protected]/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/
All errors (new ones prefixed by >>):
In file included from mm/vmscan.c:19:
In file included from include/linux/kernel_stat.h:9:
In file included from include/linux/interrupt.h:11:
In file included from include/linux/hardirq.h:11:
In file included from arch/um/include/asm/hardirq.h:5:
In file included from include/asm-generic/hardirq.h:17:
In file included from include/linux/irq.h:20:
In file included from include/linux/io.h:13:
In file included from arch/um/include/asm/io.h:24:
include/asm-generic/io.h:547:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
val = __raw_readb(PCI_IOBASE + addr);
~~~~~~~~~~ ^
include/asm-generic/io.h:560:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
~~~~~~~~~~ ^
include/uapi/linux/byteorder/little_endian.h:37:51: note: expanded from macro '__le16_to_cpu'
#define __le16_to_cpu(x) ((__force __u16)(__le16)(x))
^
In file included from mm/vmscan.c:19:
In file included from include/linux/kernel_stat.h:9:
In file included from include/linux/interrupt.h:11:
In file included from include/linux/hardirq.h:11:
In file included from arch/um/include/asm/hardirq.h:5:
In file included from include/asm-generic/hardirq.h:17:
In file included from include/linux/irq.h:20:
In file included from include/linux/io.h:13:
In file included from arch/um/include/asm/io.h:24:
include/asm-generic/io.h:573:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
~~~~~~~~~~ ^
include/uapi/linux/byteorder/little_endian.h:35:51: note: expanded from macro '__le32_to_cpu'
#define __le32_to_cpu(x) ((__force __u32)(__le32)(x))
^
In file included from mm/vmscan.c:19:
In file included from include/linux/kernel_stat.h:9:
In file included from include/linux/interrupt.h:11:
In file included from include/linux/hardirq.h:11:
In file included from arch/um/include/asm/hardirq.h:5:
In file included from include/asm-generic/hardirq.h:17:
In file included from include/linux/irq.h:20:
In file included from include/linux/io.h:13:
In file included from arch/um/include/asm/io.h:24:
include/asm-generic/io.h:584:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
__raw_writeb(value, PCI_IOBASE + addr);
~~~~~~~~~~ ^
include/asm-generic/io.h:594:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
__raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
~~~~~~~~~~ ^
include/asm-generic/io.h:604:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
__raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
~~~~~~~~~~ ^
include/asm-generic/io.h:692:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
readsb(PCI_IOBASE + addr, buffer, count);
~~~~~~~~~~ ^
include/asm-generic/io.h:700:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
readsw(PCI_IOBASE + addr, buffer, count);
~~~~~~~~~~ ^
include/asm-generic/io.h:708:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
readsl(PCI_IOBASE + addr, buffer, count);
~~~~~~~~~~ ^
include/asm-generic/io.h:717:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
writesb(PCI_IOBASE + addr, buffer, count);
~~~~~~~~~~ ^
include/asm-generic/io.h:726:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
writesw(PCI_IOBASE + addr, buffer, count);
~~~~~~~~~~ ^
include/asm-generic/io.h:735:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
writesl(PCI_IOBASE + addr, buffer, count);
~~~~~~~~~~ ^
>> mm/vmscan.c:5811:3: error: implicit declaration of function 'trace_mm_vmscan_memcg_shrink_begin' is invalid in C99 [-Werror,-Wimplicit-function-declaration]
trace_mm_vmscan_memcg_shrink_begin(memcg,
^
mm/vmscan.c:5811:3: note: did you mean 'trace_mm_vmscan_lru_shrink_active'?
include/trace/events/vmscan.h:467:1: note: 'trace_mm_vmscan_lru_shrink_active' declared here
TRACE_EVENT(mm_vmscan_lru_shrink_active,
^
include/linux/tracepoint.h:566:2: note: expanded from macro 'TRACE_EVENT'
DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
^
include/linux/tracepoint.h:432:2: note: expanded from macro 'DECLARE_TRACE'
__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \
^
include/linux/tracepoint.h:355:21: note: expanded from macro '__DECLARE_TRACE'
static inline void trace_##name(proto) \
^
<scratch space>:33:1: note: expanded from here
trace_mm_vmscan_lru_shrink_active
^
>> mm/vmscan.c:5845:3: error: implicit declaration of function 'trace_mm_vmscan_memcg_shrink_end' is invalid in C99 [-Werror,-Wimplicit-function-declaration]
trace_mm_vmscan_memcg_shrink_end(memcg,
^
mm/vmscan.c:5845:3: note: did you mean 'trace_mm_vmscan_memcg_shrink_begin'?
mm/vmscan.c:5811:3: note: 'trace_mm_vmscan_memcg_shrink_begin' declared here
trace_mm_vmscan_memcg_shrink_begin(memcg,
^
12 warnings and 2 errors generated.
vim +/trace_mm_vmscan_memcg_shrink_begin +5811 mm/vmscan.c
5791
5792 static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
5793 {
5794 struct mem_cgroup *target_memcg = sc->target_mem_cgroup;
5795 struct mem_cgroup *memcg;
5796
5797 memcg = mem_cgroup_iter(target_memcg, NULL, NULL);
5798 do {
5799 struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
5800 unsigned long reclaimed;
5801 unsigned long scanned;
5802
5803 /*
5804 * This loop can become CPU-bound when target memcgs
5805 * aren't eligible for reclaim - either because they
5806 * don't have any reclaimable pages, or because their
5807 * memory is explicitly protected. Avoid soft lockups.
5808 */
5809 cond_resched();
5810
> 5811 trace_mm_vmscan_memcg_shrink_begin(memcg,
5812 sc->order,
5813 sc->gfp_mask);
5814
5815 mem_cgroup_calculate_protection(target_memcg, memcg);
5816
5817 if (mem_cgroup_below_min(target_memcg, memcg)) {
5818 /*
5819 * Hard protection.
5820 * If there is no reclaimable memory, OOM.
5821 */
5822 continue;
5823 } else if (mem_cgroup_below_low(target_memcg, memcg)) {
5824 /*
5825 * Soft protection.
5826 * Respect the protection only as long as
5827 * there is an unprotected supply
5828 * of reclaimable memory from other cgroups.
5829 */
5830 if (!sc->memcg_low_reclaim) {
5831 sc->memcg_low_skipped = 1;
5832 continue;
5833 }
5834 memcg_memory_event(memcg, MEMCG_LOW);
5835 }
5836
5837 reclaimed = sc->nr_reclaimed;
5838 scanned = sc->nr_scanned;
5839
5840 shrink_lruvec(lruvec, sc);
5841
5842 shrink_slab(sc->gfp_mask, pgdat->node_id, memcg,
5843 sc->priority);
5844
> 5845 trace_mm_vmscan_memcg_shrink_end(memcg,
5846 sc->nr_reclaimed - reclaimed);
5847
5848 /* Record the group's reclaim efficiency */
5849 if (!sc->proactive)
5850 vmpressure(sc->gfp_mask, memcg, false,
5851 sc->nr_scanned - scanned,
5852 sc->nr_reclaimed - reclaimed);
5853
5854 } while ((memcg = mem_cgroup_iter(target_memcg, memcg, NULL)));
5855 }
5856
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
On Wed, Nov 1, 2023 at 3:29 AM Dmitry Rokosov
<[email protected]> wrote:
>
> Sometimes it is necessary to understand in which memcg tracepoint event
> occurred. The function cgroup_name() is a useful tool for this purpose.
> To integrate cgroup_name() into the existing memcg tracepoints, this
> patch introduces a new tracepoint template for the begin() and end()
> events, utilizing static __array() to store the cgroup name.
>
> Signed-off-by: Dmitry Rokosov <[email protected]>
> ---
> include/trace/events/vmscan.h | 77 +++++++++++++++++++++++++++++------
> mm/vmscan.c | 8 ++--
> 2 files changed, 69 insertions(+), 16 deletions(-)
>
> diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
> index d2123dd960d5..124bc22866c8 100644
> --- a/include/trace/events/vmscan.h
> +++ b/include/trace/events/vmscan.h
> @@ -141,19 +141,47 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_direct_reclaim_b
> );
>
> #ifdef CONFIG_MEMCG
> -DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_reclaim_begin,
>
> - TP_PROTO(int order, gfp_t gfp_flags),
> +DECLARE_EVENT_CLASS(mm_vmscan_memcg_reclaim_begin_template,
>
> - TP_ARGS(order, gfp_flags)
> + TP_PROTO(const struct mem_cgroup *memcg, int order, gfp_t gfp_flags),
> +
> + TP_ARGS(memcg, order, gfp_flags),
By adding memcg in front of existing tracepoint arguments, you
unnecessarily break everyone who currently has some scripts based on
this tracepoint. Given there is no reason why memcg has to be the very
first argument, it would be nice if you can just append it at the end
to make it nicely backwards compatible. Same for other tracepoints
below.
Tracepoints are not an ABI, but there is also no point in arbitrarily
breaking all current scripts for such a trivial reason.
> +
> + TP_STRUCT__entry(
> + __field(int, order)
> + __field(unsigned long, gfp_flags)
> + __array(char, name, NAME_MAX + 1)
> + ),
> +
> + TP_fast_assign(
> + __entry->order = order;
> + __entry->gfp_flags = (__force unsigned long)gfp_flags;
> + cgroup_name(memcg->css.cgroup,
> + __entry->name,
> + sizeof(__entry->name));
> + ),
> +
> + TP_printk("memcg=%s order=%d gfp_flags=%s",
> + __entry->name,
> + __entry->order,
> + show_gfp_flags(__entry->gfp_flags))
> );
[...]
Oh, I apologize for that. It seems that I need to wrap the new
tracepoint calls with CONFIG_MEMCG. I will proceed to prepare the new
version.
On Wed, Nov 01, 2023 at 11:44:10PM +0800, kernel test robot wrote:
> Hi Dmitry,
>
> kernel test robot noticed the following build errors:
>
> [auto build test ERROR on akpm-mm/mm-everything]
>
> url: https://github.com/intel-lab-lkp/linux/commits/Dmitry-Rokosov/mm-memcg-print-out-cgroup-name-in-the-memcg-tracepoints/20231101-183040
> base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
> patch link: https://lore.kernel.org/r/20231101102837.25205-3-ddrokosov%40salutedevices.com
> patch subject: [PATCH v1 2/2] mm: memcg: introduce new event to trace shrink_memcg
> config: um-allyesconfig (https://download.01.org/0day-ci/archive/20231101/[email protected]/config)
> compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project.git f28c006a5895fc0e329fe15fead81e37457cb1d1)
> reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231101/[email protected]/reproduce)
>
> If you fix the issue in a separate patch/commit (i.e. not just a new version of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot <[email protected]>
> | Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/
>
> All errors (new ones prefixed by >>):
>
> In file included from mm/vmscan.c:19:
> In file included from include/linux/kernel_stat.h:9:
> In file included from include/linux/interrupt.h:11:
> In file included from include/linux/hardirq.h:11:
> In file included from arch/um/include/asm/hardirq.h:5:
> In file included from include/asm-generic/hardirq.h:17:
> In file included from include/linux/irq.h:20:
> In file included from include/linux/io.h:13:
> In file included from arch/um/include/asm/io.h:24:
> include/asm-generic/io.h:547:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
> val = __raw_readb(PCI_IOBASE + addr);
> ~~~~~~~~~~ ^
> include/asm-generic/io.h:560:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
> val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
> ~~~~~~~~~~ ^
> include/uapi/linux/byteorder/little_endian.h:37:51: note: expanded from macro '__le16_to_cpu'
> #define __le16_to_cpu(x) ((__force __u16)(__le16)(x))
> ^
> In file included from mm/vmscan.c:19:
> In file included from include/linux/kernel_stat.h:9:
> In file included from include/linux/interrupt.h:11:
> In file included from include/linux/hardirq.h:11:
> In file included from arch/um/include/asm/hardirq.h:5:
> In file included from include/asm-generic/hardirq.h:17:
> In file included from include/linux/irq.h:20:
> In file included from include/linux/io.h:13:
> In file included from arch/um/include/asm/io.h:24:
> include/asm-generic/io.h:573:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
> val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
> ~~~~~~~~~~ ^
> include/uapi/linux/byteorder/little_endian.h:35:51: note: expanded from macro '__le32_to_cpu'
> #define __le32_to_cpu(x) ((__force __u32)(__le32)(x))
> ^
> In file included from mm/vmscan.c:19:
> In file included from include/linux/kernel_stat.h:9:
> In file included from include/linux/interrupt.h:11:
> In file included from include/linux/hardirq.h:11:
> In file included from arch/um/include/asm/hardirq.h:5:
> In file included from include/asm-generic/hardirq.h:17:
> In file included from include/linux/irq.h:20:
> In file included from include/linux/io.h:13:
> In file included from arch/um/include/asm/io.h:24:
> include/asm-generic/io.h:584:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
> __raw_writeb(value, PCI_IOBASE + addr);
> ~~~~~~~~~~ ^
> include/asm-generic/io.h:594:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
> __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
> ~~~~~~~~~~ ^
> include/asm-generic/io.h:604:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
> __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
> ~~~~~~~~~~ ^
> include/asm-generic/io.h:692:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
> readsb(PCI_IOBASE + addr, buffer, count);
> ~~~~~~~~~~ ^
> include/asm-generic/io.h:700:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
> readsw(PCI_IOBASE + addr, buffer, count);
> ~~~~~~~~~~ ^
> include/asm-generic/io.h:708:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
> readsl(PCI_IOBASE + addr, buffer, count);
> ~~~~~~~~~~ ^
> include/asm-generic/io.h:717:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
> writesb(PCI_IOBASE + addr, buffer, count);
> ~~~~~~~~~~ ^
> include/asm-generic/io.h:726:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
> writesw(PCI_IOBASE + addr, buffer, count);
> ~~~~~~~~~~ ^
> include/asm-generic/io.h:735:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
> writesl(PCI_IOBASE + addr, buffer, count);
> ~~~~~~~~~~ ^
> >> mm/vmscan.c:5811:3: error: implicit declaration of function 'trace_mm_vmscan_memcg_shrink_begin' is invalid in C99 [-Werror,-Wimplicit-function-declaration]
> trace_mm_vmscan_memcg_shrink_begin(memcg,
> ^
> mm/vmscan.c:5811:3: note: did you mean 'trace_mm_vmscan_lru_shrink_active'?
> include/trace/events/vmscan.h:467:1: note: 'trace_mm_vmscan_lru_shrink_active' declared here
> TRACE_EVENT(mm_vmscan_lru_shrink_active,
> ^
> include/linux/tracepoint.h:566:2: note: expanded from macro 'TRACE_EVENT'
> DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
> ^
> include/linux/tracepoint.h:432:2: note: expanded from macro 'DECLARE_TRACE'
> __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \
> ^
> include/linux/tracepoint.h:355:21: note: expanded from macro '__DECLARE_TRACE'
> static inline void trace_##name(proto) \
> ^
> <scratch space>:33:1: note: expanded from here
> trace_mm_vmscan_lru_shrink_active
> ^
> >> mm/vmscan.c:5845:3: error: implicit declaration of function 'trace_mm_vmscan_memcg_shrink_end' is invalid in C99 [-Werror,-Wimplicit-function-declaration]
> trace_mm_vmscan_memcg_shrink_end(memcg,
> ^
> mm/vmscan.c:5845:3: note: did you mean 'trace_mm_vmscan_memcg_shrink_begin'?
> mm/vmscan.c:5811:3: note: 'trace_mm_vmscan_memcg_shrink_begin' declared here
> trace_mm_vmscan_memcg_shrink_begin(memcg,
> ^
> 12 warnings and 2 errors generated.
>
>
> vim +/trace_mm_vmscan_memcg_shrink_begin +5811 mm/vmscan.c
>
> 5791
> 5792 static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
> 5793 {
> 5794 struct mem_cgroup *target_memcg = sc->target_mem_cgroup;
> 5795 struct mem_cgroup *memcg;
> 5796
> 5797 memcg = mem_cgroup_iter(target_memcg, NULL, NULL);
> 5798 do {
> 5799 struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
> 5800 unsigned long reclaimed;
> 5801 unsigned long scanned;
> 5802
> 5803 /*
> 5804 * This loop can become CPU-bound when target memcgs
> 5805 * aren't eligible for reclaim - either because they
> 5806 * don't have any reclaimable pages, or because their
> 5807 * memory is explicitly protected. Avoid soft lockups.
> 5808 */
> 5809 cond_resched();
> 5810
> > 5811 trace_mm_vmscan_memcg_shrink_begin(memcg,
> 5812 sc->order,
> 5813 sc->gfp_mask);
> 5814
> 5815 mem_cgroup_calculate_protection(target_memcg, memcg);
> 5816
> 5817 if (mem_cgroup_below_min(target_memcg, memcg)) {
> 5818 /*
> 5819 * Hard protection.
> 5820 * If there is no reclaimable memory, OOM.
> 5821 */
> 5822 continue;
> 5823 } else if (mem_cgroup_below_low(target_memcg, memcg)) {
> 5824 /*
> 5825 * Soft protection.
> 5826 * Respect the protection only as long as
> 5827 * there is an unprotected supply
> 5828 * of reclaimable memory from other cgroups.
> 5829 */
> 5830 if (!sc->memcg_low_reclaim) {
> 5831 sc->memcg_low_skipped = 1;
> 5832 continue;
> 5833 }
> 5834 memcg_memory_event(memcg, MEMCG_LOW);
> 5835 }
> 5836
> 5837 reclaimed = sc->nr_reclaimed;
> 5838 scanned = sc->nr_scanned;
> 5839
> 5840 shrink_lruvec(lruvec, sc);
> 5841
> 5842 shrink_slab(sc->gfp_mask, pgdat->node_id, memcg,
> 5843 sc->priority);
> 5844
> > 5845 trace_mm_vmscan_memcg_shrink_end(memcg,
> 5846 sc->nr_reclaimed - reclaimed);
> 5847
> 5848 /* Record the group's reclaim efficiency */
> 5849 if (!sc->proactive)
> 5850 vmpressure(sc->gfp_mask, memcg, false,
> 5851 sc->nr_scanned - scanned,
> 5852 sc->nr_reclaimed - reclaimed);
> 5853
> 5854 } while ((memcg = mem_cgroup_iter(target_memcg, memcg, NULL)));
> 5855 }
> 5856
>
> --
> 0-DAY CI Kernel Test Service
> https://github.com/intel/lkp-tests/wiki
--
Thank you,
Dmitry
Hello Andrii!
Thank you for quick feedback!
On Wed, Nov 01, 2023 at 10:08:34AM -0700, Andrii Nakryiko wrote:
> On Wed, Nov 1, 2023 at 3:29 AM Dmitry Rokosov
> <[email protected]> wrote:
> >
> > Sometimes it is necessary to understand in which memcg tracepoint event
> > occurred. The function cgroup_name() is a useful tool for this purpose.
> > To integrate cgroup_name() into the existing memcg tracepoints, this
> > patch introduces a new tracepoint template for the begin() and end()
> > events, utilizing static __array() to store the cgroup name.
> >
> > Signed-off-by: Dmitry Rokosov <[email protected]>
> > ---
> > include/trace/events/vmscan.h | 77 +++++++++++++++++++++++++++++------
> > mm/vmscan.c | 8 ++--
> > 2 files changed, 69 insertions(+), 16 deletions(-)
> >
> > diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
> > index d2123dd960d5..124bc22866c8 100644
> > --- a/include/trace/events/vmscan.h
> > +++ b/include/trace/events/vmscan.h
> > @@ -141,19 +141,47 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_direct_reclaim_b
> > );
> >
> > #ifdef CONFIG_MEMCG
> > -DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_reclaim_begin,
> >
> > - TP_PROTO(int order, gfp_t gfp_flags),
> > +DECLARE_EVENT_CLASS(mm_vmscan_memcg_reclaim_begin_template,
> >
> > - TP_ARGS(order, gfp_flags)
> > + TP_PROTO(const struct mem_cgroup *memcg, int order, gfp_t gfp_flags),
> > +
> > + TP_ARGS(memcg, order, gfp_flags),
>
> By adding memcg in front of existing tracepoint arguments, you
> unnecessarily break everyone who currently has some scripts based on
> this tracepoint. Given there is no reason why memcg has to be the very
> first argument, it would be nice if you can just append it at the end
> to make it nicely backwards compatible. Same for other tracepoints
> below.
>
> Tracepoints are not an ABI, but there is also no point in arbitrarily
> breaking all current scripts for such a trivial reason.
>
You are absolutely correct. I didn't consider the scripts that rely on
these tracepoints, because tracepoints are not an ABI, as you mentioned.
Additionally, I added the memcg parameter as the first argument based on
my personal programming patterns, where the context object should always
come first :)
I apologize for that and will prepare new version.
> > +
> > + TP_STRUCT__entry(
> > + __field(int, order)
> > + __field(unsigned long, gfp_flags)
> > + __array(char, name, NAME_MAX + 1)
> > + ),
> > +
> > + TP_fast_assign(
> > + __entry->order = order;
> > + __entry->gfp_flags = (__force unsigned long)gfp_flags;
> > + cgroup_name(memcg->css.cgroup,
> > + __entry->name,
> > + sizeof(__entry->name));
> > + ),
> > +
> > + TP_printk("memcg=%s order=%d gfp_flags=%s",
> > + __entry->name,
> > + __entry->order,
> > + show_gfp_flags(__entry->gfp_flags))
> > );
>
> [...]
--
Thank you,
Dmitry
Hi Dmitry,
kernel test robot noticed the following build errors:
[auto build test ERROR on akpm-mm/mm-everything]
url: https://github.com/intel-lab-lkp/linux/commits/Dmitry-Rokosov/mm-memcg-print-out-cgroup-name-in-the-memcg-tracepoints/20231101-183040
base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link: https://lore.kernel.org/r/20231101102837.25205-3-ddrokosov%40salutedevices.com
patch subject: [PATCH v1 2/2] mm: memcg: introduce new event to trace shrink_memcg
config: sh-allnoconfig (https://download.01.org/0day-ci/archive/20231102/[email protected]/config)
compiler: sh4-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231102/[email protected]/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/
All errors (new ones prefixed by >>):
mm/vmscan.c: In function 'shrink_node_memcgs':
>> mm/vmscan.c:5811:17: error: implicit declaration of function 'trace_mm_vmscan_memcg_shrink_begin'; did you mean 'trace_mm_vmscan_lru_shrink_active'? [-Werror=implicit-function-declaration]
5811 | trace_mm_vmscan_memcg_shrink_begin(memcg,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| trace_mm_vmscan_lru_shrink_active
mm/vmscan.c:5845:17: error: implicit declaration of function 'trace_mm_vmscan_memcg_shrink_end'; did you mean 'trace_mm_vmscan_lru_shrink_active'? [-Werror=implicit-function-declaration]
5845 | trace_mm_vmscan_memcg_shrink_end(memcg,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| trace_mm_vmscan_lru_shrink_active
cc1: some warnings being treated as errors
vim +5811 mm/vmscan.c
5791
5792 static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
5793 {
5794 struct mem_cgroup *target_memcg = sc->target_mem_cgroup;
5795 struct mem_cgroup *memcg;
5796
5797 memcg = mem_cgroup_iter(target_memcg, NULL, NULL);
5798 do {
5799 struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
5800 unsigned long reclaimed;
5801 unsigned long scanned;
5802
5803 /*
5804 * This loop can become CPU-bound when target memcgs
5805 * aren't eligible for reclaim - either because they
5806 * don't have any reclaimable pages, or because their
5807 * memory is explicitly protected. Avoid soft lockups.
5808 */
5809 cond_resched();
5810
> 5811 trace_mm_vmscan_memcg_shrink_begin(memcg,
5812 sc->order,
5813 sc->gfp_mask);
5814
5815 mem_cgroup_calculate_protection(target_memcg, memcg);
5816
5817 if (mem_cgroup_below_min(target_memcg, memcg)) {
5818 /*
5819 * Hard protection.
5820 * If there is no reclaimable memory, OOM.
5821 */
5822 continue;
5823 } else if (mem_cgroup_below_low(target_memcg, memcg)) {
5824 /*
5825 * Soft protection.
5826 * Respect the protection only as long as
5827 * there is an unprotected supply
5828 * of reclaimable memory from other cgroups.
5829 */
5830 if (!sc->memcg_low_reclaim) {
5831 sc->memcg_low_skipped = 1;
5832 continue;
5833 }
5834 memcg_memory_event(memcg, MEMCG_LOW);
5835 }
5836
5837 reclaimed = sc->nr_reclaimed;
5838 scanned = sc->nr_scanned;
5839
5840 shrink_lruvec(lruvec, sc);
5841
5842 shrink_slab(sc->gfp_mask, pgdat->node_id, memcg,
5843 sc->priority);
5844
5845 trace_mm_vmscan_memcg_shrink_end(memcg,
5846 sc->nr_reclaimed - reclaimed);
5847
5848 /* Record the group's reclaim efficiency */
5849 if (!sc->proactive)
5850 vmpressure(sc->gfp_mask, memcg, false,
5851 sc->nr_scanned - scanned,
5852 sc->nr_reclaimed - reclaimed);
5853
5854 } while ((memcg = mem_cgroup_iter(target_memcg, memcg, NULL)));
5855 }
5856
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki