Commit e27be240df53 ("mm: memcg: make sure memory.events is
uptodate when waking pollers") converted most of memcg event
counters to per-memcg atomics, which made them less confusing
for a user. The "oom_kill" counter remained untouched, so now
it behaves differently than other counters (including "oom").
This adds nothing but confusion.
Let's fix this by adding the MEMCG_OOM_KILL event, and follow
the MEMCG_OOM approach. This also removes a hack from
count_memcg_event_mm(), introduced earlier specially for the
OOM_KILL counter.
Signed-off-by: Roman Gushchin <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Vladimir Davydov <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Konstantin Khlebnikov <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
---
include/linux/memcontrol.h | 26 ++++++++++++++++++++++----
mm/memcontrol.c | 6 ++++--
mm/oom_kill.c | 2 +-
3 files changed, 27 insertions(+), 7 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 6cbea2f25a87..794475db7368 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -54,6 +54,7 @@ enum memcg_memory_event {
MEMCG_HIGH,
MEMCG_MAX,
MEMCG_OOM,
+ MEMCG_OOM_KILL,
MEMCG_SWAP_MAX,
MEMCG_SWAP_FAIL,
MEMCG_NR_MEMORY_EVENTS,
@@ -721,11 +722,8 @@ static inline void count_memcg_event_mm(struct mm_struct *mm,
rcu_read_lock();
memcg = rcu_dereference(mm->memcg);
- if (likely(memcg)) {
+ if (likely(memcg))
count_memcg_events(memcg, idx, 1);
- if (idx == OOM_KILL)
- cgroup_file_notify(&memcg->events_file);
- }
rcu_read_unlock();
}
@@ -736,6 +734,21 @@ static inline void memcg_memory_event(struct mem_cgroup *memcg,
cgroup_file_notify(&memcg->events_file);
}
+static inline void memcg_memory_event_mm(struct mm_struct *mm,
+ enum memcg_memory_event event)
+{
+ struct mem_cgroup *memcg;
+
+ if (mem_cgroup_disabled())
+ return;
+
+ rcu_read_lock();
+ memcg = rcu_dereference(mm->memcg);
+ if (likely(memcg))
+ memcg_memory_event(memcg, event);
+ rcu_read_unlock();
+}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void mem_cgroup_split_huge_fixup(struct page *head);
#endif
@@ -757,6 +770,11 @@ static inline void memcg_memory_event(struct mem_cgroup *memcg,
{
}
+static inline void memcg_memory_event_mm(struct mm_struct *mm,
+ enum memcg_memory_event event)
+{
+}
+
static inline bool mem_cgroup_low(struct mem_cgroup *root,
struct mem_cgroup *memcg)
{
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 10973671e562..38717630305d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3772,7 +3772,8 @@ static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v)
seq_printf(sf, "oom_kill_disable %d\n", memcg->oom_kill_disable);
seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom);
- seq_printf(sf, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));
+ seq_printf(sf, "oom_kill %lu\n",
+ atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL]));
return 0;
}
@@ -5529,7 +5530,8 @@ static int memory_events_show(struct seq_file *m, void *v)
atomic_long_read(&memcg->memory_events[MEMCG_MAX]));
seq_printf(m, "oom %lu\n",
atomic_long_read(&memcg->memory_events[MEMCG_OOM]));
- seq_printf(m, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));
+ seq_printf(m, "oom_kill %lu\n",
+ atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL]));
return 0;
}
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 8f7d8dd99e5d..6b74142a1259 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -868,7 +868,7 @@ static void __oom_kill_process(struct task_struct *victim)
/* Raise event before sending signal: task reaper must see this */
count_vm_event(OOM_KILL);
- count_memcg_event_mm(mm, OOM_KILL);
+ memcg_memory_event_mm(mm, MEMCG_OOM_KILL);
/*
* We should send SIGKILL before granting access to memory reserves
--
2.14.3
On 08.05.2018 15:46, Roman Gushchin wrote:
> Commit e27be240df53 ("mm: memcg: make sure memory.events is
> uptodate when waking pollers") converted most of memcg event
> counters to per-memcg atomics, which made them less confusing
> for a user. The "oom_kill" counter remained untouched, so now
> it behaves differently than other counters (including "oom").
> This adds nothing but confusion.
>
> Let's fix this by adding the MEMCG_OOM_KILL event, and follow
> the MEMCG_OOM approach. This also removes a hack from
> count_memcg_event_mm(), introduced earlier specially for the
> OOM_KILL counter.
>
> Signed-off-by: Roman Gushchin <[email protected]>
> Cc: Johannes Weiner <[email protected]>
> Cc: Michal Hocko <[email protected]>
> Cc: Vladimir Davydov <[email protected]>
> Cc: Andrew Morton <[email protected]>
> Cc: Konstantin Khlebnikov <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Cc: [email protected]
Acked-by: Konstantin Khlebnikov <[email protected]>
> ---
> include/linux/memcontrol.h | 26 ++++++++++++++++++++++----
> mm/memcontrol.c | 6 ++++--
> mm/oom_kill.c | 2 +-
> 3 files changed, 27 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 6cbea2f25a87..794475db7368 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -54,6 +54,7 @@ enum memcg_memory_event {
> MEMCG_HIGH,
> MEMCG_MAX,
> MEMCG_OOM,
> + MEMCG_OOM_KILL,
> MEMCG_SWAP_MAX,
> MEMCG_SWAP_FAIL,
> MEMCG_NR_MEMORY_EVENTS,
> @@ -721,11 +722,8 @@ static inline void count_memcg_event_mm(struct mm_struct *mm,
>
> rcu_read_lock();
> memcg = rcu_dereference(mm->memcg);
> - if (likely(memcg)) {
> + if (likely(memcg))
> count_memcg_events(memcg, idx, 1);
> - if (idx == OOM_KILL)
> - cgroup_file_notify(&memcg->events_file);
> - }
> rcu_read_unlock();
> }
>
> @@ -736,6 +734,21 @@ static inline void memcg_memory_event(struct mem_cgroup *memcg,
> cgroup_file_notify(&memcg->events_file);
> }
>
> +static inline void memcg_memory_event_mm(struct mm_struct *mm,
> + enum memcg_memory_event event)
> +{
> + struct mem_cgroup *memcg;
> +
> + if (mem_cgroup_disabled())
> + return;
> +
> + rcu_read_lock();
> + memcg = rcu_dereference(mm->memcg);
> + if (likely(memcg))
> + memcg_memory_event(memcg, event);
> + rcu_read_unlock();
> +}
> +
> #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> void mem_cgroup_split_huge_fixup(struct page *head);
> #endif
> @@ -757,6 +770,11 @@ static inline void memcg_memory_event(struct mem_cgroup *memcg,
> {
> }
>
> +static inline void memcg_memory_event_mm(struct mm_struct *mm,
> + enum memcg_memory_event event)
> +{
> +}
> +
> static inline bool mem_cgroup_low(struct mem_cgroup *root,
> struct mem_cgroup *memcg)
> {
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 10973671e562..38717630305d 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -3772,7 +3772,8 @@ static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v)
>
> seq_printf(sf, "oom_kill_disable %d\n", memcg->oom_kill_disable);
> seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom);
> - seq_printf(sf, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));
> + seq_printf(sf, "oom_kill %lu\n",
> + atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL]));
> return 0;
> }
>
> @@ -5529,7 +5530,8 @@ static int memory_events_show(struct seq_file *m, void *v)
> atomic_long_read(&memcg->memory_events[MEMCG_MAX]));
> seq_printf(m, "oom %lu\n",
> atomic_long_read(&memcg->memory_events[MEMCG_OOM]));
> - seq_printf(m, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));
> + seq_printf(m, "oom_kill %lu\n",
> + atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL]));
>
> return 0;
> }
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> index 8f7d8dd99e5d..6b74142a1259 100644
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -868,7 +868,7 @@ static void __oom_kill_process(struct task_struct *victim)
>
> /* Raise event before sending signal: task reaper must see this */
> count_vm_event(OOM_KILL);
> - count_memcg_event_mm(mm, OOM_KILL);
> + memcg_memory_event_mm(mm, MEMCG_OOM_KILL);
>
> /*
> * We should send SIGKILL before granting access to memory reserves
>
On Tue, May 08, 2018 at 01:46:37PM +0100, Roman Gushchin wrote:
> Commit e27be240df53 ("mm: memcg: make sure memory.events is
> uptodate when waking pollers") converted most of memcg event
> counters to per-memcg atomics, which made them less confusing
> for a user. The "oom_kill" counter remained untouched, so now
> it behaves differently than other counters (including "oom").
> This adds nothing but confusion.
>
> Let's fix this by adding the MEMCG_OOM_KILL event, and follow
> the MEMCG_OOM approach. This also removes a hack from
> count_memcg_event_mm(), introduced earlier specially for the
> OOM_KILL counter.
>
> Signed-off-by: Roman Gushchin <[email protected]>
> Cc: Johannes Weiner <[email protected]>
> Cc: Michal Hocko <[email protected]>
> Cc: Vladimir Davydov <[email protected]>
> Cc: Andrew Morton <[email protected]>
> Cc: Konstantin Khlebnikov <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Cc: [email protected]
Acked-by: Johannes Weiner <[email protected]>
On Tue 08-05-18 13:46:37, Roman Gushchin wrote:
> Commit e27be240df53 ("mm: memcg: make sure memory.events is
> uptodate when waking pollers") converted most of memcg event
> counters to per-memcg atomics, which made them less confusing
> for a user. The "oom_kill" counter remained untouched, so now
> it behaves differently than other counters (including "oom").
> This adds nothing but confusion.
>
> Let's fix this by adding the MEMCG_OOM_KILL event, and follow
> the MEMCG_OOM approach. This also removes a hack from
> count_memcg_event_mm(), introduced earlier specially for the
> OOM_KILL counter.
I agree that the current OOM_KILL is confusing. But do we really need
another memcg_memory_event_mm helper used for only one counter, rather
than reusing memcg_memory_event? __oom_kill_process doesn't have the memcg,
but nothing should really prevent us from adding the context
(oom_control) there, no?
[...]
--
Michal Hocko
SUSE Labs
On Thu, May 10, 2018 at 01:41:47PM +0200, Michal Hocko wrote:
> On Tue 08-05-18 13:46:37, Roman Gushchin wrote:
> > Commit e27be240df53 ("mm: memcg: make sure memory.events is
> > uptodate when waking pollers") converted most of memcg event
> > counters to per-memcg atomics, which made them less confusing
> > for a user. The "oom_kill" counter remained untouched, so now
> > it behaves differently than other counters (including "oom").
> > This adds nothing but confusion.
> >
> > Let's fix this by adding the MEMCG_OOM_KILL event, and follow
> > the MEMCG_OOM approach. This also removes a hack from
> > count_memcg_event_mm(), introduced earlier specially for the
> > OOM_KILL counter.
>
> I agree that the current OOM_KILL is confusing. But do we really need
> another memcg_memory_event_mm helper used for only one counter rather
> than reuse memcg_memory_event. __oom_kill_process doesn't have the memcg
> but nothing should really prevent us from adding the context
> (oom_control) there, no?
Not sure that I follow. oom_control has a memcg pointer,
but it's a pointer to the cgroup where the OOM happened.
In particular, it's NULL for a system-wide OOM.
And we do send the OOM_KILL event to the cgroup
which actually contains the process.
On Thu 10-05-18 13:12:56, Roman Gushchin wrote:
> On Thu, May 10, 2018 at 01:41:47PM +0200, Michal Hocko wrote:
> > On Tue 08-05-18 13:46:37, Roman Gushchin wrote:
> > > Commit e27be240df53 ("mm: memcg: make sure memory.events is
> > > uptodate when waking pollers") converted most of memcg event
> > > counters to per-memcg atomics, which made them less confusing
> > > for a user. The "oom_kill" counter remained untouched, so now
> > > it behaves differently than other counters (including "oom").
> > > This adds nothing but confusion.
> > >
> > > Let's fix this by adding the MEMCG_OOM_KILL event, and follow
> > > the MEMCG_OOM approach. This also removes a hack from
> > > count_memcg_event_mm(), introduced earlier specially for the
> > > OOM_KILL counter.
> >
> > I agree that the current OOM_KILL is confusing. But do we really need
> > another memcg_memory_event_mm helper used for only one counter rather
> > than reuse memcg_memory_event. __oom_kill_process doesn't have the memcg
> > but nothing should really prevent us from adding the context
> > (oom_control) there, no?
>
> Not sure, that I follow. oom_control has memcg pointer,
> but it's a pointer to a cgroup, where OOM happened.
> In particular, it's NULL for a system-wide OOM.
>
> And we do send the OOM_KILL event to the cgroup,
> which actually contains the process.
You are right! For some reason I thought we counted events on the
hierarchy which is under OOM. I was wrong.
Acked-by: Michal Hocko <[email protected]>
--
Michal Hocko
SUSE Labs