Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754295AbbLOTuq (ORCPT ); Tue, 15 Dec 2015 14:50:46 -0500 Received: from mail-wm0-f47.google.com ([74.125.82.47]:37766 "EHLO mail-wm0-f47.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754180AbbLOTuo (ORCPT ); Tue, 15 Dec 2015 14:50:44 -0500 Date: Tue, 15 Dec 2015 20:50:40 +0100 From: Michal Hocko To: Johannes Weiner Cc: Andrew Morton , linux-mm@kvack.org, netdev@vger.kernel.org, cgroups@vger.kernel.org, linux-kernel@vger.kernel.org Subject: Re: [PATCH 12/14] mm: memcontrol: account socket memory in unified hierarchy memory controller Message-ID: <20151215195040.GD27880@dhcp22.suse.cz> References: <1449588624-9220-1-git-send-email-hannes@cmpxchg.org> <1449588624-9220-13-git-send-email-hannes@cmpxchg.org> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <1449588624-9220-13-git-send-email-hannes@cmpxchg.org> User-Agent: Mutt/1.5.24 (2015-08-30) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 10475 Lines: 308 On Tue 08-12-15 10:30:22, Johannes Weiner wrote: > Socket memory can be a significant share of overall memory consumed by > common workloads. In order to provide reasonable resource isolation in > the unified hierarchy, this type of memory needs to be included in the > tracking/accounting of a cgroup under active memory resource control. > > Overhead is only incurred when a non-root control group is created AND > the memory controller is instructed to track and account the memory > footprint of that group. cgroup.memory=nosocket can be specified on > the boot commandline to override any runtime configuration and > forcibly exclude socket memory from active memory resource control. > > Signed-off-by: Johannes Weiner > Acked-by: David S. 
Miller > Reviewed-by: Vladimir Davydov Sorry, I forgot about this. Acked-by: Michal Hocko > --- > Documentation/kernel-parameters.txt | 4 ++ > include/linux/memcontrol.h | 9 ++- > mm/memcontrol.c | 122 +++++++++++++++++++++++++++++------- > 3 files changed, 110 insertions(+), 25 deletions(-) > > diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt > index 742f69d..7868f1b 100644 > --- a/Documentation/kernel-parameters.txt > +++ b/Documentation/kernel-parameters.txt > @@ -599,6 +599,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. > cut the overhead, others just disable the usage. So > only cgroup_disable=memory is actually worthy} > > + cgroup.memory= [KNL] Pass options to the cgroup memory controller. > + Format: > + nosocket -- Disable socket memory accounting. > + > checkreqprot [SELINUX] Set initial checkreqprot flag value. > Format: { "0" | "1" } > See security/selinux/Kconfig help text. > diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h > index 654c2fb..863ae8d 100644 > --- a/include/linux/memcontrol.h > +++ b/include/linux/memcontrol.h > @@ -170,6 +170,9 @@ struct mem_cgroup { > unsigned long low; > unsigned long high; > > + /* Range enforcement for interrupt charges */ > + struct work_struct high_work; > + > unsigned long soft_limit; > > /* vmpressure notifications */ > @@ -684,12 +687,16 @@ void sock_update_memcg(struct sock *sk); > void sock_release_memcg(struct sock *sk); > bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); > void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); > -#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET) > +#if defined(CONFIG_MEMCG) && defined(CONFIG_INET) > extern struct static_key memcg_sockets_enabled_key; > #define mem_cgroup_sockets_enabled static_key_false(&memcg_sockets_enabled_key) > static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) > { > +#ifdef
CONFIG_MEMCG_KMEM > return memcg->tcp_mem.memory_pressure; > +#else > + return false; > +#endif > } > #else > #define mem_cgroup_sockets_enabled 0 > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > index ed030b5..59555b0 100644 > --- a/mm/memcontrol.c > +++ b/mm/memcontrol.c > @@ -80,6 +80,9 @@ struct mem_cgroup *root_mem_cgroup __read_mostly; > > #define MEM_CGROUP_RECLAIM_RETRIES 5 > > +/* Socket memory accounting disabled? */ > +static bool cgroup_memory_nosocket; > + > /* Whether the swap controller is active */ > #ifdef CONFIG_MEMCG_SWAP > int do_swap_account __read_mostly; > @@ -1923,6 +1926,26 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb, > return NOTIFY_OK; > } > > +static void reclaim_high(struct mem_cgroup *memcg, > + unsigned int nr_pages, > + gfp_t gfp_mask) > +{ > + do { > + if (page_counter_read(&memcg->memory) <= memcg->high) > + continue; > + mem_cgroup_events(memcg, MEMCG_HIGH, 1); > + try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true); > + } while ((memcg = parent_mem_cgroup(memcg))); > +} > + > +static void high_work_func(struct work_struct *work) > +{ > + struct mem_cgroup *memcg; > + > + memcg = container_of(work, struct mem_cgroup, high_work); > + reclaim_high(memcg, CHARGE_BATCH, GFP_KERNEL); > +} > + > /* > * Scheduled by try_charge() to be executed from the userland return path > * and reclaims memory over the high limit. 
> @@ -1930,20 +1953,13 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb, > void mem_cgroup_handle_over_high(void) > { > unsigned int nr_pages = current->memcg_nr_pages_over_high; > - struct mem_cgroup *memcg, *pos; > + struct mem_cgroup *memcg; > > if (likely(!nr_pages)) > return; > > - pos = memcg = get_mem_cgroup_from_mm(current->mm); > - > - do { > - if (page_counter_read(&pos->memory) <= pos->high) > - continue; > - mem_cgroup_events(pos, MEMCG_HIGH, 1); > - try_to_free_mem_cgroup_pages(pos, nr_pages, GFP_KERNEL, true); > - } while ((pos = parent_mem_cgroup(pos))); > - > + memcg = get_mem_cgroup_from_mm(current->mm); > + reclaim_high(memcg, nr_pages, GFP_KERNEL); > css_put(&memcg->css); > current->memcg_nr_pages_over_high = 0; > } > @@ -2078,6 +2094,11 @@ done_restock: > */ > do { > if (page_counter_read(&memcg->memory) > memcg->high) { > + /* Don't bother a random interrupted task */ > + if (in_interrupt()) { > + schedule_work(&memcg->high_work); > + break; > + } > current->memcg_nr_pages_over_high += batch; > set_notify_resume(current); > break; > @@ -4126,6 +4147,8 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) > { > int node; > > + cancel_work_sync(&memcg->high_work); > + > mem_cgroup_remove_from_trees(memcg); > > for_each_node(node) > @@ -4172,6 +4195,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) > page_counter_init(&memcg->kmem, NULL); > } > > + INIT_WORK(&memcg->high_work, high_work_func); > memcg->last_scanned_node = MAX_NUMNODES; > INIT_LIST_HEAD(&memcg->oom_notify); > memcg->move_charge_at_immigrate = 0; > @@ -4243,6 +4267,11 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css) > if (ret) > return ret; > > +#ifdef CONFIG_INET > + if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket) > + static_key_slow_inc(&memcg_sockets_enabled_key); > +#endif > + > /* > * Make sure the memcg is initialized: mem_cgroup_iter() > * orders reading memcg->initialized against its callers > @@ 
-4282,6 +4311,10 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css) > struct mem_cgroup *memcg = mem_cgroup_from_css(css); > > memcg_destroy_kmem(memcg); > +#ifdef CONFIG_INET > + if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket) > + static_key_slow_dec(&memcg_sockets_enabled_key); > +#endif > __mem_cgroup_free(memcg); > } > > @@ -5470,8 +5503,7 @@ void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage) > commit_charge(newpage, memcg, true); > } > > -/* Writing them here to avoid exposing memcg's inner layout */ > -#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM) > +#ifdef CONFIG_INET > > struct static_key memcg_sockets_enabled_key; > EXPORT_SYMBOL(memcg_sockets_enabled_key); > @@ -5496,10 +5528,15 @@ void sock_update_memcg(struct sock *sk) > > rcu_read_lock(); > memcg = mem_cgroup_from_task(current); > - if (memcg != root_mem_cgroup && > - memcg->tcp_mem.active && > - css_tryget_online(&memcg->css)) > + if (memcg == root_mem_cgroup) > + goto out; > +#ifdef CONFIG_MEMCG_KMEM > + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && !memcg->tcp_mem.active) > + goto out; > +#endif > + if (css_tryget_online(&memcg->css)) > sk->sk_memcg = memcg; > +out: > rcu_read_unlock(); > } > EXPORT_SYMBOL(sock_update_memcg); > @@ -5520,15 +5557,30 @@ void sock_release_memcg(struct sock *sk) > */ > bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages) > { > - struct page_counter *counter; > + gfp_t gfp_mask = GFP_KERNEL; > > - if (page_counter_try_charge(&memcg->tcp_mem.memory_allocated, > - nr_pages, &counter)) { > - memcg->tcp_mem.memory_pressure = 0; > - return true; > +#ifdef CONFIG_MEMCG_KMEM > + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) { > + struct page_counter *counter; > + > + if (page_counter_try_charge(&memcg->tcp_mem.memory_allocated, > + nr_pages, &counter)) { > + memcg->tcp_mem.memory_pressure = 0; > + return true; > + } > + page_counter_charge(&memcg->tcp_mem.memory_allocated, 
nr_pages); > + memcg->tcp_mem.memory_pressure = 1; > + return false; > } > - page_counter_charge(&memcg->tcp_mem.memory_allocated, nr_pages); > - memcg->tcp_mem.memory_pressure = 1; > +#endif > + /* Don't block in the packet receive path */ > + if (in_softirq()) > + gfp_mask = GFP_NOWAIT; > + > + if (try_charge(memcg, gfp_mask, nr_pages) == 0) > + return true; > + > + try_charge(memcg, gfp_mask|__GFP_NOFAIL, nr_pages); > return false; > } > > @@ -5539,10 +5591,32 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages) > */ > void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages) > { > - page_counter_uncharge(&memcg->tcp_mem.memory_allocated, nr_pages); > +#ifdef CONFIG_MEMCG_KMEM > + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) { > + page_counter_uncharge(&memcg->tcp_mem.memory_allocated, > + nr_pages); > + return; > + } > +#endif > + page_counter_uncharge(&memcg->memory, nr_pages); > + css_put_many(&memcg->css, nr_pages); > } > > -#endif > +#endif /* CONFIG_INET */ > + > +static int __init cgroup_memory(char *s) > +{ > + char *token; > + > + while ((token = strsep(&s, ",")) != NULL) { > + if (!*token) > + continue; > + if (!strcmp(token, "nosocket")) > + cgroup_memory_nosocket = true; > + } > + return 0; > +} > +__setup("cgroup.memory=", cgroup_memory); > > /* > * subsys_initcall() for memory controller. > -- > 2.6.3 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ -- Michal Hocko SUSE Labs -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/