2016-04-06 08:40:41

by Vlastimil Babka

[permalink] [raw]
Subject: [PATCH] cpuset: use static key better and convert to new API

An important function for cpusets is cpuset_node_allowed(), which optimizes on
the fact if there's a single root CPU set, it must be trivially allowed. But
the check "nr_cpusets() <= 1" doesn't use the cpusets_enabled_key static key
the right way where static keys eliminate branching overhead with jump labels.

This patch converts it so that static key is used properly. It's also switched
to the new static key API and the checking functions are converted to return
bool instead of int. We also provide a new variant __cpuset_zone_allowed()
which expects that the static key check was already done and they key was
enabled. This is needed for get_page_from_freelist() where we want to also
avoid the relatively slower check when ALLOC_CPUSET is not set in alloc_flags.

Signed-off-by: Vlastimil Babka <[email protected]>
---
include/linux/cpuset.h | 29 +++++++++++++++++++----------
kernel/cpuset.c | 14 +++++++-------
mm/page_alloc.c | 2 +-
3 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index fea160ee5803..d5f5c7f87e36 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -16,26 +16,26 @@

#ifdef CONFIG_CPUSETS

-extern struct static_key cpusets_enabled_key;
+extern struct static_key_false cpusets_enabled_key;
static inline bool cpusets_enabled(void)
{
- return static_key_false(&cpusets_enabled_key);
+ return static_branch_unlikely(&cpusets_enabled_key);
}

static inline int nr_cpusets(void)
{
/* jump label reference count + the top-level cpuset */
- return static_key_count(&cpusets_enabled_key) + 1;
+ return static_key_count(&cpusets_enabled_key.key) + 1;
}

static inline void cpuset_inc(void)
{
- static_key_slow_inc(&cpusets_enabled_key);
+ static_branch_inc(&cpusets_enabled_key);
}

static inline void cpuset_dec(void)
{
- static_key_slow_dec(&cpusets_enabled_key);
+ static_branch_dec(&cpusets_enabled_key);
}

extern int cpuset_init(void);
@@ -48,16 +48,25 @@ extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
void cpuset_init_current_mems_allowed(void);
int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask);

-extern int __cpuset_node_allowed(int node, gfp_t gfp_mask);
+extern bool __cpuset_node_allowed(int node, gfp_t gfp_mask);

-static inline int cpuset_node_allowed(int node, gfp_t gfp_mask)
+static inline bool cpuset_node_allowed(int node, gfp_t gfp_mask)
{
- return nr_cpusets() <= 1 || __cpuset_node_allowed(node, gfp_mask);
+ if (cpusets_enabled())
+ return __cpuset_node_allowed(node, gfp_mask);
+ return true;
}

-static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+static inline bool __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+{
+ return __cpuset_node_allowed(zone_to_nid(z), gfp_mask);
+}
+
+static inline bool cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
{
- return cpuset_node_allowed(zone_to_nid(z), gfp_mask);
+ if (cpusets_enabled())
+ return __cpuset_zone_allowed(z, gfp_mask);
+ return true;
}

extern int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 00ab5c2b7c5b..37a0b44d101f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -62,7 +62,7 @@
#include <linux/cgroup.h>
#include <linux/wait.h>

-struct static_key cpusets_enabled_key __read_mostly = STATIC_KEY_INIT_FALSE;
+DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key);

/* See "Frequency meter" comments, below. */

@@ -2528,27 +2528,27 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
* GFP_KERNEL - any node in enclosing hardwalled cpuset ok
* GFP_USER - only nodes in current tasks mems allowed ok.
*/
-int __cpuset_node_allowed(int node, gfp_t gfp_mask)
+bool __cpuset_node_allowed(int node, gfp_t gfp_mask)
{
struct cpuset *cs; /* current cpuset ancestors */
int allowed; /* is allocation in zone z allowed? */
unsigned long flags;

if (in_interrupt())
- return 1;
+ return true;
if (node_isset(node, current->mems_allowed))
- return 1;
+ return true;
/*
* Allow tasks that have access to memory reserves because they have
* been OOM killed to get memory anywhere.
*/
if (unlikely(test_thread_flag(TIF_MEMDIE)))
- return 1;
+ return true;
if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */
- return 0;
+ return false;

if (current->flags & PF_EXITING) /* Let dying task have memory */
- return 1;
+ return true;

/* Not hardwall and node outside mems_allowed: scan up cpusets */
spin_lock_irqsave(&callback_lock, flags);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 59de90d5d3a3..69edac810084 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2650,7 +2650,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,

if (cpusets_enabled() &&
(alloc_flags & ALLOC_CPUSET) &&
- !cpuset_zone_allowed(zone, gfp_mask))
+ !__cpuset_zone_allowed(zone, gfp_mask))
continue;
/*
* Distribute pages in proportion to the individual
--
2.7.4


2016-04-06 08:57:38

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH] cpuset: use static key better and convert to new API

Hi Vlastimil,

[auto build test ERROR on v4.6-rc2]
[also build test ERROR on next-20160406]
[if your patch is applied to the wrong git tree, please drop us a note to help improving the system]

url: https://github.com/0day-ci/linux/commits/Vlastimil-Babka/cpuset-use-static-key-better-and-convert-to-new-API/20160406-164542
config: x86_64-randconfig-x011-201614 (attached as .config)
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64

All errors (new ones prefixed by >>):

mm/page_alloc.c: In function 'get_page_from_freelist':
>> mm/page_alloc.c:2653:5: error: implicit declaration of function '__cpuset_zone_allowed' [-Werror=implicit-function-declaration]
!__cpuset_zone_allowed(zone, gfp_mask))
^
cc1: some warnings being treated as errors

vim +/__cpuset_zone_allowed +2653 mm/page_alloc.c

2647 for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->high_zoneidx,
2648 ac->nodemask) {
2649 unsigned long mark;
2650
2651 if (cpusets_enabled() &&
2652 (alloc_flags & ALLOC_CPUSET) &&
> 2653 !__cpuset_zone_allowed(zone, gfp_mask))
2654 continue;
2655 /*
2656 * Distribute pages in proportion to the individual

---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation


Attachments:
(No filename) (1.36 kB)
.config.gz (20.62 kB)
Download all attachments

2016-04-06 08:57:48

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH] cpuset: use static key better and convert to new API

Hi Vlastimil,

[auto build test WARNING on v4.6-rc2]
[also build test WARNING on next-20160406]
[if your patch is applied to the wrong git tree, please drop us a note to help improving the system]

url: https://github.com/0day-ci/linux/commits/Vlastimil-Babka/cpuset-use-static-key-better-and-convert-to-new-API/20160406-164542
config: x86_64-randconfig-x007-201614 (attached as .config)
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64

All warnings (new ones prefixed by >>):

In file included from include/uapi/linux/stddef.h:1:0,
from include/linux/stddef.h:4,
from mm/page_alloc.c:17:
mm/page_alloc.c: In function 'get_page_from_freelist':
mm/page_alloc.c:2653:5: error: implicit declaration of function '__cpuset_zone_allowed' [-Werror=implicit-function-declaration]
!__cpuset_zone_allowed(zone, gfp_mask))
^
include/linux/compiler.h:151:30: note: in definition of macro '__trace_if'
if (__builtin_constant_p(!!(cond)) ? !!(cond) : \
^
>> mm/page_alloc.c:2651:3: note: in expansion of macro 'if'
if (cpusets_enabled() &&
^
cc1: some warnings being treated as errors

vim +/if +2651 mm/page_alloc.c

7fb1d9fca Rohit Seth 2005-11-13 2635 struct page *page = NULL;
5117f45d1 Mel Gorman 2009-06-16 2636 struct zone *zone;
4ffeaf356 Mel Gorman 2014-08-06 2637 int nr_fair_skipped = 0;
4ffeaf356 Mel Gorman 2014-08-06 2638 bool zonelist_rescan;
54a6eb5c4 Mel Gorman 2008-04-28 2639
9276b1bc9 Paul Jackson 2006-12-06 2640 zonelist_scan:
4ffeaf356 Mel Gorman 2014-08-06 2641 zonelist_rescan = false;
4ffeaf356 Mel Gorman 2014-08-06 2642
7fb1d9fca Rohit Seth 2005-11-13 2643 /*
9276b1bc9 Paul Jackson 2006-12-06 2644 * Scan zonelist, looking for a zone with enough free.
344736f29 Vladimir Davydov 2014-10-20 2645 * See also __cpuset_node_allowed() comment in kernel/cpuset.c.
7fb1d9fca Rohit Seth 2005-11-13 2646 */
a9263751e Vlastimil Babka 2015-02-11 2647 for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->high_zoneidx,
a9263751e Vlastimil Babka 2015-02-11 2648 ac->nodemask) {
e085dbc52 Johannes Weiner 2013-09-11 2649 unsigned long mark;
e085dbc52 Johannes Weiner 2013-09-11 2650
664eeddee Mel Gorman 2014-06-04 @2651 if (cpusets_enabled() &&
664eeddee Mel Gorman 2014-06-04 2652 (alloc_flags & ALLOC_CPUSET) &&
a8f315757 Vlastimil Babka 2016-04-06 2653 !__cpuset_zone_allowed(zone, gfp_mask))
cd38b115d Mel Gorman 2011-07-25 2654 continue;
a756cf590 Johannes Weiner 2012-01-10 2655 /*
81c0a2bb5 Johannes Weiner 2013-09-11 2656 * Distribute pages in proportion to the individual
81c0a2bb5 Johannes Weiner 2013-09-11 2657 * zone size to ensure fair page aging. The zone a
81c0a2bb5 Johannes Weiner 2013-09-11 2658 * page was allocated in should have no effect on the
81c0a2bb5 Johannes Weiner 2013-09-11 2659 * time the page has in memory before being reclaimed.

:::::: The code at line 2651 was first introduced by commit
:::::: 664eeddeef6539247691197c1ac124d4aa872ab6 mm: page_alloc: use jump labels to avoid checking number_of_cpusets

:::::: TO: Mel Gorman <[email protected]>
:::::: CC: Linus Torvalds <[email protected]>

---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation


Attachments:
(No filename) (3.48 kB)
.config.gz (28.65 kB)
Download all attachments

2016-04-06 09:16:56

by Vlastimil Babka

[permalink] [raw]
Subject: Re: [PATCH] cpuset: use static key better and convert to new API

On 04/06/2016 10:56 AM, kbuild test robot wrote:
> Hi Vlastimil,
>
> [auto build test ERROR on v4.6-rc2]
> [also build test ERROR on next-20160406]
> [if your patch is applied to the wrong git tree, please drop us a note to help improving the system]
>
> url: https://github.com/0day-ci/linux/commits/Vlastimil-Babka/cpuset-use-static-key-better-and-convert-to-new-API/20160406-164542
> config: x86_64-randconfig-x011-201614 (attached as .config)
> reproduce:
> # save the attached .config to linux build tree
> make ARCH=x86_64
>
> All errors (new ones prefixed by >>):
>
> mm/page_alloc.c: In function 'get_page_from_freelist':
>>> mm/page_alloc.c:2653:5: error: implicit declaration of function '__cpuset_zone_allowed' [-Werror=implicit-function-declaration]

Ah, forgot about !CONFIG_CPUSETS. Sorry, I'll send v2.

2016-04-06 09:20:03

by Vlastimil Babka

[permalink] [raw]
Subject: [PATCH v2] cpuset: use static key better and convert to new API

An important function for cpusets is cpuset_node_allowed(), which optimizes on
the fact if there's a single root CPU set, it must be trivially allowed. But
the check "nr_cpusets() <= 1" doesn't use the cpusets_enabled_key static key
the right way where static keys eliminate branching overhead with jump labels.

This patch converts it so that static key is used properly. It's also switched
to the new static key API and the checking functions are converted to return
bool instead of int. We also provide a new variant __cpuset_zone_allowed()
which expects that the static key check was already done and they key was
enabled. This is needed for get_page_from_freelist() where we want to also
avoid the relatively slower check when ALLOC_CPUSET is not set in alloc_flags.

Signed-off-by: Vlastimil Babka <[email protected]>
---
v2: fix !CONFIG_CPUSETS thanks to kbuild test robot

include/linux/cpuset.h | 42 ++++++++++++++++++++++++++++--------------
kernel/cpuset.c | 14 +++++++-------
mm/page_alloc.c | 2 +-
3 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index fea160ee5803..054c734d0170 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -16,26 +16,26 @@

#ifdef CONFIG_CPUSETS

-extern struct static_key cpusets_enabled_key;
+extern struct static_key_false cpusets_enabled_key;
static inline bool cpusets_enabled(void)
{
- return static_key_false(&cpusets_enabled_key);
+ return static_branch_unlikely(&cpusets_enabled_key);
}

static inline int nr_cpusets(void)
{
/* jump label reference count + the top-level cpuset */
- return static_key_count(&cpusets_enabled_key) + 1;
+ return static_key_count(&cpusets_enabled_key.key) + 1;
}

static inline void cpuset_inc(void)
{
- static_key_slow_inc(&cpusets_enabled_key);
+ static_branch_inc(&cpusets_enabled_key);
}

static inline void cpuset_dec(void)
{
- static_key_slow_dec(&cpusets_enabled_key);
+ static_branch_dec(&cpusets_enabled_key);
}

extern int cpuset_init(void);
@@ -48,16 +48,25 @@ extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
void cpuset_init_current_mems_allowed(void);
int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask);

-extern int __cpuset_node_allowed(int node, gfp_t gfp_mask);
+extern bool __cpuset_node_allowed(int node, gfp_t gfp_mask);

-static inline int cpuset_node_allowed(int node, gfp_t gfp_mask)
+static inline bool cpuset_node_allowed(int node, gfp_t gfp_mask)
{
- return nr_cpusets() <= 1 || __cpuset_node_allowed(node, gfp_mask);
+ if (cpusets_enabled())
+ return __cpuset_node_allowed(node, gfp_mask);
+ return true;
}

-static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+static inline bool __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
{
- return cpuset_node_allowed(zone_to_nid(z), gfp_mask);
+ return __cpuset_node_allowed(zone_to_nid(z), gfp_mask);
+}
+
+static inline bool cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+{
+ if (cpusets_enabled())
+ return __cpuset_zone_allowed(z, gfp_mask);
+ return true;
}

extern int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
@@ -174,14 +183,19 @@ static inline int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
return 1;
}

-static inline int cpuset_node_allowed(int node, gfp_t gfp_mask)
+static inline bool cpuset_node_allowed(int node, gfp_t gfp_mask)
{
- return 1;
+ return true;
}

-static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+static inline bool __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
{
- return 1;
+ return true;
+}
+
+static inline bool cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+{
+ return true;
}

static inline int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 00ab5c2b7c5b..37a0b44d101f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -62,7 +62,7 @@
#include <linux/cgroup.h>
#include <linux/wait.h>

-struct static_key cpusets_enabled_key __read_mostly = STATIC_KEY_INIT_FALSE;
+DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key);

/* See "Frequency meter" comments, below. */

@@ -2528,27 +2528,27 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
* GFP_KERNEL - any node in enclosing hardwalled cpuset ok
* GFP_USER - only nodes in current tasks mems allowed ok.
*/
-int __cpuset_node_allowed(int node, gfp_t gfp_mask)
+bool __cpuset_node_allowed(int node, gfp_t gfp_mask)
{
struct cpuset *cs; /* current cpuset ancestors */
int allowed; /* is allocation in zone z allowed? */
unsigned long flags;

if (in_interrupt())
- return 1;
+ return true;
if (node_isset(node, current->mems_allowed))
- return 1;
+ return true;
/*
* Allow tasks that have access to memory reserves because they have
* been OOM killed to get memory anywhere.
*/
if (unlikely(test_thread_flag(TIF_MEMDIE)))
- return 1;
+ return true;
if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */
- return 0;
+ return false;

if (current->flags & PF_EXITING) /* Let dying task have memory */
- return 1;
+ return true;

/* Not hardwall and node outside mems_allowed: scan up cpusets */
spin_lock_irqsave(&callback_lock, flags);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 59de90d5d3a3..69edac810084 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2650,7 +2650,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,

if (cpusets_enabled() &&
(alloc_flags & ALLOC_CPUSET) &&
- !cpuset_zone_allowed(zone, gfp_mask))
+ !__cpuset_zone_allowed(zone, gfp_mask))
continue;
/*
* Distribute pages in proportion to the individual
--
2.7.4

2016-04-14 08:34:06

by Zefan Li

[permalink] [raw]
Subject: Re: [PATCH v2] cpuset: use static key better and convert to new API

On 2016/4/6 17:19, Vlastimil Babka wrote:
> An important function for cpusets is cpuset_node_allowed(), which optimizes on
> the fact if there's a single root CPU set, it must be trivially allowed. But
> the check "nr_cpusets() <= 1" doesn't use the cpusets_enabled_key static key
> the right way where static keys eliminate branching overhead with jump labels.
>
> This patch converts it so that static key is used properly. It's also switched
> to the new static key API and the checking functions are converted to return
> bool instead of int. We also provide a new variant __cpuset_zone_allowed()
> which expects that the static key check was already done and they key was
> enabled. This is needed for get_page_from_freelist() where we want to also
> avoid the relatively slower check when ALLOC_CPUSET is not set in alloc_flags.
>
> Signed-off-by: Vlastimil Babka <[email protected]>

Looks good to me.

Acked-by: Zefan Li <[email protected]>