2022-09-04 04:13:00

by Zhongkun He

[permalink] [raw]
Subject: [PATCH] cgroup/cpuset: Add a new isolated mems.policy type.

From: Zhongkun He <[email protected]>

Mempolicy is difficult to use because it is set in-process
via a system call. We want to make it easier to use mempolicy
in cpuset, and we can control low-priority cgroups to
allocate memory in specified nodes. So this patch
adds a mempolicy interface to cpuset.

The cpuset mempolicy has lower priority than the task mempolicy.
The policy lookup order is:
1) vma mempolicy
2) task->mempolicy
3) cpuset->mempolicy
4) default policy.

Each cpuset owns its own policy; a newly created descendant
inherits its parent's mempolicy as its default.

How to use the mempolicy interface:
echo prefer:2 > /sys/fs/cgroup/zz/cpuset.mems.policy
echo bind:1-3 > /sys/fs/cgroup/zz/cpuset.mems.policy
echo interleave:0,1,2,3 >/sys/fs/cgroup/zz/cpuset.mems.policy
Show the policy:
cat /sys/fs/cgroup/zz/cpuset.mems.policy
prefer:2
cat /sys/fs/cgroup/zz/cpuset.mems.policy
bind:1-3
cat /sys/fs/cgroup/zz/cpuset.mems.policy
interleave:0-3
Clear the policy:
echo default > /sys/fs/cgroup/zz/cpuset.mems.policy

Signed-off-by: Zhongkun He <[email protected]>
---
include/linux/mempolicy.h | 9 ++
include/linux/sched.h | 2 +
kernel/cgroup/cpuset.c | 182 +++++++++++++++++++++++++++++++++++++-
kernel/fork.c | 1 +
mm/mempolicy.c | 25 ++++--
5 files changed, 209 insertions(+), 10 deletions(-)

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 668389b4b53d..41c4b28fb71f 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -142,6 +142,7 @@ extern void numa_default_policy(void);
extern void numa_policy_init(void);
extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new);
extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
+extern void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask);

extern int huge_node(struct vm_area_struct *vma,
unsigned long addr, gfp_t gfp_flags,
@@ -211,6 +212,11 @@ static inline void mpol_get(struct mempolicy *pol)
{
}

+static inline struct mempolicy *mpol_dup(struct mempolicy *pol)
+{
+ return NULL;
+}
+
struct shared_policy {};

static inline void mpol_shared_policy_init(struct shared_policy *sp,
@@ -252,6 +258,9 @@ static inline void mpol_rebind_task(struct task_struct *tsk,
static inline void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
{
}
+static inline void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask)
+{
+}

static inline int huge_node(struct vm_area_struct *vma,
unsigned long addr, gfp_t gfp_flags,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e7b2f8a5c711..f935e1707e7c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1236,6 +1236,8 @@ struct task_struct {
unsigned long preempt_disable_ip;
#endif
#ifdef CONFIG_NUMA
+ /* cpuset memory policy */
+ struct mempolicy *cs_mpol;
/* Protected by alloc_lock: */
struct mempolicy *mempolicy;
short il_prev;
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 1f3a55297f39..19fd359dc8d8 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -118,6 +118,9 @@ struct cpuset {
cpumask_var_t effective_cpus;
nodemask_t effective_mems;

+ /*cpuset mem policy */
+ struct mempolicy *mempolicy;
+
/*
* CPUs allocated to child sub-partitions (default hierarchy only)
* - CPUs granted by the parent = effective_cpus U subparts_cpus
@@ -378,6 +381,10 @@ static void cpuset_hotplug_workfn(struct work_struct *work);
static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn);

static DECLARE_WAIT_QUEUE_HEAD(cpuset_attach_wq);
+static void cpuset_change_task_cs_mpol(struct task_struct *tsk,
+ struct mempolicy *mpol);
+static inline void update_cs_mpol_nodemask(struct cpuset *cs,
+ struct mempolicy *mpol);

static inline void check_insane_mems_config(nodemask_t *nodes)
{
@@ -570,7 +577,10 @@ static struct cpuset *alloc_trial_cpuset(struct cpuset *cs)
if (!trial)
return NULL;

+ mpol_get(trial->mempolicy);
+
if (alloc_cpumasks(trial, NULL)) {
+ mpol_put(trial->mempolicy);
kfree(trial);
return NULL;
}
@@ -587,6 +597,7 @@ static struct cpuset *alloc_trial_cpuset(struct cpuset *cs)
static inline void free_cpuset(struct cpuset *cs)
{
free_cpumasks(cs, NULL);
+ mpol_put(cs->mempolicy);
kfree(cs);
}

@@ -1849,6 +1860,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)

spin_lock_irq(&callback_lock);
cp->effective_mems = *new_mems;
+ update_cs_mpol_nodemask(cp, cp->mempolicy);
spin_unlock_irq(&callback_lock);

WARN_ON(!is_in_v2_mode() &&
@@ -2304,7 +2316,8 @@ static void cpuset_attach(struct cgroup_taskset *tset)
* fail. TODO: have a better way to handle failure here
*/
WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
-
+ /*update the cpuset mempolicy to task*/
+ cpuset_change_task_cs_mpol(task, cs->mempolicy);
cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
cpuset_update_task_spread_flag(cs, task);
}
@@ -2441,6 +2454,140 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
return retval;
}

+#ifdef CONFIG_NUMA
+
+/*update the mpol nodemask based on cpuset nodemask*/
+static inline void update_cs_mpol_nodemask(struct cpuset *cs, struct mempolicy *mpol)
+{
+ nodemask_t newmems;
+
+ if (!mpol)
+ return;
+ nodes_and(newmems, cs->effective_mems, mpol->w.user_nodemask);
+
+ /*If no intersection between effective_mems and mpol user_nodemask,
+ *use effective_mems to allocate mems.
+ */
+ if (!nodes_weight(newmems))
+ newmems = cs->effective_mems;
+ mpol_rebind_policy(mpol, &newmems);
+}
+
+/*update cpuset policy for task*/
+static void cpuset_change_task_cs_mpol(struct task_struct *tsk,
+ struct mempolicy *mpol)
+{
+ struct mempolicy *old = NULL;
+
+ task_lock(tsk);
+ local_irq_disable();
+ write_seqcount_begin(&tsk->mems_allowed_seq);
+
+ old = tsk->cs_mpol;
+ tsk->cs_mpol = mpol;
+ mpol_get(mpol);
+ tsk->il_prev = 0;
+
+ write_seqcount_end(&tsk->mems_allowed_seq);
+ local_irq_enable();
+ task_unlock(tsk);
+ mpol_put(old);
+}
+
+static void update_tasks_cs_mpol(struct cpuset *cs)
+{
+ struct css_task_iter it;
+ struct task_struct *task;
+
+ css_task_iter_start(&cs->css, 0, &it);
+
+ while ((task = css_task_iter_next(&it)))
+ cpuset_change_task_cs_mpol(task, cs->mempolicy);
+ css_task_iter_end(&it);
+}
+
+/* change cpuset mempolicy */
+static ssize_t cpuset_mpol_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ struct mempolicy *mpol, *old = NULL;
+ struct cpuset *cs = css_cs(of_css(of));
+ nodemask_t cs_allowed;
+ int err = -ENODEV;
+
+ css_get(&cs->css);
+ kernfs_break_active_protection(of->kn);
+ percpu_down_write(&cpuset_rwsem);
+
+ if (!is_cpuset_online(cs))
+ goto out_unlock;
+
+ buf = strstrip(buf);
+ err = mpol_parse_str(buf, &mpol);
+
+ if (err) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ spin_lock_irq(&callback_lock);
+ old = cs->mempolicy;
+ update_cs_mpol_nodemask(cs, mpol);
+ cs->mempolicy = mpol;
+ spin_unlock_irq(&callback_lock);
+
+ update_tasks_cs_mpol(cs);
+
+out_unlock:
+ percpu_up_write(&cpuset_rwsem);
+ kernfs_unbreak_active_protection(of->kn);
+ css_put(&cs->css);
+
+ if (old) {
+ /*Wait for outstanding programs to complete.*/
+ synchronize_rcu();
+ mpol_put(old);
+ }
+ return err ?: nbytes;
+}
+
+/*show cpuset mempolicy*/
+static int cpuset_mpol_show(struct seq_file *seq, void *v)
+{
+ char buffer[64];
+ int ret = 0;
+ struct mempolicy *mpol;
+ struct cpuset *cs = css_cs(seq_css(seq));
+
+ memset(buffer, 0, sizeof(buffer));
+ spin_lock_irq(&callback_lock);
+ mpol = cs->mempolicy;
+
+ if (!mpol || mpol->mode == MPOL_DEFAULT)
+ goto out_unlock;
+
+ mpol_to_str(buffer, sizeof(buffer), mpol);
+ seq_printf(seq, buffer);
+ seq_putc(seq, '\n');
+
+out_unlock:
+ spin_unlock_irq(&callback_lock);
+ return ret;
+}
+
+#else
+static void cpuset_change_task_cs_mpol(struct task_struct *tsk,
+ struct mempolicy *mpol)
+{
+}
+
+static inline void update_cs_mpol_nodemask(struct cpuset *cs,
+ struct mempolicy *mpol)
+{
+}
+
+#endif
+
/*
* Common handling for a write to a "cpus" or "mems" file.
*/
@@ -2678,7 +2825,14 @@ static struct cftype legacy_files[] = {
.seq_show = cpuset_common_seq_show,
.private = FILE_EFFECTIVE_MEMLIST,
},
-
+#ifdef CONFIG_NUMA
+ {
+ .name = "mems_policy",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .seq_show = cpuset_mpol_show,
+ .write = cpuset_mpol_write,
+ },
+#endif
{
.name = "cpu_exclusive",
.read_u64 = cpuset_read_u64,
@@ -2786,7 +2940,14 @@ static struct cftype dfl_files[] = {
.seq_show = cpuset_common_seq_show,
.private = FILE_EFFECTIVE_MEMLIST,
},
-
+#ifdef CONFIG_NUMA
+ {
+ .name = "mems.policy",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .seq_show = cpuset_mpol_show,
+ .write = cpuset_mpol_write,
+ },
+#endif
{
.name = "cpus.partition",
.seq_show = sched_partition_show,
@@ -2835,6 +2996,21 @@ cpuset_css_alloc(struct cgroup_subsys_state *parent_css)
fmeter_init(&cs->fmeter);
cs->relax_domain_level = -1;

+ if (IS_ENABLED(CONFIG_NUMA)) {
+ struct cpuset *pcs = css_cs(parent_css);
+ struct mempolicy *new;
+
+ /*Inherit mempolicy from parent.*/
+ spin_lock_irq(&callback_lock);
+ new = mpol_dup(pcs->mempolicy);
+
+ if (IS_ERR(new))
+ new = NULL;
+
+ cs->mempolicy = new;
+ spin_unlock_irq(&callback_lock);
+ }
+
/* Set CS_MEMORY_MIGRATE for default hierarchy */
if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
__set_bit(CS_MEMORY_MIGRATE, &cs->flags);
diff --git a/kernel/fork.c b/kernel/fork.c
index 90c85b17bf69..3f695449e2a5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2190,6 +2190,7 @@ static __latent_entropy struct task_struct *copy_process(
p->mempolicy = NULL;
goto bad_fork_cleanup_delayacct;
}
+ mpol_get(p->cs_mpol); /*ref cpuset mempolicy*/
#endif
#ifdef CONFIG_CPUSETS
p->cpuset_mem_spread_rotor = NUMA_NO_NODE;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index b73d3248d976..144f79887df9 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -158,9 +158,15 @@ int numa_map_to_online_node(int node)
}
EXPORT_SYMBOL_GPL(numa_map_to_online_node);

+static inline struct mempolicy *task_or_cs_mpol(struct task_struct *p)
+{
+ return p->mempolicy ?
+ p->mempolicy : p->cs_mpol;
+}
+
struct mempolicy *get_task_policy(struct task_struct *p)
{
- struct mempolicy *pol = p->mempolicy;
+ struct mempolicy *pol = task_or_cs_mpol(p);
int node;

if (pol)
@@ -349,7 +355,7 @@ static void mpol_rebind_preferred(struct mempolicy *pol,
* policies are protected by task->mems_allowed_seq to prevent a premature
* OOM/allocation failure due to parallel nodemask modification.
*/
-static void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask)
+void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask)
{
if (!pol || pol->mode == MPOL_LOCAL)
return;
@@ -1895,7 +1901,7 @@ unsigned int mempolicy_slab_node(void)
if (!in_task())
return node;

- policy = current->mempolicy;
+ policy = task_or_cs_mpol(current);
if (!policy)
return node;

@@ -2043,7 +2049,7 @@ bool init_nodemask_of_mempolicy(nodemask_t *mask)
return false;

task_lock(current);
- mempolicy = current->mempolicy;
+ mempolicy = task_or_cs_mpol(current);
switch (mempolicy->mode) {
case MPOL_PREFERRED:
case MPOL_PREFERRED_MANY:
@@ -2633,13 +2639,16 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
*/
void mpol_put_task_policy(struct task_struct *task)
{
- struct mempolicy *pol;
+ struct mempolicy *pol, *cs_pol;

task_lock(task);
pol = task->mempolicy;
+ cs_pol = task->cs_mpol;
+ task->cs_mpol = NULL;
task->mempolicy = NULL;
task_unlock(task);
mpol_put(pol);
+ mpol_put(cs_pol);
}

static void sp_delete(struct shared_policy *sp, struct sp_node *n)
@@ -2957,8 +2966,7 @@ static const char * const policy_modes[] =
[MPOL_PREFERRED_MANY] = "prefer (many)",
};

-
-#ifdef CONFIG_TMPFS
+#if defined(CONFIG_TMPFS) || defined(CONFIG_NUMA)
/**
* mpol_parse_str - parse string to mempolicy, for tmpfs mpol mount option.
* @str: string containing mempolicy to parse
@@ -3054,6 +3062,9 @@ int mpol_parse_str(char *str, struct mempolicy **mpol)
mode_flags |= MPOL_F_RELATIVE_NODES;
else
goto out;
+ } else {
+ /*use static mode_flags in default*/
+ mode_flags |= MPOL_F_STATIC_NODES;
}

new = mpol_new(mode, mode_flags, &nodes);

base-commit: c40e8341e3b3bb27e3a65b06b5b454626234c4f0
--
2.25.1


2022-09-04 06:21:53

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH] cgroup/cpuset: Add a new isolated mems.policy type.

Hi hezhongkun,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on c40e8341e3b3bb27e3a65b06b5b454626234c4f0]

url: https://github.com/intel-lab-lkp/linux/commits/hezhongkun/cgroup-cpuset-Add-a-new-isolated-mems-policy-type/20220904-120436
base: c40e8341e3b3bb27e3a65b06b5b454626234c4f0
config: x86_64-randconfig-a014
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/aafa4e2c9e5c3cd4d02190ac2a7ed0654ed9cf47
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review hezhongkun/cgroup-cpuset-Add-a-new-isolated-mems-policy-type/20220904-120436
git checkout aafa4e2c9e5c3cd4d02190ac2a7ed0654ed9cf47
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash kernel/cgroup/

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <[email protected]>

All warnings (new ones prefixed by >>):

>> kernel/cgroup/cpuset.c:2515:13: warning: unused variable 'cs_allowed' [-Wunused-variable]
nodemask_t cs_allowed;
^
1 warning generated.


vim +/cs_allowed +2515 kernel/cgroup/cpuset.c

2508
2509 /* change cpuset mempolicy */
2510 static ssize_t cpuset_mpol_write(struct kernfs_open_file *of,
2511 char *buf, size_t nbytes, loff_t off)
2512 {
2513 struct mempolicy *mpol, *old = NULL;
2514 struct cpuset *cs = css_cs(of_css(of));
> 2515 nodemask_t cs_allowed;
2516 int err = -ENODEV;
2517
2518 css_get(&cs->css);
2519 kernfs_break_active_protection(of->kn);
2520 percpu_down_write(&cpuset_rwsem);
2521
2522 if (!is_cpuset_online(cs))
2523 goto out_unlock;
2524
2525 buf = strstrip(buf);
2526 err = mpol_parse_str(buf, &mpol);
2527
2528 if (err) {
2529 err = -EINVAL;
2530 goto out_unlock;
2531 }
2532
2533 spin_lock_irq(&callback_lock);
2534 old = cs->mempolicy;
2535 update_cs_mpol_nodemask(cs, mpol);
2536 cs->mempolicy = mpol;
2537 spin_unlock_irq(&callback_lock);
2538
2539 update_tasks_cs_mpol(cs);
2540
2541 out_unlock:
2542 percpu_up_write(&cpuset_rwsem);
2543 kernfs_unbreak_active_protection(of->kn);
2544 css_put(&cs->css);
2545
2546 if (old) {
2547 /*Wait for outstanding programs to complete.*/
2548 synchronize_rcu();
2549 mpol_put(old);
2550 }
2551 return err ?: nbytes;
2552 }
2553

--
0-DAY CI Kernel Test Service
https://01.org/lkp


Attachments:
(No filename) (2.90 kB)
config (165.43 kB)
Download all attachments

2022-09-04 06:36:33

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH] cgroup/cpuset: Add a new isolated mems.policy type.

Hi hezhongkun,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on c40e8341e3b3bb27e3a65b06b5b454626234c4f0]

url: https://github.com/intel-lab-lkp/linux/commits/hezhongkun/cgroup-cpuset-Add-a-new-isolated-mems-policy-type/20220904-120436
base: c40e8341e3b3bb27e3a65b06b5b454626234c4f0
config: x86_64-defconfig
compiler: gcc-11 (Debian 11.3.0-5) 11.3.0
reproduce (this is a W=1 build):
# https://github.com/intel-lab-lkp/linux/commit/aafa4e2c9e5c3cd4d02190ac2a7ed0654ed9cf47
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review hezhongkun/cgroup-cpuset-Add-a-new-isolated-mems-policy-type/20220904-120436
git checkout aafa4e2c9e5c3cd4d02190ac2a7ed0654ed9cf47
# save the config file
mkdir build_dir && cp config build_dir/.config
make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash kernel/cgroup/

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <[email protected]>

All warnings (new ones prefixed by >>):

kernel/cgroup/cpuset.c: In function 'cpuset_mpol_write':
>> kernel/cgroup/cpuset.c:2515:20: warning: unused variable 'cs_allowed' [-Wunused-variable]
2515 | nodemask_t cs_allowed;
| ^~~~~~~~~~


vim +/cs_allowed +2515 kernel/cgroup/cpuset.c

2508
2509 /* change cpuset mempolicy */
2510 static ssize_t cpuset_mpol_write(struct kernfs_open_file *of,
2511 char *buf, size_t nbytes, loff_t off)
2512 {
2513 struct mempolicy *mpol, *old = NULL;
2514 struct cpuset *cs = css_cs(of_css(of));
> 2515 nodemask_t cs_allowed;
2516 int err = -ENODEV;
2517
2518 css_get(&cs->css);
2519 kernfs_break_active_protection(of->kn);
2520 percpu_down_write(&cpuset_rwsem);
2521
2522 if (!is_cpuset_online(cs))
2523 goto out_unlock;
2524
2525 buf = strstrip(buf);
2526 err = mpol_parse_str(buf, &mpol);
2527
2528 if (err) {
2529 err = -EINVAL;
2530 goto out_unlock;
2531 }
2532
2533 spin_lock_irq(&callback_lock);
2534 old = cs->mempolicy;
2535 update_cs_mpol_nodemask(cs, mpol);
2536 cs->mempolicy = mpol;
2537 spin_unlock_irq(&callback_lock);
2538
2539 update_tasks_cs_mpol(cs);
2540
2541 out_unlock:
2542 percpu_up_write(&cpuset_rwsem);
2543 kernfs_unbreak_active_protection(of->kn);
2544 css_put(&cs->css);
2545
2546 if (old) {
2547 /*Wait for outstanding programs to complete.*/
2548 synchronize_rcu();
2549 mpol_put(old);
2550 }
2551 return err ?: nbytes;
2552 }
2553

--
0-DAY CI Kernel Test Service
https://01.org/lkp


Attachments:
(No filename) (2.69 kB)
config (137.82 kB)
Download all attachments

2022-09-04 08:21:58

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH] cgroup/cpuset: Add a new isolated mems.policy type.

Hi hezhongkun,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on c40e8341e3b3bb27e3a65b06b5b454626234c4f0]

url: https://github.com/intel-lab-lkp/linux/commits/hezhongkun/cgroup-cpuset-Add-a-new-isolated-mems-policy-type/20220904-120436
base: c40e8341e3b3bb27e3a65b06b5b454626234c4f0
config: s390-randconfig-r044-20220904
compiler: s390-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/aafa4e2c9e5c3cd4d02190ac2a7ed0654ed9cf47
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review hezhongkun/cgroup-cpuset-Add-a-new-isolated-mems-policy-type/20220904-120436
git checkout aafa4e2c9e5c3cd4d02190ac2a7ed0654ed9cf47
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=s390 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <[email protected]>

All warnings (new ones prefixed by >>):

>> mm/mempolicy.c:2980:5: warning: no previous prototype for 'mpol_parse_str' [-Wmissing-prototypes]
2980 | int mpol_parse_str(char *str, struct mempolicy **mpol)
| ^~~~~~~~~~~~~~


vim +/mpol_parse_str +2980 mm/mempolicy.c

1a75a6c825c172 Christoph Lameter 2006-01-08 2968
aafa4e2c9e5c3c Zhongkun He 2022-09-04 2969 #if defined(CONFIG_TMPFS) || defined(CONFIG_NUMA)
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 2970 /**
f2a07f40dbc603 Hugh Dickins 2013-01-02 2971 * mpol_parse_str - parse string to mempolicy, for tmpfs mpol mount option.
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 2972 * @str: string containing mempolicy to parse
71fe804b6d56d6 Lee Schermerhorn 2008-04-28 2973 * @mpol: pointer to struct mempolicy pointer, returned on success.
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 2974 *
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 2975 * Format of input:
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 2976 * <mode>[=<flags>][:<nodelist>]
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 2977 *
dad5b023294981 Randy Dunlap 2022-01-14 2978 * Return: %0 on success, else %1
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 2979 */
a7a88b23737095 Hugh Dickins 2013-01-02 @2980 int mpol_parse_str(char *str, struct mempolicy **mpol)
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 2981 {
71fe804b6d56d6 Lee Schermerhorn 2008-04-28 2982 struct mempolicy *new = NULL;
f2a07f40dbc603 Hugh Dickins 2013-01-02 2983 unsigned short mode_flags;
71fe804b6d56d6 Lee Schermerhorn 2008-04-28 2984 nodemask_t nodes;
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 2985 char *nodelist = strchr(str, ':');
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 2986 char *flags = strchr(str, '=');
dedf2c73b80b45 zhong jiang 2018-10-26 2987 int err = 1, mode;
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 2988
c7a91bc7c2e17e Dan Carpenter 2020-01-30 2989 if (flags)
c7a91bc7c2e17e Dan Carpenter 2020-01-30 2990 *flags++ = '\0'; /* terminate mode string */
c7a91bc7c2e17e Dan Carpenter 2020-01-30 2991
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 2992 if (nodelist) {
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 2993 /* NUL-terminate mode or flags string */
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 2994 *nodelist++ = '\0';
71fe804b6d56d6 Lee Schermerhorn 2008-04-28 2995 if (nodelist_parse(nodelist, nodes))
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 2996 goto out;
01f13bd607346a Lai Jiangshan 2012-12-12 2997 if (!nodes_subset(nodes, node_states[N_MEMORY]))
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 2998 goto out;
71fe804b6d56d6 Lee Schermerhorn 2008-04-28 2999 } else
71fe804b6d56d6 Lee Schermerhorn 2008-04-28 3000 nodes_clear(nodes);
71fe804b6d56d6 Lee Schermerhorn 2008-04-28 3001
dedf2c73b80b45 zhong jiang 2018-10-26 3002 mode = match_string(policy_modes, MPOL_MAX, str);
dedf2c73b80b45 zhong jiang 2018-10-26 3003 if (mode < 0)
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3004 goto out;
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3005
71fe804b6d56d6 Lee Schermerhorn 2008-04-28 3006 switch (mode) {
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3007 case MPOL_PREFERRED:
71fe804b6d56d6 Lee Schermerhorn 2008-04-28 3008 /*
aa9f7d5172fac9 Randy Dunlap 2020-04-01 3009 * Insist on a nodelist of one node only, although later
aa9f7d5172fac9 Randy Dunlap 2020-04-01 3010 * we use first_node(nodes) to grab a single node, so here
aa9f7d5172fac9 Randy Dunlap 2020-04-01 3011 * nodelist (or nodes) cannot be empty.
71fe804b6d56d6 Lee Schermerhorn 2008-04-28 3012 */
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3013 if (nodelist) {
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3014 char *rest = nodelist;
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3015 while (isdigit(*rest))
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3016 rest++;
926f2ae04f1830 KOSAKI Motohiro 2010-03-23 3017 if (*rest)
926f2ae04f1830 KOSAKI Motohiro 2010-03-23 3018 goto out;
aa9f7d5172fac9 Randy Dunlap 2020-04-01 3019 if (nodes_empty(nodes))
aa9f7d5172fac9 Randy Dunlap 2020-04-01 3020 goto out;
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3021 }
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3022 break;
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3023 case MPOL_INTERLEAVE:
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3024 /*
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3025 * Default to online nodes with memory if no nodelist
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3026 */
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3027 if (!nodelist)
01f13bd607346a Lai Jiangshan 2012-12-12 3028 nodes = node_states[N_MEMORY];
3f226aa1cbc006 Lee Schermerhorn 2008-04-28 3029 break;
71fe804b6d56d6 Lee Schermerhorn 2008-04-28 3030 case MPOL_LOCAL:
3f226aa1cbc006 Lee Schermerhorn 2008-04-28 3031 /*
71fe804b6d56d6 Lee Schermerhorn 2008-04-28 3032 * Don't allow a nodelist; mpol_new() checks flags
3f226aa1cbc006 Lee Schermerhorn 2008-04-28 3033 */
71fe804b6d56d6 Lee Schermerhorn 2008-04-28 3034 if (nodelist)
3f226aa1cbc006 Lee Schermerhorn 2008-04-28 3035 goto out;
3f226aa1cbc006 Lee Schermerhorn 2008-04-28 3036 break;
413b43deab8377 Ravikiran G Thirumalai 2010-03-23 3037 case MPOL_DEFAULT:
413b43deab8377 Ravikiran G Thirumalai 2010-03-23 3038 /*
413b43deab8377 Ravikiran G Thirumalai 2010-03-23 3039 * Insist on a empty nodelist
413b43deab8377 Ravikiran G Thirumalai 2010-03-23 3040 */
413b43deab8377 Ravikiran G Thirumalai 2010-03-23 3041 if (!nodelist)
413b43deab8377 Ravikiran G Thirumalai 2010-03-23 3042 err = 0;
413b43deab8377 Ravikiran G Thirumalai 2010-03-23 3043 goto out;
b27abaccf8e8b0 Dave Hansen 2021-09-02 3044 case MPOL_PREFERRED_MANY:
d69b2e63e9172a KOSAKI Motohiro 2010-03-23 3045 case MPOL_BIND:
71fe804b6d56d6 Lee Schermerhorn 2008-04-28 3046 /*
d69b2e63e9172a KOSAKI Motohiro 2010-03-23 3047 * Insist on a nodelist
71fe804b6d56d6 Lee Schermerhorn 2008-04-28 3048 */
d69b2e63e9172a KOSAKI Motohiro 2010-03-23 3049 if (!nodelist)
d69b2e63e9172a KOSAKI Motohiro 2010-03-23 3050 goto out;
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3051 }
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3052
71fe804b6d56d6 Lee Schermerhorn 2008-04-28 3053 mode_flags = 0;
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3054 if (flags) {
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3055 /*
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3056 * Currently, we only support two mutually exclusive
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3057 * mode flags.
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3058 */
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3059 if (!strcmp(flags, "static"))
71fe804b6d56d6 Lee Schermerhorn 2008-04-28 3060 mode_flags |= MPOL_F_STATIC_NODES;
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3061 else if (!strcmp(flags, "relative"))
71fe804b6d56d6 Lee Schermerhorn 2008-04-28 3062 mode_flags |= MPOL_F_RELATIVE_NODES;
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3063 else
926f2ae04f1830 KOSAKI Motohiro 2010-03-23 3064 goto out;
aafa4e2c9e5c3c Zhongkun He 2022-09-04 3065 } else {
aafa4e2c9e5c3c Zhongkun He 2022-09-04 3066 /*use static mode_flags in default*/
aafa4e2c9e5c3c Zhongkun He 2022-09-04 3067 mode_flags |= MPOL_F_STATIC_NODES;
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3068 }
71fe804b6d56d6 Lee Schermerhorn 2008-04-28 3069
71fe804b6d56d6 Lee Schermerhorn 2008-04-28 3070 new = mpol_new(mode, mode_flags, &nodes);
71fe804b6d56d6 Lee Schermerhorn 2008-04-28 3071 if (IS_ERR(new))
926f2ae04f1830 KOSAKI Motohiro 2010-03-23 3072 goto out;
926f2ae04f1830 KOSAKI Motohiro 2010-03-23 3073
f2a07f40dbc603 Hugh Dickins 2013-01-02 3074 /*
f2a07f40dbc603 Hugh Dickins 2013-01-02 3075 * Save nodes for mpol_to_str() to show the tmpfs mount options
f2a07f40dbc603 Hugh Dickins 2013-01-02 3076 * for /proc/mounts, /proc/pid/mounts and /proc/pid/mountinfo.
f2a07f40dbc603 Hugh Dickins 2013-01-02 3077 */
269fbe72cded0a Ben Widawsky 2021-06-30 3078 if (mode != MPOL_PREFERRED) {
269fbe72cded0a Ben Widawsky 2021-06-30 3079 new->nodes = nodes;
269fbe72cded0a Ben Widawsky 2021-06-30 3080 } else if (nodelist) {
269fbe72cded0a Ben Widawsky 2021-06-30 3081 nodes_clear(new->nodes);
269fbe72cded0a Ben Widawsky 2021-06-30 3082 node_set(first_node(nodes), new->nodes);
269fbe72cded0a Ben Widawsky 2021-06-30 3083 } else {
7858d7bca7fbbb Feng Tang 2021-06-30 3084 new->mode = MPOL_LOCAL;
269fbe72cded0a Ben Widawsky 2021-06-30 3085 }
f2a07f40dbc603 Hugh Dickins 2013-01-02 3086
f2a07f40dbc603 Hugh Dickins 2013-01-02 3087 /*
f2a07f40dbc603 Hugh Dickins 2013-01-02 3088 * Save nodes for contextualization: this will be used to "clone"
f2a07f40dbc603 Hugh Dickins 2013-01-02 3089 * the mempolicy in a specific context [cpuset] at a later time.
f2a07f40dbc603 Hugh Dickins 2013-01-02 3090 */
e17f74af351cce Lee Schermerhorn 2010-05-24 3091 new->w.user_nodemask = nodes;
f2a07f40dbc603 Hugh Dickins 2013-01-02 3092
926f2ae04f1830 KOSAKI Motohiro 2010-03-23 3093 err = 0;
71fe804b6d56d6 Lee Schermerhorn 2008-04-28 3094
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3095 out:
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3096 /* Restore string for error message */
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3097 if (nodelist)
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3098 *--nodelist = ':';
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3099 if (flags)
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3100 *--flags = '=';
71fe804b6d56d6 Lee Schermerhorn 2008-04-28 3101 if (!err)
71fe804b6d56d6 Lee Schermerhorn 2008-04-28 3102 *mpol = new;
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3103 return err;
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3104 }
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3105 #endif /* CONFIG_TMPFS */
095f1fc4ebf36c Lee Schermerhorn 2008-04-28 3106

--
0-DAY CI Kernel Test Service
https://01.org/lkp


Attachments:
(No filename) (12.18 kB)
config (73.64 kB)
Download all attachments

2022-09-04 23:40:45

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH] cgroup/cpuset: Add a new isolated mems.policy type.

Hi hezhongkun,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on c40e8341e3b3bb27e3a65b06b5b454626234c4f0]

url: https://github.com/intel-lab-lkp/linux/commits/hezhongkun/cgroup-cpuset-Add-a-new-isolated-mems-policy-type/20220904-120436
base: c40e8341e3b3bb27e3a65b06b5b454626234c4f0
config: ia64-randconfig-c44-20220904
compiler: ia64-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/aafa4e2c9e5c3cd4d02190ac2a7ed0654ed9cf47
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review hezhongkun/cgroup-cpuset-Add-a-new-isolated-mems-policy-type/20220904-120436
git checkout aafa4e2c9e5c3cd4d02190ac2a7ed0654ed9cf47
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=ia64 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <[email protected]>

All errors (new ones prefixed by >>):

kernel/cgroup/cpuset.c: In function 'cpuset_mpol_write':
>> kernel/cgroup/cpuset.c:2526:15: error: implicit declaration of function 'mpol_parse_str'; did you mean 'mpol_to_str'? [-Werror=implicit-function-declaration]
2526 | err = mpol_parse_str(buf, &mpol);
| ^~~~~~~~~~~~~~
| mpol_to_str
kernel/cgroup/cpuset.c:2515:20: warning: unused variable 'cs_allowed' [-Wunused-variable]
2515 | nodemask_t cs_allowed;
| ^~~~~~~~~~
cc1: some warnings being treated as errors


vim +2526 kernel/cgroup/cpuset.c

2508
2509 /* change cpuset mempolicy */
2510 static ssize_t cpuset_mpol_write(struct kernfs_open_file *of,
2511 char *buf, size_t nbytes, loff_t off)
2512 {
2513 struct mempolicy *mpol, *old = NULL;
2514 struct cpuset *cs = css_cs(of_css(of));
2515 nodemask_t cs_allowed;
2516 int err = -ENODEV;
2517
2518 css_get(&cs->css);
2519 kernfs_break_active_protection(of->kn);
2520 percpu_down_write(&cpuset_rwsem);
2521
2522 if (!is_cpuset_online(cs))
2523 goto out_unlock;
2524
2525 buf = strstrip(buf);
> 2526 err = mpol_parse_str(buf, &mpol);
2527
2528 if (err) {
2529 err = -EINVAL;
2530 goto out_unlock;
2531 }
2532
2533 spin_lock_irq(&callback_lock);
2534 old = cs->mempolicy;
2535 update_cs_mpol_nodemask(cs, mpol);
2536 cs->mempolicy = mpol;
2537 spin_unlock_irq(&callback_lock);
2538
2539 update_tasks_cs_mpol(cs);
2540
2541 out_unlock:
2542 percpu_up_write(&cpuset_rwsem);
2543 kernfs_unbreak_active_protection(of->kn);
2544 css_put(&cs->css);
2545
2546 if (old) {
2547 /*Wait for outstanding programs to complete.*/
2548 synchronize_rcu();
2549 mpol_put(old);
2550 }
2551 return err ?: nbytes;
2552 }
2553

--
0-DAY CI Kernel Test Service
https://01.org/lkp


Attachments:
(No filename) (3.19 kB)
config (151.48 kB)
Download all attachments

2022-09-05 06:57:17

by Michal Hocko

[permalink] [raw]
Subject: Re: [PATCH] cgroup/cpuset: Add a new isolated mems.policy type.

On Sun 04-09-22 12:02:41, hezhongkun wrote:
> From: Zhongkun He <[email protected]>
>
> Mempolicy is difficult to use because it is set in-process
> via a system call. We want to make it easier to use mempolicy
> in cpuset, and we can control low-priority cgroups to
> allocate memory in specified nodes. So this patch
> adds a mempolicy interface to cpuset.
>
> The cpuset mempolicy has lower priority than the task mempolicy.
> The policy lookup order is:
> 1) vma mempolicy
> 2) task->mempolicy
> 3) cpuset->mempolicy
> 4) default policy.
>
> Each cpuset owns its own policy; a newly created descendant
> inherits its parent's mempolicy as its default.

What is the hierarchical behavior of the policy? Say parent has a
stronger requirement (say bind) than a child (prefer)?

> How to use the mempolicy interface:
> echo prefer:2 > /sys/fs/cgroup/zz/cpuset.mems.policy
> echo bind:1-3 > /sys/fs/cgroup/zz/cpuset.mems.policy
> echo interleave:0,1,2,3 >/sys/fs/cgroup/zz/cpuset.mems.policy

Am I just confused or did you really mean to combine all these
together?
--
Michal Hocko
SUSE Labs

2022-09-05 12:02:59

by Zhongkun He

[permalink] [raw]
Subject: Re: [External] Re: [PATCH] cgroup/cpuset: Add a new isolated mems.policy type.

Hi Michal, thanks for your reply.

The current 'mempolicy' is hierarchically independent. By default, a
child inherits its policy from the parent, but later changes to the
child's policy are not restricted by the parent.

Of course, there are other options, such as requiring the child's policy
mode to be the same as the parent's, with the child's nodes being a subset
of the parent's. However, that becomes complicated for the interleave type,
which is why hierarchical independence is used. Do you have other suggestions?

Thanks.

> On Sun 04-09-22 12:02:41, hezhongkun wrote:
>> From: Zhongkun He <[email protected]>
>>
>> Mempolicy is difficult to use because it is set in-process
>> via a system call. We want to make it easier to use mempolicy
>> in cpuset, and we can control low-priority cgroups to
>> allocate memory in specified nodes. So this patch
>> adds a mempolicy interface to cpuset.
>>
>> The cpuset mempolicy has lower priority than the task mempolicy.
>> The policy lookup order is:
>> 1) vma mempolicy
>> 2) task->mempolicy
>> 3) cpuset->mempolicy
>> 4) default policy.
>>
>> Each cpuset owns its own policy; a newly created descendant
>> inherits its parent's mempolicy as its default.
>
> What is the hierarchical behavior of the policy? Say parent has a
> stronger requirement (say bind) than a child (prefer)?
>
>> How to use the mempolicy interface:
>> echo prefer:2 > /sys/fs/cgroup/zz/cpuset.mems.policy
>> echo bind:1-3 > /sys/fs/cgroup/zz/cpuset.mems.policy
>> echo interleave:0,1,2,3 >/sys/fs/cgroup/zz/cpuset.mems.policy
>
> Am I just confused or did you really mean to combine all these
> together?