2011-02-24 06:55:14

by Li Zefan

[permalink] [raw]
Subject: [PATCH v2 1/4] cpuset: Remove unneeded NODEMASK_ALLOC() in cpuset_sprintf_memlist()

It's not necessary to copy cpuset->mems_allowed to a buffer
allocated by NODEMASK_ALLOC(). Just pass it to nodelist_scnprintf().

As spotted by Paul, a side effect is that this fixes a bug: the function
could return -ENOMEM, but the caller doesn't expect a negative return
value. Therefore change the return type of cpuset_sprintf_cpulist()
and cpuset_sprintf_memlist() from int to size_t.

Acked-by: Paul Menage <[email protected]>
Acked-by: David Rientjes <[email protected]>
Signed-off-by: Li Zefan <[email protected]>
---
kernel/cpuset.c | 24 ++++++++----------------
1 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 10f1835..e79650b 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1607,34 +1607,26 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
* across a page fault.
*/

-static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
+static size_t cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
{
- int ret;
+ size_t count;

mutex_lock(&callback_mutex);
- ret = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
+ count = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
mutex_unlock(&callback_mutex);

- return ret;
+ return count;
}

-static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
+static size_t cpuset_sprintf_memlist(char *page, struct cpuset *cs)
{
- NODEMASK_ALLOC(nodemask_t, mask, GFP_KERNEL);
- int retval;
-
- if (mask == NULL)
- return -ENOMEM;
+ size_t count;

mutex_lock(&callback_mutex);
- *mask = cs->mems_allowed;
+ count = nodelist_scnprintf(page, PAGE_SIZE, cs->mems_allowed);
mutex_unlock(&callback_mutex);

- retval = nodelist_scnprintf(page, PAGE_SIZE, *mask);
-
- NODEMASK_FREE(mask);
-
- return retval;
+ return count;
}

static ssize_t cpuset_common_file_read(struct cgroup *cont,
--
1.7.3.1


2011-02-24 06:55:32

by Li Zefan

[permalink] [raw]
Subject: [PATCH v2 2/4] cpuset: Remove unneeded NODEMASK_ALLOC() in cpuset_attach()

The variable 'from' is not modified after it's copied from
oldcs->mems_allowed, so we can just pass oldcs->mems_allowed
to cpuset_migrate_mm().

Acked-by: David Rientjes <[email protected]>
Signed-off-by: Li Zefan <[email protected]>
---
kernel/cpuset.c | 7 ++-----
1 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index e79650b..8fef8c6 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1438,10 +1438,9 @@ static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
struct mm_struct *mm;
struct cpuset *cs = cgroup_cs(cont);
struct cpuset *oldcs = cgroup_cs(oldcont);
- NODEMASK_ALLOC(nodemask_t, from, GFP_KERNEL);
NODEMASK_ALLOC(nodemask_t, to, GFP_KERNEL);

- if (from == NULL || to == NULL)
+ if (to == NULL)
goto alloc_fail;

if (cs == &top_cpuset) {
@@ -1463,18 +1462,16 @@ static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
}

/* change mm; only needs to be done once even if threadgroup */
- *from = oldcs->mems_allowed;
*to = cs->mems_allowed;
mm = get_task_mm(tsk);
if (mm) {
mpol_rebind_mm(mm, to);
if (is_memory_migrate(cs))
- cpuset_migrate_mm(mm, from, to);
+ cpuset_migrate_mm(mm, &oldcs->mems_allowed, to);
mmput(mm);
}

alloc_fail:
- NODEMASK_FREE(from);
NODEMASK_FREE(to);
}

--
1.7.3.1

2011-02-24 06:55:49

by Li Zefan

[permalink] [raw]
Subject: [PATCH v2 3/4] cpuset: Fix unchecked calls to NODEMASK_ALLOC()

Those functions that use NODEMASK_ALLOC() can't propagate errno
to users, so they might fail silently.

Fix it by using a static nodemask_t variable for each function;
those variables are protected by cgroup_mutex.

Acked-by: David Rientjes <[email protected]>
Signed-off-by: Li Zefan <[email protected]>
---
kernel/cpuset.c | 51 ++++++++++++++++-----------------------------------
1 files changed, 16 insertions(+), 35 deletions(-)

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 8fef8c6..3f93e5a 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1015,17 +1015,12 @@ static void cpuset_change_nodemask(struct task_struct *p,
struct cpuset *cs;
int migrate;
const nodemask_t *oldmem = scan->data;
- NODEMASK_ALLOC(nodemask_t, newmems, GFP_KERNEL);
-
- if (!newmems)
- return;
+ static nodemask_t newmems; /* protected by cgroup_mutex */

cs = cgroup_cs(scan->cg);
- guarantee_online_mems(cs, newmems);
+ guarantee_online_mems(cs, &newmems);

- cpuset_change_task_nodemask(p, newmems);
-
- NODEMASK_FREE(newmems);
+ cpuset_change_task_nodemask(p, &newmems);

mm = get_task_mm(p);
if (!mm)
@@ -1438,41 +1433,35 @@ static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
struct mm_struct *mm;
struct cpuset *cs = cgroup_cs(cont);
struct cpuset *oldcs = cgroup_cs(oldcont);
- NODEMASK_ALLOC(nodemask_t, to, GFP_KERNEL);
-
- if (to == NULL)
- goto alloc_fail;
+ static nodemask_t to; /* protected by cgroup_mutex */

if (cs == &top_cpuset) {
cpumask_copy(cpus_attach, cpu_possible_mask);
} else {
guarantee_online_cpus(cs, cpus_attach);
}
- guarantee_online_mems(cs, to);
+ guarantee_online_mems(cs, &to);

/* do per-task migration stuff possibly for each in the threadgroup */
- cpuset_attach_task(tsk, to, cs);
+ cpuset_attach_task(tsk, &to, cs);
if (threadgroup) {
struct task_struct *c;
rcu_read_lock();
list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
- cpuset_attach_task(c, to, cs);
+ cpuset_attach_task(c, &to, cs);
}
rcu_read_unlock();
}

/* change mm; only needs to be done once even if threadgroup */
- *to = cs->mems_allowed;
+ to = cs->mems_allowed;
mm = get_task_mm(tsk);
if (mm) {
- mpol_rebind_mm(mm, to);
+ mpol_rebind_mm(mm, &to);
if (is_memory_migrate(cs))
- cpuset_migrate_mm(mm, &oldcs->mems_allowed, to);
+ cpuset_migrate_mm(mm, &oldcs->mems_allowed, &to);
mmput(mm);
}
-
-alloc_fail:
- NODEMASK_FREE(to);
}

/* The various types of files and directories in a cpuset file system */
@@ -2051,10 +2040,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
struct cpuset *cp; /* scans cpusets being updated */
struct cpuset *child; /* scans child cpusets of cp */
struct cgroup *cont;
- NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL);
-
- if (oldmems == NULL)
- return;
+ static nodemask_t oldmems; /* protected by cgroup_mutex */

list_add_tail((struct list_head *)&root->stack_list, &queue);

@@ -2071,7 +2057,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
continue;

- *oldmems = cp->mems_allowed;
+ oldmems = cp->mems_allowed;

/* Remove offline cpus and mems from this cpuset. */
mutex_lock(&callback_mutex);
@@ -2087,10 +2073,9 @@ static void scan_for_empty_cpusets(struct cpuset *root)
remove_tasks_in_empty_cpuset(cp);
else {
update_tasks_cpumask(cp, NULL);
- update_tasks_nodemask(cp, oldmems, NULL);
+ update_tasks_nodemask(cp, &oldmems, NULL);
}
}
- NODEMASK_FREE(oldmems);
}

/*
@@ -2132,19 +2117,16 @@ void cpuset_update_active_cpus(void)
static int cpuset_track_online_nodes(struct notifier_block *self,
unsigned long action, void *arg)
{
- NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL);
-
- if (oldmems == NULL)
- return NOTIFY_DONE;
+ static nodemask_t oldmems; /* protected by cgroup_mutex */

cgroup_lock();
switch (action) {
case MEM_ONLINE:
- *oldmems = top_cpuset.mems_allowed;
+ oldmems = top_cpuset.mems_allowed;
mutex_lock(&callback_mutex);
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
mutex_unlock(&callback_mutex);
- update_tasks_nodemask(&top_cpuset, oldmems, NULL);
+ update_tasks_nodemask(&top_cpuset, &oldmems, NULL);
break;
case MEM_OFFLINE:
/*
@@ -2158,7 +2140,6 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
}
cgroup_unlock();

- NODEMASK_FREE(oldmems);
return NOTIFY_OK;
}
#endif
--
1.7.3.1

2011-02-24 06:56:08

by Li Zefan

[permalink] [raw]
Subject: [PATCH v2 4/4] cpuset: Hold callback_mutex in cpuset_post_clone()

Changing cpuset->mems/cpuset->cpus should be protected under
callback_mutex.

cpuset_post_clone() doesn't follow this rule. It's ok because it's
called when creating/initializing a cgroup, but we'd better
hold the lock to avoid subtle breakage in the future.

Acked-by: Paul Menage <[email protected]>
Acked-by: David Rientjes <[email protected]>
Signed-off-by: Li Zefan <[email protected]>
---
kernel/cpuset.c | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 3f93e5a..1ca786a 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1836,8 +1836,10 @@ static void cpuset_post_clone(struct cgroup_subsys *ss,
cs = cgroup_cs(cgroup);
parent_cs = cgroup_cs(parent);

+ mutex_lock(&callback_mutex);
cs->mems_allowed = parent_cs->mems_allowed;
cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed);
+ mutex_unlock(&callback_mutex);
return;
}

--
1.7.3.1