2007-10-12 09:34:35

by Milton Miller

[permalink] [raw]
Subject: [PATCH] sched domain debug sysctl fixes

The following 5 patches provide fixes for the sysctl code added under
CONFIG_DEBUG_SCHED. After debugging a boot failure dereferencing
not per-cpu data for an impossible cpu, I found the other issues by
inspection.

Following these fixes, /proc/sysctl/kernel/sched_debug/ should work with
exclusive cpusets, cpu hotplug, offline and not present cpus, and will
show all 11 files.

milton


2007-10-12 09:35:40

by Milton Miller

[permalink] [raw]
Subject: [PATCH] sched domain sysctl: free and rebuild when changing domains

Unregister and free the sysctl table before destroying domains, then
rebuild and register after creating the new domains. This prevents the
sysctl table from pointing to freed memory for root to write.

Signed-off-by: Milton Miller <[email protected]>

Index: kernel/kernel/sched.c
===================================================================
--- kernel.orig/kernel/sched.c 2007-10-12 03:55:19.000000000 -0500
+++ kernel/kernel/sched.c 2007-10-12 03:58:35.000000000 -0500
@@ -5291,6 +5291,18 @@ static struct ctl_table *sd_alloc_ctl_en
return entry;
}

+static void sd_free_ctl_entry(struct ctl_table **tablep)
+{
+ struct ctl_table *entry = *tablep;
+
+ for (entry = *tablep; entry->procname; entry++)
+ if (entry->child)
+ sd_free_ctl_entry(&entry->child);
+
+ kfree(*tablep);
+ *tablep = NULL;
+}
+
static void
set_table_entry(struct ctl_table *entry,
const char *procname, void *data, int maxlen,
@@ -5359,7 +5371,7 @@ static ctl_table *sd_alloc_ctl_cpu_table
}

static struct ctl_table_header *sd_sysctl_header;
-static void init_sched_domain_sysctl(void)
+static void register_sched_domain_sysctl(void)
{
int i, cpu_num = num_online_cpus();
struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
@@ -5376,8 +5388,18 @@ static void init_sched_domain_sysctl(voi
}
sd_sysctl_header = register_sysctl_table(sd_ctl_root);
}
+
+static void unregister_sched_domain_sysctl(void)
+{
+ unregister_sysctl_table(sd_sysctl_header);
+ sd_sysctl_header = NULL;
+ sd_free_ctl_entry(&sd_ctl_dir[0].child);
+}
#else
-static void init_sched_domain_sysctl(void)
+static void register_sched_domain_sysctl(void)
+{
+}
+static void unregister_sched_domain_sysctl(void)
{
}
#endif
@@ -6311,6 +6333,8 @@ static int arch_init_sched_domains(const

err = build_sched_domains(&cpu_default_map);

+ register_sched_domain_sysctl();
+
return err;
}

@@ -6327,6 +6351,8 @@ static void detach_destroy_domains(const
{
int i;

+ unregister_sched_domain_sysctl();
+
for_each_cpu_mask(i, *cpu_map)
cpu_attach_domain(NULL, i);
synchronize_sched();
@@ -6357,6 +6383,8 @@ int partition_sched_domains(cpumask_t *p
if (!err && !cpus_empty(*partition2))
err = build_sched_domains(partition2);

+ register_sched_domain_sysctl();
+
return err;
}

@@ -6488,8 +6516,6 @@ void __init sched_init_smp(void)
/* XXX: Theoretical race here - CPU may be hotplugged now */
hotcpu_notifier(update_sched_domains, 0);

- init_sched_domain_sysctl();
-
/* Move init over to a non-isolated CPU */
if (set_cpus_allowed(current, non_isolated_cpus) < 0)
BUG();

2007-10-12 09:35:55

by Milton Miller

[permalink] [raw]
Subject: [PATCH] sched.c: use kcalloc

kcalloc checks for n * sizeof(element) overflows and it zeros.

Signed-off-by: Milton Miller <[email protected]>


Index: kernel/kernel/sched.c
===================================================================
--- kernel.orig/kernel/sched.c 2007-10-12 03:58:35.000000000 -0500
+++ kernel/kernel/sched.c 2007-10-12 03:58:38.000000000 -0500
@@ -5283,10 +5283,9 @@ static struct ctl_table sd_ctl_root[] =
static struct ctl_table *sd_alloc_ctl_entry(int n)
{
struct ctl_table *entry =
- kmalloc(n * sizeof(struct ctl_table), GFP_KERNEL);
+ kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL);

BUG_ON(!entry);
- memset(entry, 0, n * sizeof(struct ctl_table));

return entry;
}
@@ -6080,7 +6079,7 @@ static int build_sched_domains(const cpu
/*
* Allocate the per-node list of sched groups
*/
- sched_group_nodes = kzalloc(sizeof(struct sched_group *)*MAX_NUMNODES,
+ sched_group_nodes = kcalloc(MAX_NUMNODES, sizeof(struct sched_group *),
GFP_KERNEL);
if (!sched_group_nodes) {
printk(KERN_WARNING "Can not alloc sched group node list\n");

2007-10-12 09:36:20

by Milton Miller

[permalink] [raw]
Subject: [PATCH] sched domain sysctl: register online cpus

init_sched_domain_sysctl was walking cpus 0-n and referencing per_cpu
variables. If the cpus_possible mask is not contigious this will result
in a crash referencing unallocated data. If the online mask is not
contigious then we would show offline cpus and miss online ones.

Signed-off-by: Milton Miller <[email protected]>

Index: kernel/kernel/sched.c
===================================================================
--- kernel.orig/kernel/sched.c 2007-10-11 23:48:10.000000000 -0500
+++ kernel/kernel/sched.c 2007-10-11 23:55:51.000000000 -0500
@@ -5367,11 +5367,12 @@ static void init_sched_domain_sysctl(voi

sd_ctl_dir[0].child = entry;

- for (i = 0; i < cpu_num; i++, entry++) {
+ for_each_online_cpu(i) {
snprintf(buf, 32, "cpu%d", i);
entry->procname = kstrdup(buf, GFP_KERNEL);
entry->mode = 0555;
entry->child = sd_alloc_ctl_cpu_table(i);
+ entry++;
}
sd_sysctl_header = register_sysctl_table(sd_ctl_root);
}

2007-10-12 09:36:41

by Milton Miller

[permalink] [raw]
Subject: [PATCH] sched domain sysctl: sysctl tables can have no holes

The register_sysctl_table stops on the first entry without a
procname and ctl_name, so we need to fill in consecutive entries.

Signed-off-by: Milton Miller <[email protected]>

Index: kernel/kernel/sched.c
===================================================================
--- kernel.orig/kernel/sched.c 2007-10-12 03:59:07.000000000 -0500
+++ kernel/kernel/sched.c 2007-10-12 03:59:16.000000000 -0500
@@ -5315,7 +5315,7 @@ set_table_entry(struct ctl_table *entry,
static struct ctl_table *
sd_alloc_ctl_domain_table(struct sched_domain *sd)
{
- struct ctl_table *table = sd_alloc_ctl_entry(14);
+ struct ctl_table *table = sd_alloc_ctl_entry(12);

if (table == NULL)
return NULL;
@@ -5338,11 +5338,12 @@ sd_alloc_ctl_domain_table(struct sched_d
sizeof(int), 0644, proc_dointvec_minmax);
set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct,
sizeof(int), 0644, proc_dointvec_minmax);
- set_table_entry(&table[10], "cache_nice_tries",
+ set_table_entry(&table[9], "cache_nice_tries",
&sd->cache_nice_tries,
sizeof(int), 0644, proc_dointvec_minmax);
- set_table_entry(&table[12], "flags", &sd->flags,
+ set_table_entry(&table[10], "flags", &sd->flags,
sizeof(int), 0644, proc_dointvec_minmax);
+ /* &table[11] is terminator */

return table;
}

2007-10-12 09:37:03

by Milton Miller

[permalink] [raw]
Subject: [PATCH] sched domain sysctl: don't bug on alloc failure

Now that we are calling this at runtime, a more relaxed error path is
suggested. If an allocation fails, we just register the partial table,
which will show empty directories.

Signed-off-by: Milton Miller <[email protected]>

Index: kernel/kernel/sched.c
===================================================================
--- kernel.orig/kernel/sched.c 2007-10-12 03:59:02.000000000 -0500
+++ kernel/kernel/sched.c 2007-10-12 03:59:07.000000000 -0500
@@ -5285,8 +5285,6 @@ static struct ctl_table *sd_alloc_ctl_en
struct ctl_table *entry =
kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL);

- BUG_ON(!entry);
-
return entry;
}

@@ -5319,6 +5317,9 @@ sd_alloc_ctl_domain_table(struct sched_d
{
struct ctl_table *table = sd_alloc_ctl_entry(14);

+ if (table == NULL)
+ return NULL;
+
set_table_entry(&table[0], "min_interval", &sd->min_interval,
sizeof(long), 0644, proc_doulongvec_minmax);
set_table_entry(&table[1], "max_interval", &sd->max_interval,
@@ -5356,6 +5357,8 @@ static ctl_table *sd_alloc_ctl_cpu_table
for_each_domain(cpu, sd)
domain_num++;
entry = table = sd_alloc_ctl_entry(domain_num + 1);
+ if (table == NULL)
+ return NULL;

i = 0;
for_each_domain(cpu, sd) {
@@ -5376,6 +5379,9 @@ static void register_sched_domain_sysctl
struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
char buf[32];

+ if (entry == NULL)
+ return;
+
sd_ctl_dir[0].child = entry;

for_each_online_cpu(i) {

2007-10-12 09:54:27

by Milton Miller

[permalink] [raw]
Subject: [PATCH] sched domain debug sysctl fixes


I forgot to put the seqence numbers in the patch subjects (I did
in the message ids).

Here is the order I diffed them:

sched domain sysctl: register online cpus
sched domain sysctl: free and rebuild when changing domains
sched.c: use kcalloc
sched domain sysctl: don't bug on alloc failure
sched domain sysctl: sysctl tables can have no holes

milton

2007-10-15 07:47:21

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH] sched domain debug sysctl fixes


* Milton Miller <[email protected]> wrote:

> The following 5 patches provide fixes for the sysctl code added under
> CONFIG_DEBUG_SCHED. After debugging a boot failure dereferencing not
> per-cpu data for an impossible cpu, I found the other issues by
> inspection.
>
> Following these fixes, /proc/sysctl/kernel/sched_debug/ should work
> with exclusive cpusets, cpu hotplug, offline and not present cpus, and
> will show all 11 files.

thanks, great fixes! They should show up in the next sched-devel git
tree.

Ingo

2007-10-15 23:41:23

by Milton Miller

[permalink] [raw]
Subject: [PATCH] sched domain sysctl: free kstrdup allocations

The procnames for the cpu and domain were allocated via kstrdup and so
should also be freed. The names for the files are static, but we
can differentiate them by the presence of the proc_handler. If a
kstrdup (of < 32 characters) fails the sysctl code will not see the
procname or remaining table entries, but any child tables and names
will be reclaimed upon free.

Signed-off-by: Milton Miller <[email protected]>
---

Hi Ingo.

It occurred to me this morning that the procname field was dynamically
allocated and needed to be freed. I started to put in break statements
when allocation failed but it was approaching 50% error handling code.

I came up with this alternative of looping while entry->mode is set and
checking proc_handler instead of ->table. Alternatively, the string
version of the domain name and cpu number could be stored the structs.

I verified by compiling CONFIG_DEBUG_SLAB and checking the allocation
counts after taking a cpuset exclusive and back.

milton

Index: kernel/kernel/sched.c
===================================================================
--- kernel.orig/kernel/sched.c 2007-10-15 12:21:38.000000000 -0500
+++ kernel/kernel/sched.c 2007-10-15 12:22:12.000000000 -0500
@@ -5290,11 +5290,20 @@ static struct ctl_table *sd_alloc_ctl_en

static void sd_free_ctl_entry(struct ctl_table **tablep)
{
- struct ctl_table *entry = *tablep;
+ struct ctl_table *entry;

- for (entry = *tablep; entry->procname; entry++)
+ /*
+ * In the intermediate directories, both the child directory and
+ * procname are dynamically allocated and could fail but the mode
+ * will always be set. In the lowest directory the names are
+ * static strings and all have proc handlers.
+ */
+ for (entry = *tablep; entry->mode; entry++) {
if (entry->child)
sd_free_ctl_entry(&entry->child);
+ if (entry->proc_handler == NULL)
+ kfree(entry->procname);
+ }

kfree(*tablep);
*tablep = NULL;

2007-10-16 08:44:43

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH] sched domain sysctl: free kstrdup allocations


* Milton Miller <[email protected]> wrote:

> The procnames for the cpu and domain were allocated via kstrdup and so
> should also be freed. The names for the files are static, but we can
> differentiate them by the presence of the proc_handler. If a kstrdup
> (of < 32 characters) fails the sysctl code will not see the procname
> or remaining table entries, but any child tables and names will be
> reclaimed upon free.

thanks, applied.

Ingo