2011-03-29 23:46:53

by Mike Travis

[permalink] [raw]
Subject: [PATCH] bitmap, irq: Add smp_affinity_list interface to /proc/irq

Subject: bitmap, irq: Add smp_affinity_list interface to /proc/irq

Manually adjusting the smp_affinity for IRQ's becomes unwieldy when the
cpu count is large. Add a companion interface, smp_affinity_list to
use cpu lists instead of cpu maps. This conforms to other companion
interfaces where both a map and a list interface exist.

This required adding a bitmap_parselist_user() function in a manner
similar to the bitmap_parse_user() function.

Signed-off-by: Mike Travis <[email protected]>
---
include/linux/bitmap.h | 5 +-
include/linux/cpumask.h | 15 ++++++
kernel/irq/proc.c | 54 ++++++++++++++++++++++-
lib/bitmap.c | 109 ++++++++++++++++++++++++++++++++++++++++++------
4 files changed, 166 insertions(+), 17 deletions(-)

--- linux-2.6.32.orig/include/linux/bitmap.h
+++ linux-2.6.32/include/linux/bitmap.h
@@ -52,7 +52,8 @@
* bitmap_parse(buf, buflen, dst, nbits) Parse bitmap dst from kernel buf
* bitmap_parse_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf
* bitmap_scnlistprintf(buf, len, src, nbits) Print bitmap src as list to buf
- * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from list
+ * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from kernel buf
+ * bitmap_parselist_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf
* bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region
* bitmap_release_region(bitmap, pos, order) Free specified bit region
* bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region
@@ -118,6 +119,8 @@ extern int bitmap_scnlistprintf(char *bu
const unsigned long *src, int nbits);
extern int bitmap_parselist(const char *buf, unsigned long *maskp,
int nmaskbits);
+extern int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen,
+ unsigned long *dst, int nbits);
extern void bitmap_remap(unsigned long *dst, const unsigned long *src,
const unsigned long *old, const unsigned long *new, int bits);
extern int bitmap_bitremap(int oldbit,
--- linux-2.6.32.orig/include/linux/cpumask.h
+++ linux-2.6.32/include/linux/cpumask.h
@@ -533,6 +533,21 @@ static inline int cpumask_parse_user(con
}

/**
+ * cpumask_parselist_user - extract a cpumask from a user string
+ * @buf: the buffer to extract from
+ * @len: the length of the buffer
+ * @dstp: the cpumask to set.
+ *
+ * Returns -errno, or 0 for success.
+ */
+static inline int cpumask_parselist_user(const char __user *buf, int len,
+ struct cpumask *dstp)
+{
+ return bitmap_parselist_user(buf, len, cpumask_bits(dstp),
+ nr_cpumask_bits);
+}
+
+/**
* cpulist_scnprintf - print a cpumask into a string as comma-separated list
* @buf: the buffer to sprintf into
* @len: the length of the buffer
--- linux-2.6.32.orig/kernel/irq/proc.c
+++ linux-2.6.32/kernel/irq/proc.c
@@ -17,7 +17,7 @@ static struct proc_dir_entry *root_irq_d

#ifdef CONFIG_SMP

-static int irq_affinity_proc_show(struct seq_file *m, void *v)
+static int show_irq_affinity(int type, struct seq_file *m, void *v)
{
struct irq_desc *desc = irq_to_desc((long)m->private);
const struct cpumask *mask = desc->affinity;
@@ -26,17 +26,31 @@ static int irq_affinity_proc_show(struct
if (desc->status & IRQ_MOVE_PENDING)
mask = desc->pending_mask;
#endif
- seq_cpumask(m, mask);
+ if (type)
+ seq_cpumask_list(m, mask);
+ else
+ seq_cpumask(m, mask);
seq_putc(m, '\n');
return 0;
}

+static int irq_affinity_proc_show(struct seq_file *m, void *v)
+{
+ return show_irq_affinity(0, m, v);
+}
+
+static int irq_affinity_list_proc_show(struct seq_file *m, void *v)
+{
+ return show_irq_affinity(1, m, v);
+}
+
+
#ifndef is_affinity_mask_valid
#define is_affinity_mask_valid(val) 1
#endif

int no_irq_affinity;
-static ssize_t irq_affinity_proc_write(struct file *file,
+static ssize_t write_irq_affinity(int type, struct file *file,
const char __user *buffer, size_t count, loff_t *pos)
{
unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data;
@@ -50,7 +64,10 @@ static ssize_t irq_affinity_proc_write(s
if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
return -ENOMEM;

- err = cpumask_parse_user(buffer, count, new_value);
+ if (type)
+ err = cpumask_parselist_user(buffer, count, new_value);
+ else
+ err = cpumask_parse_user(buffer, count, new_value);
if (err)
goto free_cpumask;

@@ -78,11 +95,28 @@ free_cpumask:
return err;
}

+static ssize_t irq_affinity_proc_write(struct file *file,
+ const char __user *buffer, size_t count, loff_t *pos)
+{
+ return write_irq_affinity(0, file, buffer, count, pos);
+}
+
+static ssize_t irq_affinity_list_proc_write(struct file *file,
+ const char __user *buffer, size_t count, loff_t *pos)
+{
+ return write_irq_affinity(1, file, buffer, count, pos);
+}
+
static int irq_affinity_proc_open(struct inode *inode, struct file *file)
{
return single_open(file, irq_affinity_proc_show, PDE(inode)->data);
}

+static int irq_affinity_list_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, irq_affinity_list_proc_show, PDE(inode)->data);
+}
+
static const struct file_operations irq_affinity_proc_fops = {
.open = irq_affinity_proc_open,
.read = seq_read,
@@ -91,6 +125,14 @@ static const struct file_operations irq_
.write = irq_affinity_proc_write,
};

+static const struct file_operations irq_affinity_list_proc_fops = {
+ .open = irq_affinity_list_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .write = irq_affinity_list_proc_write,
+};
+
static int default_affinity_show(struct seq_file *m, void *v)
{
seq_cpumask(m, irq_default_affinity);
@@ -240,6 +282,10 @@ void register_irq_proc(unsigned int irq,
proc_create_data("smp_affinity", 0600, desc->dir,
&irq_affinity_proc_fops, (void *)(long)irq);

+ /* create /proc/irq/<irq>/smp_affinity_list */
+ proc_create_data("smp_affinity_list", 0600, desc->dir,
+ &irq_affinity_list_proc_fops, (void *)(long)irq);
+
proc_create_data("node", 0444, desc->dir,
&irq_node_proc_fops, (void *)(long)irq);
#endif
--- linux-2.6.32.orig/lib/bitmap.c
+++ linux-2.6.32/lib/bitmap.c
@@ -491,8 +491,11 @@ int bitmap_scnlistprintf(char *buf, unsi
EXPORT_SYMBOL(bitmap_scnlistprintf);

/**
- * bitmap_parselist - convert list format ASCII string to bitmap
+ * __bitmap_parselist - convert list format ASCII string to bitmap
* @bp: read nul-terminated user string from this buffer
+ * @buflen: buffer size in bytes. If string is smaller than this
+ * then it must be terminated with a \0.
+ * @is_user: location of buffer, 0 indicates kernel space
* @maskp: write resulting mask here
* @nmaskbits: number of bits in mask to be written
*
@@ -507,20 +510,63 @@ EXPORT_SYMBOL(bitmap_scnlistprintf);
* %-EINVAL: invalid character in string
* %-ERANGE: bit number specified too large for mask
*/
-int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
+int __bitmap_parselist(const char *buf, unsigned int buflen,
+ int is_user, unsigned long *maskp,
+ int nmaskbits)
{
unsigned a, b;
+ int c, old_c, totaldigits;
+ const char __user *ubuf = buf;
+ int exp_digit, in_range;

+ totaldigits = c = 0;
bitmap_zero(maskp, nmaskbits);
do {
- if (!isdigit(*bp))
- return -EINVAL;
- b = a = simple_strtoul(bp, (char **)&bp, BASEDEC);
- if (*bp == '-') {
- bp++;
- if (!isdigit(*bp))
+ exp_digit = 1;
+ in_range = 0;
+ a = b = 0;
+
+ /* Get the next cpu# or a range of cpu#'s */
+ while (buflen) {
+ old_c = c;
+ if (is_user) {
+ if (__get_user(c, ubuf++))
+ return -EFAULT;
+ } else
+ c = *buf++;
+ buflen--;
+ if (isspace(c))
+ continue;
+
+ /*
+ * If the last character was a space and the current
+ * character isn't '\0', we've got embedded whitespace.
+ * This is a no-no, so throw an error.
+ */
+ if (totaldigits && c && isspace(old_c))
return -EINVAL;
- b = simple_strtoul(bp, (char **)&bp, BASEDEC);
+
+ /* A '\0' or a ',' signal the end of a cpu# or range */
+ if (c == '\0' || c == ',')
+ break;
+
+ if (c == '-') {
+ if (exp_digit || in_range)
+ return -EINVAL;
+ b = 0;
+ in_range = 1;
+ exp_digit = 1;
+ continue;
+ }
+
+ if (!isdigit(c))
+ return -EINVAL;
+
+ b = b * 10 + (c - '0');
+ if (!in_range)
+ a = b;
+ exp_digit = 0;
+ totaldigits++;
}
if (!(a <= b))
return -EINVAL;
@@ -530,13 +576,52 @@ int bitmap_parselist(const char *bp, uns
set_bit(a, maskp);
a++;
}
- if (*bp == ',')
- bp++;
- } while (*bp != '\0' && *bp != '\n');
+ } while (buflen && c == ',');
return 0;
}
+
+int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
+{
+ char *nl = strchr(bp, '\n');
+ int len;
+
+ if (nl)
+ len = nl - bp;
+ else
+ len = strlen(bp);
+
+ return __bitmap_parselist(bp, len, 0, maskp, nmaskbits);
+}
EXPORT_SYMBOL(bitmap_parselist);

+
+/**
+ * bitmap_parselist_user()
+ *
+ * @ubuf: pointer to user buffer containing string.
+ * @ulen: buffer size in bytes. If string is smaller than this
+ * then it must be terminated with a \0.
+ * @maskp: pointer to bitmap array that will contain result.
+ * @nmaskbits: size of bitmap, in bits.
+ *
+ * Wrapper for __bitmap_parselist(), providing it with user buffer.
+ *
+ * We cannot have this as an inline function in bitmap.h because it needs
+ * linux/uaccess.h to get the access_ok() declaration and this causes
+ * cyclic dependencies.
+ */
+int bitmap_parselist_user(const char __user *ubuf,
+ unsigned int ulen, unsigned long *maskp,
+ int nmaskbits)
+{
+ if (!access_ok(VERIFY_READ, ubuf, ulen))
+ return -EFAULT;
+ return __bitmap_parselist((const char *)ubuf,
+ ulen, 1, maskp, nmaskbits);
+}
+EXPORT_SYMBOL(bitmap_parselist_user);
+
+
/**
* bitmap_pos_to_ord(buf, pos, bits)
* @buf: pointer to a bitmap


2011-03-29 23:56:32

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH] bitmap, irq: Add smp_affinity_list interface to /proc/irq

On Tue, 29 Mar 2011 16:46:52 -0700
Mike Travis <[email protected]> wrote:

> + /* create /proc/irq/<irq>/smp_affinity_list */
> + proc_create_data("smp_affinity_list", 0600, desc->dir,
> + &irq_affinity_list_proc_fops, (void *)(long)irq);

Always document your interfaces, please. `grep -r smp_affinity
Documentation' shows where.

And once we've seen a description of the proposed new interface, we can
review the patch!

2011-03-30 00:43:11

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH] bitmap, irq: Add smp_affinity_list interface to /proc/irq

On Tue, 29 Mar 2011 16:56:12 -0700 Andrew Morton <[email protected]> wrote:

> On Tue, 29 Mar 2011 16:46:52 -0700
> Mike Travis <[email protected]> wrote:
>
> > + /* create /proc/irq/<irq>/smp_affinity_list */
> > + proc_create_data("smp_affinity_list", 0600, desc->dir,
> > + &irq_affinity_list_proc_fops, (void *)(long)irq);
>
> Always document your interfaces, please. `grep -r smp_affinity
> Documentation' shows where.
>
> And once we've seen a description of the proposed new interface, we can
> review the patch!

Also, the patch adds a new interface which duplicates an existing one,
only the formats are different, yes? This is, of course, bad.

The only justification we've seen for being bad is "Manually adjusting
the smp_affinity for IRQ's becomes unwieldy when the cpu count is
large". A more thorough description of how painful this is might help
motivate people to do bad things to the kernel.

Also, if it's just a matter of an alternative presentation of the data,
why not implement the desired user interface with a little userspace
tool then feed the results down into the existing kernel interface?

2011-03-30 00:51:18

by Mike Travis

[permalink] [raw]
Subject: Re: [PATCH] bitmap, irq: Add smp_affinity_list interface to /proc/irq



Andrew Morton wrote:
> On Tue, 29 Mar 2011 16:56:12 -0700 Andrew Morton <[email protected]> wrote:
>
>> On Tue, 29 Mar 2011 16:46:52 -0700
>> Mike Travis <[email protected]> wrote:
>>
>>> + /* create /proc/irq/<irq>/smp_affinity_list */
>>> + proc_create_data("smp_affinity_list", 0600, desc->dir,
>>> + &irq_affinity_list_proc_fops, (void *)(long)irq);
>> Always document your interfaces, please. `grep -r smp_affinity
>> Documentation' shows where.
>>
>> And once we've seen a description of the proposed new interface, we can
>> review the patch!
>
> Also, the patch adds a new interface which duplicates an existing one,
> only the formats are different, yes? This is, of course, bad.
>
> The only justification we've seen for being bad is "Manually adjusting
> the smp_affinity for IRQ's becomes unwieldy when the cpu count is
> large". A more thorough description of how painful this is might help
> motivate people to do bad things to the kernel.
>
> Also, if it's just a matter of an alternative presentation of the data,
> why not implement the desired user interface with a little userspace
> tool then feed the results down into the existing kernel interface?
>

Setting smp affinity to cpus 256 to 263 would be:

echo 000000ff,00000000,00000000,00000000,00000000,00000000,00000000,00000000 > smp_affinity

instead of:

echo 256-263 > smp_affinity_list

Think about what it looks like for cpus around say, 4088 to 4095.

We already have many alternate "list" interfaces:

/sys/devices/system/cpu/cpuX/indexY/shared_cpu_list
/sys/devices/system/cpu/cpuX/topology/thread_siblings_list
/sys/devices/system/cpu/cpuX/topology/core_siblings_list
/sys/devices/system/node/nodeX/cpulist
/sys/devices/pci***/***/local_cpulist

etc.

This just expands on that same philosophy.

Thanks,
Mike

2011-03-30 00:55:38

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH] bitmap, irq: Add smp_affinity_list interface to /proc/irq

On Tue, 29 Mar 2011 17:51:18 -0700 Mike Travis <[email protected]> wrote:

> >
> > Also, the patch adds a new interface which duplicates an existing one,
> > only the formats are different, yes? This is, of course, bad.
> >
> > The only justification we've seen for being bad is "Manually adjusting
> > the smp_affinity for IRQ's becomes unwieldy when the cpu count is
> > large". A more thorough description of how painful this is might help
> > motivate people to do bad things to the kernel.
> >
> > Also, if it's just a matter of an alternative presentation of the data,
> > why not implement the desired user interface with a little userspace
> > tool then feed the results down into the existing kernel interface?
> >
>
> Setting smp affinity to cpus 256 to 263 would be:
>
> echo 000000ff,00000000,00000000,00000000,00000000,00000000,00000000,00000000 > smp_affinity
>
> instead of:
>
> echo 256-263 > smp_affinity_list
>
> Think about what it looks like for cpus around say, 4088 to 4095.
>
> We already have many alternate "list" interfaces:
>
> /sys/devices/system/cpu/cpuX/indexY/shared_cpu_list
> /sys/devices/system/cpu/cpuX/topology/thread_siblings_list
> /sys/devices/system/cpu/cpuX/topology/core_siblings_list
> /sys/devices/system/node/nodeX/cpulist
> /sys/devices/pci***/***/local_cpulist
>
> etc.
>
> This just expands on that same philosophy.

You mean that if someone had written a stupid little tool to convert a
list of tuples into a bitmap, we wouldn't have needed to add all that
crap to the kernel?

2011-03-30 00:58:46

by Mike Travis

[permalink] [raw]
Subject: Re: [PATCH] bitmap, irq: Add smp_affinity_list interface to /proc/irq



Andrew Morton wrote:
> On Tue, 29 Mar 2011 17:51:18 -0700 Mike Travis <[email protected]> wrote:
>
>>> Also, the patch adds a new interface which duplicates an existing one,
>>> only the formats are different, yes? This is, of course, bad.
>>>
>>> The only justification we've seen for being bad is "Manually adjusting
>>> the smp_affinity for IRQ's becomes unwieldy when the cpu count is
>>> large". A more thorough description of how painful this is might help
>>> motivate people to do bad things to the kernel.
>>>
>>> Also, if it's just a matter of an alternative presentation of the data,
>>> why not implement the desired user interface with a little userspace
>>> tool then feed the results down into the existing kernel interface?
>>>
>> Setting smp affinity to cpus 256 to 263 would be:
>>
>> echo 000000ff,00000000,00000000,00000000,00000000,00000000,00000000,00000000 > smp_affinity
>>
>> instead of:
>>
>> echo 256-263 > smp_affinity_list
>>
>> Think about what it looks like for cpus around say, 4088 to 4095.
>>
>> We already have many alternate "list" interfaces:
>>
>> /sys/devices/system/cpu/cpuX/indexY/shared_cpu_list
>> /sys/devices/system/cpu/cpuX/topology/thread_siblings_list
>> /sys/devices/system/cpu/cpuX/topology/core_siblings_list
>> /sys/devices/system/node/nodeX/cpulist
>> /sys/devices/pci***/***/local_cpulist
>>
>> etc.
>>
>> This just expands on that same philosophy.
>
> You mean that if someone had written a stupid little tool to convert a
> list of tuples into a bitmap, we wouldn't have needed to add all that
> crap to the kernel?
>

2011-03-30 01:04:56

by Mike Travis

[permalink] [raw]
Subject: Re: [PATCH] bitmap, irq: Add smp_affinity_list interface to /proc/irq



Andrew Morton wrote:
> On Tue, 29 Mar 2011 17:51:18 -0700 Mike Travis <[email protected]> wrote:
>
>>> Also, the patch adds a new interface which duplicates an existing one,
>>> only the formats are different, yes? This is, of course, bad.
>>>
>>> The only justification we've seen for being bad is "Manually adjusting
>>> the smp_affinity for IRQ's becomes unwieldy when the cpu count is
>>> large". A more thorough description of how painful this is might help
>>> motivate people to do bad things to the kernel.
>>>
>>> Also, if it's just a matter of an alternative presentation of the data,
>>> why not implement the desired user interface with a little userspace
>>> tool then feed the results down into the existing kernel interface?
>>>
>> Setting smp affinity to cpus 256 to 263 would be:
>>
>> echo 000000ff,00000000,00000000,00000000,00000000,00000000,00000000,00000000 > smp_affinity
>>
>> instead of:
>>
>> echo 256-263 > smp_affinity_list
>>
>> Think about what it looks like for cpus around say, 4088 to 4095.
>>
>> We already have many alternate "list" interfaces:
>>
>> /sys/devices/system/cpu/cpuX/indexY/shared_cpu_list
>> /sys/devices/system/cpu/cpuX/topology/thread_siblings_list
>> /sys/devices/system/cpu/cpuX/topology/core_siblings_list
>> /sys/devices/system/node/nodeX/cpulist
>> /sys/devices/pci***/***/local_cpulist
>>
>> etc.
>>
>> This just expands on that same philosophy.
>
> You mean that if someone had written a stupid little tool to convert a
> list of tuples into a bitmap, we wouldn't have needed to add all that
> crap to the kernel?
>

We actually had a problem where the interface would not take enough characters
to set the irq mask. (It has since been fixed.)

I don't mind if there's an alternate way to do this if you really feel strongly
about it. Be nice if it was somehow included but that requires yet way more
infrastructure somewhere else.

How about if I #ifdef CONFIG_MAX_SMP around it? It's really not needed if
you only have a few cpu's enabled.

[If it was up to me, I'd eliminate the bitmask interfaces and just keep the
list interfaces. That's the stupid interface that's not needed, and far more
shortsighted.]

Thanks,
Mike

2011-03-30 01:12:11

by Mike Travis

[permalink] [raw]
Subject: [PATCH] bitmap, irq: Add smp_affinity_list interface to /proc/irq

Subject: bitmap, irq: Add smp_affinity_list interface to /proc/irq

Manually adjusting the smp_affinity for IRQ's becomes unwieldy when the
cpu count is large. Add a companion interface, smp_affinity_list to
use cpu lists instead of cpu maps. This conforms to other companion
interfaces where both a map and a list interface exist.

This required adding a bitmap_parselist_user() function in a manner
similar to the bitmap_parse_user() function.

Signed-off-by: Mike Travis <[email protected]>
---
Documentation/IRQ-affinity.txt | 19 +++++-
Documentation/filesystems/proc.txt | 11 +++
include/linux/bitmap.h | 5 +
include/linux/cpumask.h | 15 +++++
kernel/irq/proc.c | 54 ++++++++++++++++--
lib/bitmap.c | 109 ++++++++++++++++++++++++++++++++-----
6 files changed, 190 insertions(+), 23 deletions(-)

--- linux.orig/Documentation/IRQ-affinity.txt
+++ linux/Documentation/IRQ-affinity.txt
@@ -4,10 +4,11 @@ ChangeLog:

SMP IRQ affinity

-/proc/irq/IRQ#/smp_affinity specifies which target CPUs are permitted
-for a given IRQ source. It's a bitmask of allowed CPUs. It's not allowed
-to turn off all CPUs, and if an IRQ controller does not support IRQ
-affinity then the value will not change from the default 0xffffffff.
+/proc/irq/IRQ#/smp_affinity and /proc/irq/IRQ#/smp_affinity_list specify
+which target CPUs are permitted for a given IRQ source. It's a bitmask
+(smp_affinity) or cpu list (smp_affinity_list) of allowed CPUs. It's not
+allowed to turn off all CPUs, and if an IRQ controller does not support
+IRQ affinity then the value will not change from the default of all cpus.

/proc/irq/default_smp_affinity specifies default affinity mask that applies
to all non-active IRQs. Once IRQ is allocated/activated its affinity bitmask
@@ -54,3 +55,13 @@ round-trip min/avg/max = 0.1/0.5/585.4 m
This time around IRQ44 was delivered only to the last four processors.
i.e counters for the CPU0-3 did not change.

+Here is an example of limiting that same irq (44) to cpus 1024 to 1031:
+
+[root@moon 44]# echo 1024-1031 > smp_affinity_list
+[root@moon 44]# cat smp_affinity_list
+1024-1031
+
+Note that to do this with a bitmask would require 32 bitmasks of zero
+to follow the pertinent one.
+
+
--- linux.orig/Documentation/filesystems/proc.txt
+++ linux/Documentation/filesystems/proc.txt
@@ -574,6 +574,12 @@ The contents of each smp_affinity file i
> cat /proc/irq/0/smp_affinity
ffffffff

+There is an alternate interface, smp_affinity_list which allows specifying
+a cpu range instead of a bitmask:
+
+ > cat /proc/irq/0/smp_affinity_list
+ 1024-1031
+
The default_smp_affinity mask applies to all non-active IRQs, which are the
IRQs which have not yet been allocated/activated, and hence which lack a
/proc/irq/[0-9]* directory.
@@ -583,12 +589,13 @@ reports itself as being attached. This h
include information about any possible driver locality preference.

prof_cpu_mask specifies which CPUs are to be profiled by the system wide
-profiler. Default value is ffffffff (all cpus).
+profiler. Default value is ffffffff (all cpus if there are only 32 of them).

The way IRQs are routed is handled by the IO-APIC, and it's Round Robin
between all the CPUs which are allowed to handle it. As usual the kernel has
more info than you and does a better job than you, so the defaults are the
-best choice for almost everyone.
+best choice for almost everyone. [Note this applies only to those IO-APIC's
+that support "Round Robin" interrupt distribution.]

There are three more important subdirectories in /proc: net, scsi, and sys.
The general rule is that the contents, or even the existence of these
--- linux.orig/include/linux/bitmap.h
+++ linux/include/linux/bitmap.h
@@ -55,7 +55,8 @@
* bitmap_parse(buf, buflen, dst, nbits) Parse bitmap dst from kernel buf
* bitmap_parse_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf
* bitmap_scnlistprintf(buf, len, src, nbits) Print bitmap src as list to buf
- * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from list
+ * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from kernel buf
+ * bitmap_parselist_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf
* bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region
* bitmap_release_region(bitmap, pos, order) Free specified bit region
* bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region
@@ -129,6 +130,8 @@ extern int bitmap_scnlistprintf(char *bu
const unsigned long *src, int nbits);
extern int bitmap_parselist(const char *buf, unsigned long *maskp,
int nmaskbits);
+extern int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen,
+ unsigned long *dst, int nbits);
extern void bitmap_remap(unsigned long *dst, const unsigned long *src,
const unsigned long *old, const unsigned long *new, int bits);
extern int bitmap_bitremap(int oldbit,
--- linux.orig/include/linux/cpumask.h
+++ linux/include/linux/cpumask.h
@@ -547,6 +547,21 @@ static inline int cpumask_parse_user(con
}

/**
+ * cpumask_parselist_user - extract a cpumask from a user string
+ * @buf: the buffer to extract from
+ * @len: the length of the buffer
+ * @dstp: the cpumask to set.
+ *
+ * Returns -errno, or 0 for success.
+ */
+static inline int cpumask_parselist_user(const char __user *buf, int len,
+ struct cpumask *dstp)
+{
+ return bitmap_parselist_user(buf, len, cpumask_bits(dstp),
+ nr_cpumask_bits);
+}
+
+/**
* cpulist_scnprintf - print a cpumask into a string as comma-separated list
* @buf: the buffer to sprintf into
* @len: the length of the buffer
--- linux.orig/kernel/irq/proc.c
+++ linux/kernel/irq/proc.c
@@ -19,7 +19,7 @@ static struct proc_dir_entry *root_irq_d

#ifdef CONFIG_SMP

-static int irq_affinity_proc_show(struct seq_file *m, void *v)
+static int show_irq_affinity(int type, struct seq_file *m, void *v)
{
struct irq_desc *desc = irq_to_desc((long)m->private);
const struct cpumask *mask = desc->irq_data.affinity;
@@ -28,7 +28,10 @@ static int irq_affinity_proc_show(struct
if (irqd_is_setaffinity_pending(&desc->irq_data))
mask = desc->pending_mask;
#endif
- seq_cpumask(m, mask);
+ if (type)
+ seq_cpumask_list(m, mask);
+ else
+ seq_cpumask(m, mask);
seq_putc(m, '\n');
return 0;
}
@@ -59,7 +62,18 @@ static int irq_affinity_hint_proc_show(s
#endif

int no_irq_affinity;
-static ssize_t irq_affinity_proc_write(struct file *file,
+static int irq_affinity_proc_show(struct seq_file *m, void *v)
+{
+ return show_irq_affinity(0, m, v);
+}
+
+static int irq_affinity_list_proc_show(struct seq_file *m, void *v)
+{
+ return show_irq_affinity(1, m, v);
+}
+
+
+static ssize_t write_irq_affinity(int type, struct file *file,
const char __user *buffer, size_t count, loff_t *pos)
{
unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data;
@@ -72,7 +86,10 @@ static ssize_t irq_affinity_proc_write(s
if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
return -ENOMEM;

- err = cpumask_parse_user(buffer, count, new_value);
+ if (type)
+ err = cpumask_parselist_user(buffer, count, new_value);
+ else
+ err = cpumask_parse_user(buffer, count, new_value);
if (err)
goto free_cpumask;

@@ -100,11 +117,28 @@ free_cpumask:
return err;
}

+static ssize_t irq_affinity_proc_write(struct file *file,
+ const char __user *buffer, size_t count, loff_t *pos)
+{
+ return write_irq_affinity(0, file, buffer, count, pos);
+}
+
+static ssize_t irq_affinity_list_proc_write(struct file *file,
+ const char __user *buffer, size_t count, loff_t *pos)
+{
+ return write_irq_affinity(1, file, buffer, count, pos);
+}
+
static int irq_affinity_proc_open(struct inode *inode, struct file *file)
{
return single_open(file, irq_affinity_proc_show, PDE(inode)->data);
}

+static int irq_affinity_list_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, irq_affinity_list_proc_show, PDE(inode)->data);
+}
+
static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file)
{
return single_open(file, irq_affinity_hint_proc_show, PDE(inode)->data);
@@ -125,6 +159,14 @@ static const struct file_operations irq_
.release = single_release,
};

+static const struct file_operations irq_affinity_list_proc_fops = {
+ .open = irq_affinity_list_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .write = irq_affinity_list_proc_write,
+};
+
static int default_affinity_show(struct seq_file *m, void *v)
{
seq_cpumask(m, irq_default_affinity);
@@ -289,6 +331,10 @@ void register_irq_proc(unsigned int irq,
proc_create_data("affinity_hint", 0400, desc->dir,
&irq_affinity_hint_proc_fops, (void *)(long)irq);

+ /* create /proc/irq/<irq>/smp_affinity_list */
+ proc_create_data("smp_affinity_list", 0600, desc->dir,
+ &irq_affinity_list_proc_fops, (void *)(long)irq);
+
proc_create_data("node", 0444, desc->dir,
&irq_node_proc_fops, (void *)(long)irq);
#endif
--- linux.orig/lib/bitmap.c
+++ linux/lib/bitmap.c
@@ -571,8 +571,11 @@ int bitmap_scnlistprintf(char *buf, unsi
EXPORT_SYMBOL(bitmap_scnlistprintf);

/**
- * bitmap_parselist - convert list format ASCII string to bitmap
+ * __bitmap_parselist - convert list format ASCII string to bitmap
* @bp: read nul-terminated user string from this buffer
+ * @buflen: buffer size in bytes. If string is smaller than this
+ * then it must be terminated with a \0.
+ * @is_user: location of buffer, 0 indicates kernel space
* @maskp: write resulting mask here
* @nmaskbits: number of bits in mask to be written
*
@@ -587,20 +590,63 @@ EXPORT_SYMBOL(bitmap_scnlistprintf);
* %-EINVAL: invalid character in string
* %-ERANGE: bit number specified too large for mask
*/
-int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
+int __bitmap_parselist(const char *buf, unsigned int buflen,
+ int is_user, unsigned long *maskp,
+ int nmaskbits)
{
unsigned a, b;
+ int c, old_c, totaldigits;
+ const char __user *ubuf = buf;
+ int exp_digit, in_range;

+ totaldigits = c = 0;
bitmap_zero(maskp, nmaskbits);
do {
- if (!isdigit(*bp))
- return -EINVAL;
- b = a = simple_strtoul(bp, (char **)&bp, BASEDEC);
- if (*bp == '-') {
- bp++;
- if (!isdigit(*bp))
+ exp_digit = 1;
+ in_range = 0;
+ a = b = 0;
+
+ /* Get the next cpu# or a range of cpu#'s */
+ while (buflen) {
+ old_c = c;
+ if (is_user) {
+ if (__get_user(c, ubuf++))
+ return -EFAULT;
+ } else
+ c = *buf++;
+ buflen--;
+ if (isspace(c))
+ continue;
+
+ /*
+ * If the last character was a space and the current
+ * character isn't '\0', we've got embedded whitespace.
+ * This is a no-no, so throw an error.
+ */
+ if (totaldigits && c && isspace(old_c))
return -EINVAL;
- b = simple_strtoul(bp, (char **)&bp, BASEDEC);
+
+ /* A '\0' or a ',' signal the end of a cpu# or range */
+ if (c == '\0' || c == ',')
+ break;
+
+ if (c == '-') {
+ if (exp_digit || in_range)
+ return -EINVAL;
+ b = 0;
+ in_range = 1;
+ exp_digit = 1;
+ continue;
+ }
+
+ if (!isdigit(c))
+ return -EINVAL;
+
+ b = b * 10 + (c - '0');
+ if (!in_range)
+ a = b;
+ exp_digit = 0;
+ totaldigits++;
}
if (!(a <= b))
return -EINVAL;
@@ -610,13 +656,52 @@ int bitmap_parselist(const char *bp, uns
set_bit(a, maskp);
a++;
}
- if (*bp == ',')
- bp++;
- } while (*bp != '\0' && *bp != '\n');
+ } while (buflen && c == ',');
return 0;
}
+
+int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
+{
+ char *nl = strchr(bp, '\n');
+ int len;
+
+ if (nl)
+ len = nl - bp;
+ else
+ len = strlen(bp);
+
+ return __bitmap_parselist(bp, len, 0, maskp, nmaskbits);
+}
EXPORT_SYMBOL(bitmap_parselist);

+
+/**
+ * bitmap_parselist_user()
+ *
+ * @ubuf: pointer to user buffer containing string.
+ * @ulen: buffer size in bytes. If string is smaller than this
+ * then it must be terminated with a \0.
+ * @maskp: pointer to bitmap array that will contain result.
+ * @nmaskbits: size of bitmap, in bits.
+ *
+ * Wrapper for __bitmap_parselist(), providing it with user buffer.
+ *
+ * We cannot have this as an inline function in bitmap.h because it needs
+ * linux/uaccess.h to get the access_ok() declaration and this causes
+ * cyclic dependencies.
+ */
+int bitmap_parselist_user(const char __user *ubuf,
+ unsigned int ulen, unsigned long *maskp,
+ int nmaskbits)
+{
+ if (!access_ok(VERIFY_READ, ubuf, ulen))
+ return -EFAULT;
+ return __bitmap_parselist((const char *)ubuf,
+ ulen, 1, maskp, nmaskbits);
+}
+EXPORT_SYMBOL(bitmap_parselist_user);
+
+
/**
* bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap
* @buf: pointer to a bitmap

2011-03-30 01:13:12

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH] bitmap, irq: Add smp_affinity_list interface to /proc/irq

On Tue, 29 Mar 2011 18:04:56 -0700 Mike Travis <[email protected]> wrote:

>
>
> Andrew Morton wrote:
> > On Tue, 29 Mar 2011 17:51:18 -0700 Mike Travis <[email protected]> wrote:
> >
> >>> Also, the patch adds a new interface which duplicates an existing one,
> >>> only the formats are different, yes? This is, of course, bad.
> >>>
> >>> The only justification we've seen for being bad is "Manually adjusting
> >>> the smp_affinity for IRQ's becomes unwieldy when the cpu count is
> >>> large". A more thorough description of how painful this is might help
> >>> motivate people to do bad things to the kernel.
> >>>
> >>> Also, if it's just a matter of an alternative presentation of the data,
> >>> why not implement the desired user interface with a little userspace
> >>> tool then feed the results down into the existing kernel interface?
> >>>
> >> Setting smp affinity to cpus 256 to 263 would be:
> >>
> >> echo 000000ff,00000000,00000000,00000000,00000000,00000000,00000000,00000000 > smp_affinity
> >>
> >> instead of:
> >>
> >> echo 256-263 > smp_affinity_list
> >>
> >> Think about what it looks like for cpus around say, 4088 to 4095.
> >>
> >> We already have many alternate "list" interfaces:
> >>
> >> /sys/devices/system/cpu/cpuX/indexY/shared_cpu_list
> >> /sys/devices/system/cpu/cpuX/topology/thread_siblings_list
> >> /sys/devices/system/cpu/cpuX/topology/core_siblings_list
> >> /sys/devices/system/node/nodeX/cpulist
> >> /sys/devices/pci***/***/local_cpulist
> >>
> >> etc.
> >>
> >> This just expands on that same philosophy.
> >
> > You mean that if someone had written a stupid little tool to convert a
> > list of tuples into a bitmap, we wouldn't have needed to add all that
> > crap to the kernel?
> >
>
> We actually had a problem where the interface would not take enough characters
> to set the irq mask. (It has since been fixed.)
>
> I don't mind if there's an alternate way to do this if you really feel strongly
> about it. Be nice if it was somehow included but that requires yet way more
> infrastructure somewhere else.
>
> How about if I #ifdef CONFIG_MAX_SMP around it? It's really not needed if
> you only have a few cpu's enabled.

Oh, I'm just using your patch as an opportunity to have my regular rant
about how much we suck. Our hammer is kernel patches and all problems
look like nails, but we'd end up with better user interfaces and a
better kernel if we'd just stop stuffing more and fatter user interface
code into the kernel.

Please redo the patch with documentation updates and a changelog which
suitably justifies its awfulness and I'll add my Sucked-off-by: to it.

> [If it was up to me, I'd eliminate the bitmask interfaces and just keep the
> list interfaces. That's the stupid interface that's not needed, and far more
> shortsighted.]

Agree.

It's not impossible to remove those interfaces. My preferred approach
is to add a once-per-boot warning printk if anyone uses the old
interface and to remove the thing altogether in three or five years.

That reminds me. It's been like ten years. Someone please delete sys_bdflush().

2011-03-31 07:37:55

by KOSAKI Motohiro

[permalink] [raw]
Subject: Re: [PATCH] bitmap, irq: Add smp_affinity_list interface to /proc/irq

> > [If it was up to me, I'd eliminate the bitmask interfaces and just keep the
> > list interfaces. That's the stupid interface that's not needed, and far more
> > shortsighted.]
>
> Agree.
>
> It's not impossible to remove those interfaces. My preferred approach
> is to add a once-per-boot warning printk if anyone uses the old
> interface and to remove the thing altogether in three or five years.
>
> That reminds me. It's been like ten years. Someone please delete sys_bdflush().

This?

But to be honest, this long diffstat seems to tell me the worth is dubious. ;-)


---
arch/alpha/kernel/systbls.S | 2 +-
arch/arm/kernel/calls.S | 2 +-
arch/avr32/kernel/syscall_table.S | 2 +-
arch/blackfin/mach-common/entry.S | 2 +-
arch/cris/arch-v10/kernel/entry.S | 2 +-
arch/cris/arch-v32/kernel/entry.S | 2 +-
arch/frv/kernel/entry.S | 2 +-
arch/h8300/kernel/syscalls.S | 2 +-
arch/ia64/kernel/entry.S | 2 +-
arch/m32r/kernel/syscall_table.S | 2 +-
arch/m68k/kernel/entry_mm.S | 2 +-
arch/m68k/kernel/syscalltable.S | 2 +-
arch/microblaze/kernel/syscall_table.S | 2 +-
arch/mips/kernel/scall32-o32.S | 2 +-
arch/mips/kernel/scall64-o32.S | 2 +-
arch/mn10300/kernel/entry.S | 2 +-
arch/parisc/kernel/syscall_table.S | 2 +-
arch/powerpc/include/asm/systbl.h | 2 +-
arch/s390/kernel/compat_wrapper.S | 6 ------
arch/s390/kernel/syscalls.S | 2 +-
arch/sh/kernel/syscalls_32.S | 2 +-
arch/sh/kernel/syscalls_64.S | 2 +-
arch/sparc/kernel/sys32.S | 1 -
arch/sparc/kernel/systbls_32.S | 2 +-
arch/sparc/kernel/systbls_64.S | 4 ++--
arch/x86/kernel/syscall_table_32.S | 2 +-
arch/xtensa/include/asm/unistd.h | 2 +-
fs/buffer.c | 27 ---------------------------
include/asm-generic/unistd.h | 2 +-
include/linux/capability.h | 1 -
include/linux/syscalls.h | 1 -
kernel/sys_ni.c | 1 -
32 files changed, 27 insertions(+), 64 deletions(-)

diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S
index a6a1de9..6165de6 100644
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S
@@ -318,7 +318,7 @@ sys_call_table:
.quad alpha_ni_syscall
.quad alpha_ni_syscall
/* linux-specific system calls start at 300 */
- .quad sys_bdflush /* 300 */
+ .quad sys_ni_syscall /* 300: old sys_bdflush */
.quad sys_sethae
.quad sys_mount
.quad sys_old_adjtimex
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index 5c26ecc..1786738 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -143,7 +143,7 @@
CALL(sys_quotactl)
CALL(sys_getpgid)
CALL(sys_fchdir)
- CALL(sys_bdflush)
+ CALL(sys_ni_syscall)
/* 135 */ CALL(sys_sysfs)
CALL(sys_personality)
CALL(sys_ni_syscall) /* reserved for afs_syscall */
diff --git a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S
index e76bad1..cdea078 100644
--- a/arch/avr32/kernel/syscall_table.S
+++ b/arch/avr32/kernel/syscall_table.S
@@ -132,7 +132,7 @@ sys_call_table:
.long sys_delete_module
.long sys_quotactl
.long sys_getpgid
- .long sys_bdflush
+ .long sys_ni_syscall /* was sys_bdflush */
.long sys_sysfs /* 120 */
.long sys_personality
.long sys_ni_syscall /* reserved for afs_syscall */
diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S
index 46ab457..23a0a0e 100644
--- a/arch/blackfin/mach-common/entry.S
+++ b/arch/blackfin/mach-common/entry.S
@@ -1508,7 +1508,7 @@ ENTRY(_sys_call_table)
.long _sys_quotactl
.long _sys_getpgid
.long _sys_fchdir
- .long _sys_bdflush
+ .long _sys_ni_syscall /* old sys_bdflush */
.long _sys_ni_syscall /* 135 */ /* sys_sysfs */
.long _sys_personality
.long _sys_ni_syscall /* for afs_syscall */
diff --git a/arch/cris/arch-v10/kernel/entry.S b/arch/cris/arch-v10/kernel/entry.S
index 0d6420d..aa2d3d6 100644
--- a/arch/cris/arch-v10/kernel/entry.S
+++ b/arch/cris/arch-v10/kernel/entry.S
@@ -736,7 +736,7 @@ sys_call_table:
.long sys_quotactl
.long sys_getpgid
.long sys_fchdir
- .long sys_bdflush
+ .long sys_ni_syscall /* old "sys_bdflush" */
.long sys_sysfs /* 135 */
.long sys_personality
.long sys_ni_syscall /* for afs_syscall */
diff --git a/arch/cris/arch-v32/kernel/entry.S b/arch/cris/arch-v32/kernel/entry.S
index 0ecb50b..90e0cc2 100644
--- a/arch/cris/arch-v32/kernel/entry.S
+++ b/arch/cris/arch-v32/kernel/entry.S
@@ -679,7 +679,7 @@ sys_call_table:
.long sys_quotactl
.long sys_getpgid
.long sys_fchdir
- .long sys_bdflush
+ .long sys_ni_syscall /* old "sys_bdflush" */
.long sys_sysfs /* 135 */
.long sys_personality
.long sys_ni_syscall /* for afs_syscall */
diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S
index 63d579b..8932e28 100644
--- a/arch/frv/kernel/entry.S
+++ b/arch/frv/kernel/entry.S
@@ -1323,7 +1323,7 @@ sys_call_table:
.long sys_quotactl
.long sys_getpgid
.long sys_fchdir
- .long sys_bdflush
+ .long sys_ni_syscall /* old "sys_bdflush" */
.long sys_sysfs /* 135 */
.long sys_personality
.long sys_ni_syscall /* for afs_syscall */
diff --git a/arch/h8300/kernel/syscalls.S b/arch/h8300/kernel/syscalls.S
index faefaff..e09bed8 100644
--- a/arch/h8300/kernel/syscalls.S
+++ b/arch/h8300/kernel/syscalls.S
@@ -148,7 +148,7 @@ SYMBOL_NAME_LABEL(sys_call_table)
.long SYMBOL_NAME(sys_quotactl)
.long SYMBOL_NAME(sys_getpgid)
.long SYMBOL_NAME(sys_fchdir)
- .long SYMBOL_NAME(sys_bdflush)
+ .long SYMBOL_NAME(sys_ni_syscall) /* was sys_bdflush */
.long SYMBOL_NAME(sys_sysfs) /* 135 */
.long SYMBOL_NAME(sys_personality)
.long SYMBOL_NAME(sys_ni_syscall) /* for afs_syscall */
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 6de2e23..df46ffb 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1583,7 +1583,7 @@ sys_call_table:
data8 sys_ni_syscall // 1135 /* was: sys_get_kernel_syms */
data8 sys_ni_syscall /* was: sys_query_module */
data8 sys_quotactl
- data8 sys_bdflush
+ data8 sys_ni_syscall /* was: sys_bdflush */
data8 sys_sysfs
data8 sys_personality // 1140
data8 sys_ni_syscall // sys_afs_syscall
diff --git a/arch/m32r/kernel/syscall_table.S b/arch/m32r/kernel/syscall_table.S
index 60536e2..8b197b9 100644
--- a/arch/m32r/kernel/syscall_table.S
+++ b/arch/m32r/kernel/syscall_table.S
@@ -133,7 +133,7 @@ ENTRY(sys_call_table)
.long sys_quotactl
.long sys_getpgid
.long sys_fchdir
- .long sys_bdflush
+ .long sys_ni_syscall /* sys_bdflush syscall holder */
.long sys_sysfs /* 135 */
.long sys_personality
.long sys_ni_syscall /* afs_syscall syscall holder */
diff --git a/arch/m68k/kernel/entry_mm.S b/arch/m68k/kernel/entry_mm.S
index 1559dea..49b280e 100644
--- a/arch/m68k/kernel/entry_mm.S
+++ b/arch/m68k/kernel/entry_mm.S
@@ -544,7 +544,7 @@ sys_call_table:
.long sys_quotactl
.long sys_getpgid
.long sys_fchdir
- .long sys_bdflush
+ .long sys_ni_syscall /* old "sys_bdflush" */
.long sys_sysfs /* 135 */
.long sys_personality
.long sys_ni_syscall /* for afs_syscall */
diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S
index 79b1ed1..88be6af 100644
--- a/arch/m68k/kernel/syscalltable.S
+++ b/arch/m68k/kernel/syscalltable.S
@@ -152,7 +152,7 @@ ENTRY(sys_call_table)
.long sys_quotactl
.long sys_getpgid
.long sys_fchdir
- .long sys_bdflush
+ .long sys_ni_syscall /* old "sys_bdflush" */
.long sys_sysfs /* 135 */
.long sys_personality
.long sys_ni_syscall /* for afs_syscall */
diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S
index e88a930..b25b3e0 100644
--- a/arch/microblaze/kernel/syscall_table.S
+++ b/arch/microblaze/kernel/syscall_table.S
@@ -138,7 +138,7 @@ ENTRY(sys_call_table)
.long sys_quotactl
.long sys_getpgid
.long sys_fchdir
- .long sys_bdflush
+ .long sys_ni_syscall /* old "sys_bdflush" */
.long sys_sysfs /* 135 */
.long sys_personality
.long sys_ni_syscall /* reserved for afs_syscall */
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 7f5468b..ee7393d 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -370,7 +370,7 @@ einval: li v0, -ENOSYS
sys sys_quotactl 4
sys sys_getpgid 1
sys sys_fchdir 1
- sys sys_bdflush 2
+ sys sys_ni_syscall 0
sys sys_sysfs 3 /* 4135 */
sys sys_personality 1
sys sys_ni_syscall 0 /* for afs_syscall */
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 049a9c8..bcf4038 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -338,7 +338,7 @@ sys_call_table:
PTR sys_quotactl
PTR sys_getpgid
PTR sys_fchdir
- PTR sys_bdflush
+ PTR sys_ni_syscall
PTR sys_sysfs /* 4135 */
PTR sys_32_personality
PTR sys_ni_syscall /* for afs_syscall */
diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S
index fb93ad7..fdaabc7 100644
--- a/arch/mn10300/kernel/entry.S
+++ b/arch/mn10300/kernel/entry.S
@@ -554,7 +554,7 @@ ENTRY(sys_call_table)
.long sys_quotactl
.long sys_getpgid
.long sys_fchdir
- .long sys_bdflush
+ .long sys_ni_syscall /* old "sys_bdflush" */
.long sys_sysfs /* 135 */
.long sys_personality
.long sys_ni_syscall /* reserved for afs_syscall */
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 74867df..4b82982 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -214,7 +214,7 @@
ENTRY_SAME(quotactl)
ENTRY_SAME(getpgid)
ENTRY_SAME(fchdir)
- ENTRY_SAME(bdflush)
+ ENTRY_SAME(ni_syscall)
ENTRY_SAME(sysfs) /* 135 */
ENTRY_OURS(personality)
ENTRY_SAME(ni_syscall) /* for afs_syscall */
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index aa0f1eb..22c5d25 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -137,7 +137,7 @@ SYSCALL(ni_syscall)
SYSCALL(quotactl)
COMPAT_SYS_SPU(getpgid)
SYSCALL_SPU(fchdir)
-SYSCALL_SPU(bdflush)
+SYSCALL_SPU(ni_syscall)
COMPAT_SYS(sysfs)
SYSX_SPU(ppc64_personality,ppc64_personality,sys_personality)
SYSCALL(ni_syscall)
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 1dc96ea..eecb1dd 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -599,12 +599,6 @@ sys32_fchdir_wrapper:
llgfr %r2,%r2 # unsigned int
jg sys_fchdir # branch to system call

- .globl sys32_bdflush_wrapper
-sys32_bdflush_wrapper:
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # long
- jg sys_bdflush # branch to system call
-
.globl sys32_sysfs_wrapper
sys32_sysfs_wrapper:
lgfr %r2,%r2 # int
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 9c65fd4..04d7ae5 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -142,7 +142,7 @@ NI_SYSCALL /* 130: old get_kernel_syms */
SYSCALL(sys_quotactl,sys_quotactl,sys32_quotactl_wrapper)
SYSCALL(sys_getpgid,sys_getpgid,sys32_getpgid_wrapper)
SYSCALL(sys_fchdir,sys_fchdir,sys32_fchdir_wrapper)
-SYSCALL(sys_bdflush,sys_bdflush,sys32_bdflush_wrapper)
+NI_SYSCALL
SYSCALL(sys_sysfs,sys_sysfs,sys32_sysfs_wrapper) /* 135 */
SYSCALL(sys_personality,sys_s390_personality,sys32_personality_wrapper)
NI_SYSCALL /* for afs_syscall */
diff --git a/arch/sh/kernel/syscalls_32.S b/arch/sh/kernel/syscalls_32.S
index 030966a..14d0b63 100644
--- a/arch/sh/kernel/syscalls_32.S
+++ b/arch/sh/kernel/syscalls_32.S
@@ -150,7 +150,7 @@ ENTRY(sys_call_table)
.long sys_quotactl
.long sys_getpgid
.long sys_fchdir
- .long sys_bdflush
+ .long sys_ni_syscall /* old "sys_bdflush" */
.long sys_sysfs /* 135 */
.long sys_personality
.long sys_ni_syscall /* for afs_syscall */
diff --git a/arch/sh/kernel/syscalls_64.S b/arch/sh/kernel/syscalls_64.S
index ca0a614..06d7298 100644
--- a/arch/sh/kernel/syscalls_64.S
+++ b/arch/sh/kernel/syscalls_64.S
@@ -154,7 +154,7 @@ sys_call_table:
.long sys_quotactl
.long sys_getpgid
.long sys_fchdir
- .long sys_bdflush
+ .long sys_ni_syscall /* old "sys_bdflush" */
.long sys_sysfs /* 135 */
.long sys_personality
.long sys_ni_syscall /* for afs_syscall */
diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S
index 44e5faf..256f7b2 100644
--- a/arch/sparc/kernel/sys32.S
+++ b/arch/sparc/kernel/sys32.S
@@ -79,7 +79,6 @@ SIGN3(sys32_epoll_wait, sys_epoll_wait, %o0, %o2, %o3)
SIGN1(sys32_readahead, compat_sys_readahead, %o0)
SIGN2(sys32_fadvise64, compat_sys_fadvise64, %o0, %o4)
SIGN2(sys32_fadvise64_64, compat_sys_fadvise64_64, %o0, %o5)
-SIGN2(sys32_bdflush, sys_bdflush, %o0, %o1)
SIGN1(sys32_mlockall, sys_mlockall, %o0)
SIGN1(sys32_nfsservctl, compat_sys_nfsservctl, %o0)
SIGN1(sys32_clock_nanosleep, compat_sys_clock_nanosleep, %o1)
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index 4b86eaf..224dab6 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -61,7 +61,7 @@ sys_call_table:
/*210*/ .long sys_fadvise64_64, sys_tgkill, sys_waitpid, sys_swapoff, sys_sysinfo
/*215*/ .long sys_ipc, sys_sigreturn, sys_clone, sys_ioprio_get, sys_adjtimex
/*220*/ .long sys_sigprocmask, sys_ni_syscall, sys_delete_module, sys_ni_syscall, sys_getpgid
-/*225*/ .long sys_bdflush, sys_sysfs, sys_nis_syscall, sys_setfsuid16, sys_setfsgid16
+/*225*/ .long sys_ni_syscall, sys_sysfs, sys_nis_syscall, sys_setfsuid16, sys_setfsgid16
/*230*/ .long sys_select, sys_time, sys_splice, sys_stime, sys_statfs64
/* "We are the Knights of the Forest of Ni!!" */
/*235*/ .long sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys_mlockall
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 0331baf..72e9a9b 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -63,7 +63,7 @@ sys_call_table32:
/*210*/ .word sys32_fadvise64_64, sys32_tgkill, sys32_waitpid, sys_swapoff, compat_sys_sysinfo
.word compat_sys_ipc, sys32_sigreturn, sys_clone, sys32_ioprio_get, compat_sys_adjtimex
/*220*/ .word sys32_sigprocmask, sys_ni_syscall, sys32_delete_module, sys_ni_syscall, sys32_getpgid
- .word sys32_bdflush, sys32_sysfs, sys_nis_syscall, sys_setfsuid16, sys_setfsgid16
+ .word sys_ni_syscall, sys32_sysfs, sys_nis_syscall, sys_setfsuid16, sys_setfsgid16
/*230*/ .word sys32_select, compat_sys_time, sys32_splice, compat_sys_stime, compat_sys_statfs64
.word compat_sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys32_mlockall
/*240*/ .word sys_munlockall, sys32_sched_setparam, sys32_sched_getparam, sys32_sched_setscheduler, sys32_sched_getscheduler
@@ -139,7 +139,7 @@ sys_call_table:
/*210*/ .word sys_fadvise64_64, sys_tgkill, sys_waitpid, sys_swapoff, sys_sysinfo
.word sys_sparc_ipc, sys_nis_syscall, sys_clone, sys_ioprio_get, sys_adjtimex
/*220*/ .word sys_nis_syscall, sys_ni_syscall, sys_delete_module, sys_ni_syscall, sys_getpgid
- .word sys_bdflush, sys_sysfs, sys_nis_syscall, sys_setfsuid, sys_setfsgid
+ .word sys_ni_syscall, sys_sysfs, sys_nis_syscall, sys_setfsuid, sys_setfsgid
/*230*/ .word sys_select, sys_nis_syscall, sys_splice, sys_stime, sys_statfs64
.word sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys_mlockall
/*240*/ .word sys_munlockall, sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index abce34d..de0ced0 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -133,7 +133,7 @@ ENTRY(sys_call_table)
.long sys_quotactl
.long sys_getpgid
.long sys_fchdir
- .long sys_bdflush
+ .long sys_ni_syscall /* old "sys_bdflush" */
.long sys_sysfs /* 135 */
.long sys_personality
.long sys_ni_syscall /* reserved for afs_syscall */
diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h
index 528042c..788adab 100644
--- a/arch/xtensa/include/asm/unistd.h
+++ b/arch/xtensa/include/asm/unistd.h
@@ -459,7 +459,7 @@ __SYSCALL(205, sys_nfsservctl, 3)
#define __NR__sysctl 206
__SYSCALL(206, sys_sysctl, 1)
#define __NR_bdflush 207
-__SYSCALL(207, sys_bdflush, 2)
+__SYSCALL(207, sys_ni_syscall,0)
#define __NR_uname 208
__SYSCALL(208, sys_newuname, 1)
#define __NR_sysinfo 209
diff --git a/fs/buffer.c b/fs/buffer.c
index a08bb8e..9c7b1e5 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3127,33 +3127,6 @@ out:
EXPORT_SYMBOL(try_to_free_buffers);

/*
- * There are no bdflush tunables left. But distributions are
- * still running obsolete flush daemons, so we terminate them here.
- *
- * Use of bdflush() is deprecated and will be removed in a future kernel.
- * The `flush-X' kernel threads fully replace bdflush daemons and this call.
- */
-SYSCALL_DEFINE2(bdflush, int, func, long, data)
-{
- static int msg_count;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- if (msg_count < 5) {
- msg_count++;
- printk(KERN_INFO
- "warning: process `%s' used the obsolete bdflush"
- " system call\n", current->comm);
- printk(KERN_INFO "Fix your initscripts?\n");
- }
-
- if (func == 1)
- do_exit(0);
- return 0;
-}
-
-/*
* Buffer-head allocation
*/
static struct kmem_cache *bh_cachep;
diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index 07c40d5..cf5c1d2 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -800,7 +800,7 @@ __SYSCALL(__NR_recv, sys_recv)
#define __NR_send 1074
__SYSCALL(__NR_send, sys_send)
#define __NR_bdflush 1075
-__SYSCALL(__NR_bdflush, sys_bdflush)
+__SYSCALL(__NR_bdflush, sys_ni_syscall)
#define __NR_umount 1076
__SYSCALL(__NR_umount, sys_oldumount)
#define __ARCH_WANT_SYS_OLDUMOUNT
diff --git a/include/linux/capability.h b/include/linux/capability.h
index 16ee8b4..ea485a6 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -248,7 +248,6 @@ struct cpu_vfs_cap_data {
/* Allow examination and configuration of disk quotas */
/* Allow setting the domainname */
/* Allow setting the hostname */
-/* Allow calling bdflush() */
/* Allow mount() and umount(), setting up new smb connection */
/* Allow some autofs root ioctls */
/* Allow nfsservctl */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 83ecc17..88372fe 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -391,7 +391,6 @@ asmlinkage long sys_pause(void);
asmlinkage long sys_sync(void);
asmlinkage long sys_fsync(unsigned int fd);
asmlinkage long sys_fdatasync(unsigned int fd);
-asmlinkage long sys_bdflush(int func, long data);
asmlinkage long sys_mount(char __user *dev_name, char __user *dir_name,
char __user *type, unsigned long flags,
void __user *data);
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 25cc41c..6c67e4d 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -163,7 +163,6 @@ cond_syscall(compat_sys_move_pages);
cond_syscall(compat_sys_migrate_pages);

/* block-layer dependent */
-cond_syscall(sys_bdflush);
cond_syscall(sys_ioprio_set);
cond_syscall(sys_ioprio_get);

--
1.7.1




2011-03-31 18:15:46

by Mike Frysinger

[permalink] [raw]
Subject: Re: [PATCH] bitmap, irq: Add smp_affinity_list interface to /proc/irq

On Thu, Mar 31, 2011 at 03:37, KOSAKI Motohiro wrote:
>> > [If it was up to me, I'd eliminate the bitmask interfaces and just keep the
>> > list interfaces.  That's the stupid interface that's not needed, and far more
>> > shortsighted.]
>>
>> Agree.
>>
>> It's not impossible to remove those interfaces.  My preferred approach
>> is to add a once-per-boot warning printk if anyone uses the old
>> interface and to remove the thing altogether in three or five years.
>>
>> That reminds me.  It's been like ten years.  Someone please delete sys_bdflush().
>
> This?
>
> But to be honest, this long diffstat seems to tell me the worth is dubious. ;-)

You don't need to update the arches. Just leave the compat option in
kernel/sys_ni.c. There is really no overhead at runtime, as the function
is aliased to the ENOSYS stub.
-mike