Since it is becoming clear that there are just enough users of the
binary sysctl interface that completely removing the binary interface
from the kernel will not be an option for foreseeable future, we
need to find a way to address the sysctl maintenance issues.
The basic problem is that sysctl requires one central authority
to allocate sysctl numbers, or else conflicts and ABI breakage
occur. The proc interface to sysctl does not have that problem,
as names are not densely allocated.
By not terminating a sysctl table until I have neither a ctl_name
nor a procname, it becomes simple to add sysctl entries that don't
show up in the binary sysctl interface. Which allows people to avoid
allocating a binary sysctl value when not needed.
I have audited the kernel code and in my reading I have not found a
single sysctl table that wasn't terminated by a completely zero filled
entry. So this change in behavior should not affect anything.
I think this mechanism eases the pain enough that combined with a little
disciple we can solve the reoccurring sysctl ABI breakage.
Signed-off-by: Eric W. Biederman <[email protected]>
---
include/linux/sysctl.h | 9 ++++++---
kernel/sysctl.c | 8 +++++---
2 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 1b24bd4..c184732 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -961,8 +961,8 @@ extern ctl_handler sysctl_ms_jiffies;
/*
* Register a set of sysctl names by calling register_sysctl_table
* with an initialised array of ctl_table's. An entry with zero
- * ctl_name terminates the table. table->de will be set up by the
- * registration and need not be initialised in advance.
+ * ctl_name and NULL procname terminates the table. table->de will be
+ * set up by the registration and need not be initialised in advance.
*
* sysctl names can be mirrored automatically under /proc/sys. The
* procname supplied controls /proc naming.
@@ -973,7 +973,10 @@ extern ctl_handler sysctl_ms_jiffies;
* Leaf nodes in the sysctl tree will be represented by a single file
* under /proc; non-leaf nodes will be represented by directories. A
* null procname disables /proc mirroring at this node.
- *
+ *
+ * sysctl entries with a zero ctl_name will not be available through
+ * the binary sysctl interface.
+ *
* sysctl(2) can automatically manage read and write requests through
* the sysctl table. The data and maxlen fields of the ctl_table
* struct enable minimal validation of the values being written to be
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8bff2c1..09530ae 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1315,7 +1315,9 @@ repeat:
return -ENOTDIR;
if (get_user(n, name))
return -EFAULT;
- for ( ; table->ctl_name; table++) {
+ for ( ; table->ctl_name || table->procname; table++) {
+ if (!table->ctl_name)
+ continue;
if (n == table->ctl_name || table->ctl_name == CTL_ANY) {
int error;
if (table->child) {
@@ -1532,7 +1534,7 @@ static void register_proc_table(ctl_tabl
int len;
mode_t mode;
- for (; table->ctl_name; table++) {
+ for (; table->ctl_name || table->procname; table++) {
/* Can't do anything without a proc name. */
if (!table->procname)
continue;
@@ -1579,7 +1581,7 @@ static void register_proc_table(ctl_tabl
static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root)
{
struct proc_dir_entry *de;
- for (; table->ctl_name; table++) {
+ for (; table->ctl_name || table->procname; table++) {
if (!(de = table->de))
continue;
if (de->mode & S_IFDIR) {
--
1.4.2.rc3.g7e18e-dirty
This patch takes the CTL_UNNUMBERD concept from NFS and makes
it available to all new sysctl users.
At the same time the sysctl binary interface maintenance documentation
is updated to mention and to describe what is needed to successfully
maintain the sysctl binary interface.
Signed-off-by: Eric W. Biederman <[email protected]>
---
fs/lockd/svc.c | 3 ---
fs/nfs/sysctl.c | 5 -----
include/linux/sysctl.h | 14 +++++++++++---
3 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 6341392..8ca1808 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -353,9 +353,6 @@ EXPORT_SYMBOL(lockd_down);
* Sysctl parameters (same as module parameters, different interface).
*/
-/* Something that isn't CTL_ANY, CTL_NONE or a value that may clash. */
-#define CTL_UNNUMBERED -2
-
static ctl_table nlm_sysctls[] = {
{
.ctl_name = CTL_UNNUMBERED,
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index 2fe3403..3ea50ac 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -18,11 +18,6 @@ #include "callback.h"
static const int nfs_set_port_min = 0;
static const int nfs_set_port_max = 65535;
static struct ctl_table_header *nfs_callback_sysctl_table;
-/*
- * Something that isn't CTL_ANY, CTL_NONE or a value that may clash.
- * Use the same values as fs/lockd/svc.c
- */
-#define CTL_UNNUMBERED -2
static ctl_table nfs_cb_sysctls[] = {
#ifdef CONFIG_NFS_V4
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index c184732..91e0bf0 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -6,10 +6,17 @@
****************************************************************
****************************************************************
**
+ ** WARNING:
** The values in this file are exported to user space via
- ** the sysctl() binary interface. However this interface
- ** is unstable and deprecated and will be removed in the future.
- ** For a stable interface use /proc/sys.
+ ** the sysctl() binary interface. Do *NOT* change the
+ ** numbering of any existing values here, and do not change
+ ** any numbers within any one set of values. If you have to
+ ** have to redefine an existing interface, use a new number for it.
+ ** The kernel will then return -ENOTDIR to any application using
+ ** the old binary interface.
+ **
+ ** For new interfaces unless you really need a binary number
+ ** please use CTL_UNNUMBERED.
**
****************************************************************
****************************************************************
@@ -48,6 +55,7 @@ struct __sysctl_args {
#ifdef __KERNEL__
#define CTL_ANY -1 /* Matches any name */
#define CTL_NONE 0
+#define CTL_UNNUMBERED CTL_NONE /* sysctl without a binary number */
#endif
enum
--
1.4.2.rc3.g7e18e-dirty
Ar Llu, 2006-10-23 am 01:22 -0600, ysgrifennodd Eric W. Biederman:
> I think this mechanism eases the pain enough that combined with a little
> disciple we can solve the reoccurring sysctl ABI breakage.
Agreed
>
> Signed-off-by: Eric W. Biederman <[email protected]>
Acked-by: Alan Cox <[email protected]>
On Oct 23, 2006, at 03:25:13, Eric W. Biederman wrote:
> --- a/fs/lockd/svc.c
> -/* Something that isn't CTL_ANY, CTL_NONE or a value that may
> clash. */
> -#define CTL_UNNUMBERED -2
> -
> --- a/fs/nfs/sysctl.c
> -/*
> - * Something that isn't CTL_ANY, CTL_NONE or a value that may clash.
> - * Use the same values as fs/lockd/svc.c
> - */
> -#define CTL_UNNUMBERED -2
> +++ b/include/linux/sysctl.h
> #ifdef __KERNEL__
> #define CTL_ANY -1 /* Matches any name */
> #define CTL_NONE 0
> +#define CTL_UNNUMBERED CTL_NONE /* sysctl without a binary number */
> #endif
This change looks totally broken. Before this patch, CTL_UNNUMBERED
== -2, a number that isn't CTL_ANY, CTL_NONE, or a valid sysctl
number. After this patch, CTL_UNNUMBERED == 0, or CTL_NONE.
Cheers,
Kyle Moffett
Kyle Moffett <[email protected]> writes:
> On Oct 23, 2006, at 03:25:13, Eric W. Biederman wrote:
>> --- a/fs/lockd/svc.c
>> -/* Something that isn't CTL_ANY, CTL_NONE or a value that may clash. */
>> -#define CTL_UNNUMBERED -2
>> -
>
>> --- a/fs/nfs/sysctl.c
>> -/*
>> - * Something that isn't CTL_ANY, CTL_NONE or a value that may clash.
>> - * Use the same values as fs/lockd/svc.c
>> - */
>> -#define CTL_UNNUMBERED -2
>
>> +++ b/include/linux/sysctl.h
>> #ifdef __KERNEL__
>> #define CTL_ANY -1 /* Matches any name */
>> #define CTL_NONE 0
>> +#define CTL_UNNUMBERED CTL_NONE /* sysctl without a binary number */
>> #endif
>
> This change looks totally broken. Before this patch, CTL_UNNUMBERED == -2, a
> number that isn't CTL_ANY, CTL_NONE, or a valid sysctl number. After this
> patch, CTL_UNNUMBERED == 0, or CTL_NONE.
Exactly. The only reserved sysctl numbers we have are 0 and -1.
Until my previous patch there was on number you could place into the
sysctl table that meant we did not have a binary sysctl name. 0 was
the closest but since it terminated the table it is generally useless.
-2 was a hack attempting to implement that within in the confines of
the previous sysctl implementation.
Now that I have reserved ctl_name == 0 to explicitly mean no sysctl number
and require both ctl_name == 0 and procname == NULL to terminate a sysctl
table. We can remove the hack that nfs had, because we actually have a clean
way of implementing it.
There are sysctl tables currently in existence that use -2 as the index
of a data entry so that was out. Numbers around MIN_INT/MAX_INT are probably
still available to reserve for a new purpose globally but 0 seems perfectly
up to the job.
I kept the CTL_UNNUMBERED name because it seems a little clearer than CTL_NONE.
Eric
On Monday 23 October 2006 09:25, Eric W. Biederman wrote:
> This patch takes the CTL_UNNUMBERD concept from NFS and makes
> it available to all new sysctl users.
>
> At the same time the sysctl binary interface maintenance documentation
> is updated to mention and to describe what is needed to successfully
> maintain the sysctl binary interface.
Good. I've been using 999 for my sysctls for quite some time.
-Andi
>