2021-11-23 20:24:44

by Luis Chamberlain

[permalink] [raw]
Subject: [PATCH v2 2/9] sysctl: Move some boundary constants from sysctl.c to sysctl_vals

From: Xiaoming Ni <[email protected]>

sysctl has helpers which let us specify boundary values for a min or
max int value. Since these are used for a boundary check only they don't
change, so move these variables to sysctl_vals to avoid adding duplicate
variables. This will help with our cleanup of kernel/sysctl.c.

Signed-off-by: Xiaoming Ni <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
[mcgrof: major rebase]
Signed-off-by: Luis Chamberlain <[email protected]>
---
fs/proc/proc_sysctl.c | 2 +-
include/linux/sysctl.h | 12 +++++++++---
kernel/sysctl.c | 44 ++++++++++++++++++------------------------
3 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index b4950843d90a..6d462644bb00 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -26,7 +26,7 @@ static const struct file_operations proc_sys_dir_file_operations;
static const struct inode_operations proc_sys_dir_operations;

/* shared constants to be used in various sysctls */
-const int sysctl_vals[] = { 0, 1, INT_MAX };
+const int sysctl_vals[] = { -1, 0, 1, 2, 4, 100, 200, 1000, INT_MAX };
EXPORT_SYMBOL(sysctl_vals);

/* Support for permanently empty directories */
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index d3ab7969b6b5..718492057c70 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -38,9 +38,15 @@ struct ctl_table_header;
struct ctl_dir;

/* Keep the same order as in fs/proc/proc_sysctl.c */
-#define SYSCTL_ZERO ((void *)&sysctl_vals[0])
-#define SYSCTL_ONE ((void *)&sysctl_vals[1])
-#define SYSCTL_INT_MAX ((void *)&sysctl_vals[2])
+#define SYSCTL_NEG_ONE ((void *)&sysctl_vals[0])
+#define SYSCTL_ZERO ((void *)&sysctl_vals[1])
+#define SYSCTL_ONE ((void *)&sysctl_vals[2])
+#define SYSCTL_TWO ((void *)&sysctl_vals[3])
+#define SYSCTL_FOUR ((void *)&sysctl_vals[4])
+#define SYSCTL_ONE_HUNDRED ((void *)&sysctl_vals[5])
+#define SYSCTL_TWO_HUNDRED ((void *)&sysctl_vals[6])
+#define SYSCTL_ONE_THOUSAND ((void *)&sysctl_vals[7])
+#define SYSCTL_INT_MAX ((void *)&sysctl_vals[8])

extern const int sysctl_vals[];

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 857c1ccad9e8..3097f0286504 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -113,15 +113,9 @@
static int sixty = 60;
#endif

-static int __maybe_unused neg_one = -1;
-static int __maybe_unused two = 2;
-static int __maybe_unused four = 4;
static unsigned long zero_ul;
static unsigned long one_ul = 1;
static unsigned long long_max = LONG_MAX;
-static int one_hundred = 100;
-static int two_hundred = 200;
-static int one_thousand = 1000;
#ifdef CONFIG_PRINTK
static int ten_thousand = 10000;
#endif
@@ -1962,7 +1956,7 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &neg_one,
+ .extra1 = SYSCTL_NEG_ONE,
.extra2 = SYSCTL_ONE,
},
#endif
@@ -2304,7 +2298,7 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax_sysadmin,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = SYSCTL_TWO,
},
#endif
{
@@ -2564,7 +2558,7 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &neg_one,
+ .extra1 = SYSCTL_NEG_ONE,
},
#endif
#ifdef CONFIG_RT_MUTEXES
@@ -2626,7 +2620,7 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = perf_cpu_time_max_percent_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &one_hundred,
+ .extra2 = SYSCTL_ONE_HUNDRED,
},
{
.procname = "perf_event_max_stack",
@@ -2644,7 +2638,7 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = perf_event_max_stack_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &one_thousand,
+ .extra2 = SYSCTL_ONE_THOUSAND,
},
#endif
{
@@ -2675,7 +2669,7 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = bpf_unpriv_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = SYSCTL_TWO,
},
{
.procname = "bpf_stats_enabled",
@@ -2729,7 +2723,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = overcommit_policy_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = SYSCTL_TWO,
},
{
.procname = "panic_on_oom",
@@ -2738,7 +2732,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = SYSCTL_TWO,
},
{
.procname = "oom_kill_allocating_task",
@@ -2783,7 +2777,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = dirty_background_ratio_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &one_hundred,
+ .extra2 = SYSCTL_ONE_HUNDRED,
},
{
.procname = "dirty_background_bytes",
@@ -2800,7 +2794,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = dirty_ratio_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &one_hundred,
+ .extra2 = SYSCTL_ONE_HUNDRED,
},
{
.procname = "dirty_bytes",
@@ -2840,7 +2834,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two_hundred,
+ .extra2 = SYSCTL_TWO_HUNDRED,
},
#ifdef CONFIG_HUGETLB_PAGE
{
@@ -2897,7 +2891,7 @@ static struct ctl_table vm_table[] = {
.mode = 0200,
.proc_handler = drop_caches_sysctl_handler,
.extra1 = SYSCTL_ONE,
- .extra2 = &four,
+ .extra2 = SYSCTL_FOUR,
},
#ifdef CONFIG_COMPACTION
{
@@ -2914,7 +2908,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = compaction_proactiveness_sysctl_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &one_hundred,
+ .extra2 = SYSCTL_ONE_HUNDRED,
},
{
.procname = "extfrag_threshold",
@@ -2959,7 +2953,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = watermark_scale_factor_sysctl_handler,
.extra1 = SYSCTL_ONE,
- .extra2 = &one_thousand,
+ .extra2 = SYSCTL_ONE_THOUSAND,
},
{
.procname = "percpu_pagelist_high_fraction",
@@ -3038,7 +3032,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = sysctl_min_unmapped_ratio_sysctl_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &one_hundred,
+ .extra2 = SYSCTL_ONE_HUNDRED,
},
{
.procname = "min_slab_ratio",
@@ -3047,7 +3041,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = sysctl_min_slab_ratio_sysctl_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &one_hundred,
+ .extra2 = SYSCTL_ONE_HUNDRED,
},
#endif
#ifdef CONFIG_SMP
@@ -3337,7 +3331,7 @@ static struct ctl_table fs_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = SYSCTL_TWO,
},
{
.procname = "protected_regular",
@@ -3346,7 +3340,7 @@ static struct ctl_table fs_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = SYSCTL_TWO,
},
{
.procname = "suid_dumpable",
@@ -3355,7 +3349,7 @@ static struct ctl_table fs_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax_coredump,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = SYSCTL_TWO,
},
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
{
--
2.33.0



2021-11-24 04:53:47

by Eric W. Biederman

[permalink] [raw]
Subject: Re: [PATCH v2 2/9] sysctl: Move some boundary constants from sysctl.c to sysctl_vals

Luis Chamberlain <[email protected]> writes:

> From: Xiaoming Ni <[email protected]>
>
> sysctl has helpers which let us specify boundary values for a min or
> max int value. Since these are used for a boundary check only they don't
> change, so move these variables to sysctl_vals to avoid adding duplicate
> variables. This will help with our cleanup of kernel/sysctl.c.

Ouch.

I kind of, sort of, have to protest.

Where the macros that use sysctl_vals don't have a type they have caused
mysterious code breakage because people did not realize they can not be
used with sysctls that take a long instead of an int.

This came up with when the internal storage for ucounts see
kernel/ucount.c changed from an int to a long. We were quite a while
tracking what was going on until we realized that the code could not use
SYSCTL_ZERO and SYSCTL_INT_MAX and that we had to defined our own that
were long.

So before we extend something like this can we please change the
macro naming convention so that it includes the size of the type
we want.


I am also not a fan of sysctl_vals living in proc_sysctl. They
have nothing to do with the code in that file. They would do much
better in kernel/sysctl.c close to the helpers that use them.

Eric


> Signed-off-by: Xiaoming Ni <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>
> [mcgrof: major rebase]
> Signed-off-by: Luis Chamberlain <[email protected]>
> ---
> fs/proc/proc_sysctl.c | 2 +-
> include/linux/sysctl.h | 12 +++++++++---
> kernel/sysctl.c | 44 ++++++++++++++++++------------------------
> 3 files changed, 29 insertions(+), 29 deletions(-)
>
> diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
> index b4950843d90a..6d462644bb00 100644
> --- a/fs/proc/proc_sysctl.c
> +++ b/fs/proc/proc_sysctl.c
> @@ -26,7 +26,7 @@ static const struct file_operations proc_sys_dir_file_operations;
> static const struct inode_operations proc_sys_dir_operations;
>
> /* shared constants to be used in various sysctls */
> -const int sysctl_vals[] = { 0, 1, INT_MAX };
> +const int sysctl_vals[] = { -1, 0, 1, 2, 4, 100, 200, 1000, INT_MAX };
> EXPORT_SYMBOL(sysctl_vals);
>
> /* Support for permanently empty directories */
> diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
> index d3ab7969b6b5..718492057c70 100644
> --- a/include/linux/sysctl.h
> +++ b/include/linux/sysctl.h
> @@ -38,9 +38,15 @@ struct ctl_table_header;
> struct ctl_dir;
>
> /* Keep the same order as in fs/proc/proc_sysctl.c */
> -#define SYSCTL_ZERO ((void *)&sysctl_vals[0])
> -#define SYSCTL_ONE ((void *)&sysctl_vals[1])
> -#define SYSCTL_INT_MAX ((void *)&sysctl_vals[2])
> +#define SYSCTL_NEG_ONE ((void *)&sysctl_vals[0])
> +#define SYSCTL_ZERO ((void *)&sysctl_vals[1])
> +#define SYSCTL_ONE ((void *)&sysctl_vals[2])
> +#define SYSCTL_TWO ((void *)&sysctl_vals[3])
> +#define SYSCTL_FOUR ((void *)&sysctl_vals[4])
> +#define SYSCTL_ONE_HUNDRED ((void *)&sysctl_vals[5])
> +#define SYSCTL_TWO_HUNDRED ((void *)&sysctl_vals[6])
> +#define SYSCTL_ONE_THOUSAND ((void *)&sysctl_vals[7])
> +#define SYSCTL_INT_MAX ((void *)&sysctl_vals[8])
>
> extern const int sysctl_vals[];
>
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 857c1ccad9e8..3097f0286504 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -113,15 +113,9 @@
> static int sixty = 60;
> #endif
>
> -static int __maybe_unused neg_one = -1;
> -static int __maybe_unused two = 2;
> -static int __maybe_unused four = 4;
> static unsigned long zero_ul;
> static unsigned long one_ul = 1;
> static unsigned long long_max = LONG_MAX;
> -static int one_hundred = 100;
> -static int two_hundred = 200;
> -static int one_thousand = 1000;
> #ifdef CONFIG_PRINTK
> static int ten_thousand = 10000;
> #endif
> @@ -1962,7 +1956,7 @@ static struct ctl_table kern_table[] = {
> .maxlen = sizeof(int),
> .mode = 0644,
> .proc_handler = proc_dointvec_minmax,
> - .extra1 = &neg_one,
> + .extra1 = SYSCTL_NEG_ONE,
> .extra2 = SYSCTL_ONE,
> },
> #endif
> @@ -2304,7 +2298,7 @@ static struct ctl_table kern_table[] = {
> .mode = 0644,
> .proc_handler = proc_dointvec_minmax_sysadmin,
> .extra1 = SYSCTL_ZERO,
> - .extra2 = &two,
> + .extra2 = SYSCTL_TWO,
> },
> #endif
> {
> @@ -2564,7 +2558,7 @@ static struct ctl_table kern_table[] = {
> .maxlen = sizeof(int),
> .mode = 0644,
> .proc_handler = proc_dointvec_minmax,
> - .extra1 = &neg_one,
> + .extra1 = SYSCTL_NEG_ONE,
> },
> #endif
> #ifdef CONFIG_RT_MUTEXES
> @@ -2626,7 +2620,7 @@ static struct ctl_table kern_table[] = {
> .mode = 0644,
> .proc_handler = perf_cpu_time_max_percent_handler,
> .extra1 = SYSCTL_ZERO,
> - .extra2 = &one_hundred,
> + .extra2 = SYSCTL_ONE_HUNDRED,
> },
> {
> .procname = "perf_event_max_stack",
> @@ -2644,7 +2638,7 @@ static struct ctl_table kern_table[] = {
> .mode = 0644,
> .proc_handler = perf_event_max_stack_handler,
> .extra1 = SYSCTL_ZERO,
> - .extra2 = &one_thousand,
> + .extra2 = SYSCTL_ONE_THOUSAND,
> },
> #endif
> {
> @@ -2675,7 +2669,7 @@ static struct ctl_table kern_table[] = {
> .mode = 0644,
> .proc_handler = bpf_unpriv_handler,
> .extra1 = SYSCTL_ZERO,
> - .extra2 = &two,
> + .extra2 = SYSCTL_TWO,
> },
> {
> .procname = "bpf_stats_enabled",
> @@ -2729,7 +2723,7 @@ static struct ctl_table vm_table[] = {
> .mode = 0644,
> .proc_handler = overcommit_policy_handler,
> .extra1 = SYSCTL_ZERO,
> - .extra2 = &two,
> + .extra2 = SYSCTL_TWO,
> },
> {
> .procname = "panic_on_oom",
> @@ -2738,7 +2732,7 @@ static struct ctl_table vm_table[] = {
> .mode = 0644,
> .proc_handler = proc_dointvec_minmax,
> .extra1 = SYSCTL_ZERO,
> - .extra2 = &two,
> + .extra2 = SYSCTL_TWO,
> },
> {
> .procname = "oom_kill_allocating_task",
> @@ -2783,7 +2777,7 @@ static struct ctl_table vm_table[] = {
> .mode = 0644,
> .proc_handler = dirty_background_ratio_handler,
> .extra1 = SYSCTL_ZERO,
> - .extra2 = &one_hundred,
> + .extra2 = SYSCTL_ONE_HUNDRED,
> },
> {
> .procname = "dirty_background_bytes",
> @@ -2800,7 +2794,7 @@ static struct ctl_table vm_table[] = {
> .mode = 0644,
> .proc_handler = dirty_ratio_handler,
> .extra1 = SYSCTL_ZERO,
> - .extra2 = &one_hundred,
> + .extra2 = SYSCTL_ONE_HUNDRED,
> },
> {
> .procname = "dirty_bytes",
> @@ -2840,7 +2834,7 @@ static struct ctl_table vm_table[] = {
> .mode = 0644,
> .proc_handler = proc_dointvec_minmax,
> .extra1 = SYSCTL_ZERO,
> - .extra2 = &two_hundred,
> + .extra2 = SYSCTL_TWO_HUNDRED,
> },
> #ifdef CONFIG_HUGETLB_PAGE
> {
> @@ -2897,7 +2891,7 @@ static struct ctl_table vm_table[] = {
> .mode = 0200,
> .proc_handler = drop_caches_sysctl_handler,
> .extra1 = SYSCTL_ONE,
> - .extra2 = &four,
> + .extra2 = SYSCTL_FOUR,
> },
> #ifdef CONFIG_COMPACTION
> {
> @@ -2914,7 +2908,7 @@ static struct ctl_table vm_table[] = {
> .mode = 0644,
> .proc_handler = compaction_proactiveness_sysctl_handler,
> .extra1 = SYSCTL_ZERO,
> - .extra2 = &one_hundred,
> + .extra2 = SYSCTL_ONE_HUNDRED,
> },
> {
> .procname = "extfrag_threshold",
> @@ -2959,7 +2953,7 @@ static struct ctl_table vm_table[] = {
> .mode = 0644,
> .proc_handler = watermark_scale_factor_sysctl_handler,
> .extra1 = SYSCTL_ONE,
> - .extra2 = &one_thousand,
> + .extra2 = SYSCTL_ONE_THOUSAND,
> },
> {
> .procname = "percpu_pagelist_high_fraction",
> @@ -3038,7 +3032,7 @@ static struct ctl_table vm_table[] = {
> .mode = 0644,
> .proc_handler = sysctl_min_unmapped_ratio_sysctl_handler,
> .extra1 = SYSCTL_ZERO,
> - .extra2 = &one_hundred,
> + .extra2 = SYSCTL_ONE_HUNDRED,
> },
> {
> .procname = "min_slab_ratio",
> @@ -3047,7 +3041,7 @@ static struct ctl_table vm_table[] = {
> .mode = 0644,
> .proc_handler = sysctl_min_slab_ratio_sysctl_handler,
> .extra1 = SYSCTL_ZERO,
> - .extra2 = &one_hundred,
> + .extra2 = SYSCTL_ONE_HUNDRED,
> },
> #endif
> #ifdef CONFIG_SMP
> @@ -3337,7 +3331,7 @@ static struct ctl_table fs_table[] = {
> .mode = 0644,
> .proc_handler = proc_dointvec_minmax,
> .extra1 = SYSCTL_ZERO,
> - .extra2 = &two,
> + .extra2 = SYSCTL_TWO,
> },
> {
> .procname = "protected_regular",
> @@ -3346,7 +3340,7 @@ static struct ctl_table fs_table[] = {
> .mode = 0644,
> .proc_handler = proc_dointvec_minmax,
> .extra1 = SYSCTL_ZERO,
> - .extra2 = &two,
> + .extra2 = SYSCTL_TWO,
> },
> {
> .procname = "suid_dumpable",
> @@ -3355,7 +3349,7 @@ static struct ctl_table fs_table[] = {
> .mode = 0644,
> .proc_handler = proc_dointvec_minmax_coredump,
> .extra1 = SYSCTL_ZERO,
> - .extra2 = &two,
> + .extra2 = SYSCTL_TWO,
> },
> #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
> {

2021-11-24 07:05:17

by Xiaoming Ni

[permalink] [raw]
Subject: Re: [PATCH v2 2/9] sysctl: Move some boundary constants from sysctl.c to sysctl_vals

On 2021/11/24 12:51, Eric W. Biederman wrote:
> Luis Chamberlain <[email protected]> writes:
>
>> From: Xiaoming Ni <[email protected]>
>>
>> sysctl has helpers which let us specify boundary values for a min or
>> max int value. Since these are used for a boundary check only they don't
>> change, so move these variables to sysctl_vals to avoid adding duplicate
>> variables. This will help with our cleanup of kernel/sysctl.c.
>
> Ouch.
>
> I kind of, sort of, have to protest.
>
> Where the macros that use sysctl_vals don't have a type they have caused
> mysterious code breakage because people did not realize they can not be
> used with sysctls that take a long instead of an int.
>
> This came up with when the internal storage for ucounts see
> kernel/ucount.c changed from an int to a long. We were quite a while
> tracking what was going on until we realized that the code could not use
> SYSCTL_ZERO and SYSCTL_INT_MAX and that we had to defined our own thatSYSCTL_ZERO and SYSCTL_ZERO involve dozens of files and are used in hundreds of places.
> were long.
>
static unsigned long zero_ul;
static unsigned long one_ul = 1;
static unsigned long long_max = LONG_MAX;
EXPORT_SYMBOL(proc_doulongvec_minmax);

Yes, min/max of type unsigned long is used in multiple sysctl
interfaces. It is necessary to add an unsigned long sysctl_val array to
avoid repeated definitions in different .c files.

> So before we extend something like this can we please change the
> macro naming convention so that it includes the size of the type
> we want.
>
The int type is the most widely used type. By default, numeric constants
are also of the int type. SYSCTL_ZERO and SYSCTL_ZERO involve dozens of
files and are used in hundreds of places. Whether only non-int macros
need to be named with their type size?

>
> I am also not a fan of sysctl_vals living in proc_sysctl. They
> have nothing to do with the code in that file. They would do much
> better in kernel/sysctl.c close to the helpers that use them.
>
yes

Thanks
Xiaoming Ni


> Eric
>
>
>> Signed-off-by: Xiaoming Ni <[email protected]>
>> Reviewed-by: Kees Cook <[email protected]>
>> [mcgrof: major rebase]
>> Signed-off-by: Luis Chamberlain <[email protected]>
>> ---
>> fs/proc/proc_sysctl.c | 2 +-
>> include/linux/sysctl.h | 12 +++++++++---
>> kernel/sysctl.c | 44 ++++++++++++++++++------------------------
>> 3 files changed, 29 insertions(+), 29 deletions(-)
>>
>> diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
>> index b4950843d90a..6d462644bb00 100644
>> --- a/fs/proc/proc_sysctl.c
>> +++ b/fs/proc/proc_sysctl.c
>> @@ -26,7 +26,7 @@ static const struct file_operations proc_sys_dir_file_operations;
>> static const struct inode_operations proc_sys_dir_operations;
>>
>> /* shared constants to be used in various sysctls */
>> -const int sysctl_vals[] = { 0, 1, INT_MAX };
>> +const int sysctl_vals[] = { -1, 0, 1, 2, 4, 100, 200, 1000, INT_MAX };
>> EXPORT_SYMBOL(sysctl_vals);
>>
>> /* Support for permanently empty directories */
>> diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
>> index d3ab7969b6b5..718492057c70 100644
>> --- a/include/linux/sysctl.h
>> +++ b/include/linux/sysctl.h
>> @@ -38,9 +38,15 @@ struct ctl_table_header;
>> struct ctl_dir;
>>
>> /* Keep the same order as in fs/proc/proc_sysctl.c */
>> -#define SYSCTL_ZERO ((void *)&sysctl_vals[0])
>> -#define SYSCTL_ONE ((void *)&sysctl_vals[1])
>> -#define SYSCTL_INT_MAX ((void *)&sysctl_vals[2])
>> +#define SYSCTL_NEG_ONE ((void *)&sysctl_vals[0])
>> +#define SYSCTL_ZERO ((void *)&sysctl_vals[1])
>> +#define SYSCTL_ONE ((void *)&sysctl_vals[2])
>> +#define SYSCTL_TWO ((void *)&sysctl_vals[3])
>> +#define SYSCTL_FOUR ((void *)&sysctl_vals[4])
>> +#define SYSCTL_ONE_HUNDRED ((void *)&sysctl_vals[5])
>> +#define SYSCTL_TWO_HUNDRED ((void *)&sysctl_vals[6])
>> +#define SYSCTL_ONE_THOUSAND ((void *)&sysctl_vals[7])
>> +#define SYSCTL_INT_MAX ((void *)&sysctl_vals[8])
>>
>> extern const int sysctl_vals[];
>>
>> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
>> index 857c1ccad9e8..3097f0286504 100644
>> --- a/kernel/sysctl.c
>> +++ b/kernel/sysctl.c
>> @@ -113,15 +113,9 @@
>> static int sixty = 60;
>> #endif
>>
>> -static int __maybe_unused neg_one = -1;
>> -static int __maybe_unused two = 2;
>> -static int __maybe_unused four = 4;
>> static unsigned long zero_ul;
>> static unsigned long one_ul = 1;
>> static unsigned long long_max = LONG_MAX;
>> -static int one_hundred = 100;
>> -static int two_hundred = 200;
>> -static int one_thousand = 1000;
>> #ifdef CONFIG_PRINTK
>> static int ten_thousand = 10000;
>> #endif
>> @@ -1962,7 +1956,7 @@ static struct ctl_table kern_table[] = {
>> .maxlen = sizeof(int),
>> .mode = 0644,
>> .proc_handler = proc_dointvec_minmax,
>> - .extra1 = &neg_one,
>> + .extra1 = SYSCTL_NEG_ONE,
>> .extra2 = SYSCTL_ONE,
>> },
>> #endif
>> @@ -2304,7 +2298,7 @@ static struct ctl_table kern_table[] = {
>> .mode = 0644,
>> .proc_handler = proc_dointvec_minmax_sysadmin,
>> .extra1 = SYSCTL_ZERO,
>> - .extra2 = &two,
>> + .extra2 = SYSCTL_TWO,
>> },
>> #endif
>> {
>> @@ -2564,7 +2558,7 @@ static struct ctl_table kern_table[] = {
>> .maxlen = sizeof(int),
>> .mode = 0644,
>> .proc_handler = proc_dointvec_minmax,
>> - .extra1 = &neg_one,
>> + .extra1 = SYSCTL_NEG_ONE,
>> },
>> #endif
>> #ifdef CONFIG_RT_MUTEXES
>> @@ -2626,7 +2620,7 @@ static struct ctl_table kern_table[] = {
>> .mode = 0644,
>> .proc_handler = perf_cpu_time_max_percent_handler,
>> .extra1 = SYSCTL_ZERO,
>> - .extra2 = &one_hundred,
>> + .extra2 = SYSCTL_ONE_HUNDRED,
>> },
>> {
>> .procname = "perf_event_max_stack",
>> @@ -2644,7 +2638,7 @@ static struct ctl_table kern_table[] = {
>> .mode = 0644,
>> .proc_handler = perf_event_max_stack_handler,
>> .extra1 = SYSCTL_ZERO,
>> - .extra2 = &one_thousand,
>> + .extra2 = SYSCTL_ONE_THOUSAND,
>> },
>> #endif
>> {
>> @@ -2675,7 +2669,7 @@ static struct ctl_table kern_table[] = {
>> .mode = 0644,
>> .proc_handler = bpf_unpriv_handler,
>> .extra1 = SYSCTL_ZERO,
>> - .extra2 = &two,
>> + .extra2 = SYSCTL_TWO,
>> },
>> {
>> .procname = "bpf_stats_enabled",
>> @@ -2729,7 +2723,7 @@ static struct ctl_table vm_table[] = {
>> .mode = 0644,
>> .proc_handler = overcommit_policy_handler,
>> .extra1 = SYSCTL_ZERO,
>> - .extra2 = &two,
>> + .extra2 = SYSCTL_TWO,
>> },
>> {
>> .procname = "panic_on_oom",
>> @@ -2738,7 +2732,7 @@ static struct ctl_table vm_table[] = {
>> .mode = 0644,
>> .proc_handler = proc_dointvec_minmax,
>> .extra1 = SYSCTL_ZERO,
>> - .extra2 = &two,
>> + .extra2 = SYSCTL_TWO,
>> },
>> {
>> .procname = "oom_kill_allocating_task",
>> @@ -2783,7 +2777,7 @@ static struct ctl_table vm_table[] = {
>> .mode = 0644,
>> .proc_handler = dirty_background_ratio_handler,
>> .extra1 = SYSCTL_ZERO,
>> - .extra2 = &one_hundred,
>> + .extra2 = SYSCTL_ONE_HUNDRED,
>> },
>> {
>> .procname = "dirty_background_bytes",
>> @@ -2800,7 +2794,7 @@ static struct ctl_table vm_table[] = {
>> .mode = 0644,
>> .proc_handler = dirty_ratio_handler,
>> .extra1 = SYSCTL_ZERO,
>> - .extra2 = &one_hundred,
>> + .extra2 = SYSCTL_ONE_HUNDRED,
>> },
>> {
>> .procname = "dirty_bytes",
>> @@ -2840,7 +2834,7 @@ static struct ctl_table vm_table[] = {
>> .mode = 0644,
>> .proc_handler = proc_dointvec_minmax,
>> .extra1 = SYSCTL_ZERO,
>> - .extra2 = &two_hundred,
>> + .extra2 = SYSCTL_TWO_HUNDRED,
>> },
>> #ifdef CONFIG_HUGETLB_PAGE
>> {
>> @@ -2897,7 +2891,7 @@ static struct ctl_table vm_table[] = {
>> .mode = 0200,
>> .proc_handler = drop_caches_sysctl_handler,
>> .extra1 = SYSCTL_ONE,
>> - .extra2 = &four,
>> + .extra2 = SYSCTL_FOUR,
>> },
>> #ifdef CONFIG_COMPACTION
>> {
>> @@ -2914,7 +2908,7 @@ static struct ctl_table vm_table[] = {
>> .mode = 0644,
>> .proc_handler = compaction_proactiveness_sysctl_handler,
>> .extra1 = SYSCTL_ZERO,
>> - .extra2 = &one_hundred,
>> + .extra2 = SYSCTL_ONE_HUNDRED,
>> },
>> {
>> .procname = "extfrag_threshold",
>> @@ -2959,7 +2953,7 @@ static struct ctl_table vm_table[] = {
>> .mode = 0644,
>> .proc_handler = watermark_scale_factor_sysctl_handler,
>> .extra1 = SYSCTL_ONE,
>> - .extra2 = &one_thousand,
>> + .extra2 = SYSCTL_ONE_THOUSAND,
>> },
>> {
>> .procname = "percpu_pagelist_high_fraction",
>> @@ -3038,7 +3032,7 @@ static struct ctl_table vm_table[] = {
>> .mode = 0644,
>> .proc_handler = sysctl_min_unmapped_ratio_sysctl_handler,
>> .extra1 = SYSCTL_ZERO,
>> - .extra2 = &one_hundred,
>> + .extra2 = SYSCTL_ONE_HUNDRED,
>> },
>> {
>> .procname = "min_slab_ratio",
>> @@ -3047,7 +3041,7 @@ static struct ctl_table vm_table[] = {
>> .mode = 0644,
>> .proc_handler = sysctl_min_slab_ratio_sysctl_handler,
>> .extra1 = SYSCTL_ZERO,
>> - .extra2 = &one_hundred,
>> + .extra2 = SYSCTL_ONE_HUNDRED,
>> },
>> #endif
>> #ifdef CONFIG_SMP
>> @@ -3337,7 +3331,7 @@ static struct ctl_table fs_table[] = {
>> .mode = 0644,
>> .proc_handler = proc_dointvec_minmax,
>> .extra1 = SYSCTL_ZERO,
>> - .extra2 = &two,
>> + .extra2 = SYSCTL_TWO,
>> },
>> {
>> .procname = "protected_regular",
>> @@ -3346,7 +3340,7 @@ static struct ctl_table fs_table[] = {
>> .mode = 0644,
>> .proc_handler = proc_dointvec_minmax,
>> .extra1 = SYSCTL_ZERO,
>> - .extra2 = &two,
>> + .extra2 = SYSCTL_TWO,
>> },
>> {
>> .procname = "suid_dumpable",
>> @@ -3355,7 +3349,7 @@ static struct ctl_table fs_table[] = {
>> .mode = 0644,
>> .proc_handler = proc_dointvec_minmax_coredump,
>> .extra1 = SYSCTL_ZERO,
>> - .extra2 = &two,
>> + .extra2 = SYSCTL_TWO,
>> },
>> #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
>> {
> .
>


2021-11-24 17:39:10

by Eric W. Biederman

[permalink] [raw]
Subject: Re: [PATCH v2 2/9] sysctl: Move some boundary constants from sysctl.c to sysctl_vals

Xiaoming Ni <[email protected]> writes:

> On 2021/11/24 12:51, Eric W. Biederman wrote:
>> Luis Chamberlain <[email protected]> writes:
>>
>>> From: Xiaoming Ni <[email protected]>
>>>
>>> sysctl has helpers which let us specify boundary values for a min or
>>> max int value. Since these are used for a boundary check only they don't
>>> change, so move these variables to sysctl_vals to avoid adding duplicate
>>> variables. This will help with our cleanup of kernel/sysctl.c.
>>
>> Ouch.
>>
>> I kind of, sort of, have to protest.
>>
>> Where the macros that use sysctl_vals don't have a type they have caused
>> mysterious code breakage because people did not realize they can not be
>> used with sysctls that take a long instead of an int.
>>
>> This came up with when the internal storage for ucounts see
>> kernel/ucount.c changed from an int to a long. We were quite a while
>> tracking what was going on until we realized that the code could not use
>> SYSCTL_ZERO and SYSCTL_INT_MAX and that we had to defined our own thatSYSCTL_ZERO and SYSCTL_ZERO involve dozens of files and are used in hundreds of places.
>> were long.
>>
> static unsigned long zero_ul;
> static unsigned long one_ul = 1;
> static unsigned long long_max = LONG_MAX;
> EXPORT_SYMBOL(proc_doulongvec_minmax);
>
> Yes, min/max of type unsigned long is used in multiple sysctl
> interfaces. It is necessary to add an unsigned long sysctl_val array
> to avoid repeated definitions in different .c files.
>
>> So before we extend something like this can we please change the
>> macro naming convention so that it includes the size of the type
>> we want.
>>
> The int type is the most widely used type. By default, numeric
> constants are also of the int type. SYSCTL_ZERO and SYSCTL_ZERO
> involve dozens of files and are used in hundreds of places. Whether
> only non-int macros need to be named with their type size?
>
>>
>> I am also not a fan of sysctl_vals living in proc_sysctl. They
>> have nothing to do with the code in that file. They would do much
>> better in kernel/sysctl.c close to the helpers that use them.
>>
> yes
>

Looking a little more. I think it makes sense to do something like:

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 1fa2b69c6fc3..c299009421ea 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -121,8 +121,8 @@ struct ctl_table {
struct ctl_table *child; /* Deprecated */
proc_handler *proc_handler; /* Callback for text formatting */
struct ctl_table_poll *poll;
- void *extra1;
- void *extra2;
+ long min;
+ long max;
} __randomize_layout;

Nearly every use of .extra1 and .extra2 are for min and max values.
A long takes the same storage as a void * parameter.

So it would be a net saving in storage as you don't have separate
storage for the values anywhere.

There are a few cases where .extra1 is used for something else
so keeping a "void *extra" field will probably be needed.

By finishing the removal of the child field adding a "void *extra"
field can be done at no storage cost.

Having the min and max parameters in the structure has the major
advantage that there is no redirection, and no fancy games. People
can just read the value from the structure initializer. Plus a
conversion from int to long won't requiring changing the min and
max constants.

So really I think instead of doubling down on the error prone case
that we have and extending sysctl_vals we should just get rid of
it entirely.

It is a bit more work to make that change but the long term result is
much better.

Any chance we can do that for a cleanup instead of extending sysctl_vals?

Eric

2021-11-24 23:12:56

by Luis Chamberlain

[permalink] [raw]
Subject: Re: [PATCH v2 2/9] sysctl: Move some boundary constants from sysctl.c to sysctl_vals

On Wed, Nov 24, 2021 at 11:38:19AM -0600, Eric W. Biederman wrote:
> Looking a little more. I think it makes sense to do something like:
>
> diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
> index 1fa2b69c6fc3..c299009421ea 100644
> --- a/include/linux/sysctl.h
> +++ b/include/linux/sysctl.h
> @@ -121,8 +121,8 @@ struct ctl_table {
> struct ctl_table *child; /* Deprecated */
> proc_handler *proc_handler; /* Callback for text formatting */
> struct ctl_table_poll *poll;
> - void *extra1;
> - void *extra2;
> + long min;
> + long max;
> } __randomize_layout;
>
> Any chance we can do that for a cleanup instead of extending sysctl_vals?

Essentially the change is *big*. We either do that *not yet implemented*
change *now, and then have to rebase all the non-upstream work to adapt
to that, or we do that after the cleanup.

Both I think are going to cause some suffering. I'd prefer to do it
after though. Would you be OK with that?

Luis