We can now handle sysctl parameters on kernel command line, but historically
some parameters introduced their own command line equivalent, which we don't
want to remove for compatibility reasons. We can however convert them to the
generic infrastructure with a table translating the legacy command line
parameters to their sysctl names, and removing the one-off param handlers.
This patch adds the support and makes the first conversion to demonstrate it,
on the (deprecated) numa_zonelist_order parameter.
Signed-off-by: Vlastimil Babka <[email protected]>
---
kernel/sysctl.c | 39 +++++++++++++++++++++++++++++++++++----
mm/page_alloc.c | 9 ---------
2 files changed, 35 insertions(+), 13 deletions(-)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 18c7f5606d55..fd72853396f9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1971,6 +1971,22 @@ static struct ctl_table dev_table[] = {
{ }
};
+struct sysctl_alias {
+ char *kernel_param;
+ char *sysctl_param;
+};
+
+/*
+ * Historically some settings had both sysctl and a command line parameter.
+ * With the generic sysctl. parameter support, we can handle them at a single
+ * place and only keep the historical name for compatibility. This is not meant
+ * to add brand new aliases.
+ */
+static struct sysctl_alias sysctl_aliases[] = {
+ {"numa_zonelist_order", "vm.numa_zonelist_order" },
+ { }
+};
+
int __init sysctl_init(void)
{
struct ctl_table_header *hdr;
@@ -1980,6 +1996,18 @@ int __init sysctl_init(void)
return 0;
}
+char *sysctl_find_alias(char *param)
+{
+ struct sysctl_alias *alias;
+
+ for (alias = &sysctl_aliases[0]; alias->kernel_param != NULL; alias++) {
+ if (strcmp(alias->kernel_param, param) == 0)
+ return alias->sysctl_param;
+ }
+
+ return NULL;
+}
+
/* Set sysctl value passed on kernel command line. */
int process_sysctl_arg(char *param, char *val,
const char *unused, void *arg)
@@ -1990,10 +2018,13 @@ int process_sysctl_arg(char *param, char *val,
loff_t ppos = 0;
struct ctl_table *ctl, *found = NULL;
- if (strncmp(param, "sysctl.", sizeof("sysctl.") - 1))
- return 0;
-
- param += sizeof("sysctl.") - 1;
+ if (strncmp(param, "sysctl.", sizeof("sysctl.") - 1) == 0) {
+ param += sizeof("sysctl.") - 1;
+ } else {
+ param = sysctl_find_alias(param);
+ if (!param)
+ return 0;
+ }
remaining = param;
ctl = &sysctl_base_table[0];
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3c4eb750a199..de7a134b1b8a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5460,15 +5460,6 @@ static int __parse_numa_zonelist_order(char *s)
return 0;
}
-static __init int setup_numa_zonelist_order(char *s)
-{
- if (!s)
- return 0;
-
- return __parse_numa_zonelist_order(s);
-}
-early_param("numa_zonelist_order", setup_numa_zonelist_order);
-
char numa_zonelist_order[] = "Node";
/*
--
2.25.1
Both patches look really great to me. I haven't really checked all the
details but from a quick glance they both seem ok.
I would just add a small clarification here. Unless I am mistaken
early_param is called earlier than it would be now. But that shouldn't
cause any problems because the underlying implementation is just a noop
for backward compatibility.
Thanks a lot, this looks like a very nice improvement.
On Wed 25-03-20 13:03:45, Vlastimil Babka wrote:
[...]
> -static __init int setup_numa_zonelist_order(char *s)
> -{
> - if (!s)
> - return 0;
> -
> - return __parse_numa_zonelist_order(s);
> -}
> -early_param("numa_zonelist_order", setup_numa_zonelist_order);
> -
> char numa_zonelist_order[] = "Node";
>
> /*
> --
> 2.25.1
--
Michal Hocko
SUSE Labs
On 3/25/20 3:29 PM, Michal Hocko wrote:
> Both patches look really great to me. I haven't really checked all the
> details but from a quick glance they both seem ok.
Thanks.
> I would just add a small clarification here. Unless I am mistaken
> early_param is called earlier than it would be now. But that shouldn't
> cause any problems because the underlying implementation is just a noop
> for backward compatibility.
Yeah, indeed worth noting somewhere explicitly. The conversion can't be done
blindly, one has to consider whether the delay compared to early_param can be a
disadvantage or not. For example the nmi_watchdog parameter is probably best
left as it is?
> Thanks a lot this looks like a very nice improvement.
>
> On Wed 25-03-20 13:03:45, Vlastimil Babka wrote:
> [...]
>> -static __init int setup_numa_zonelist_order(char *s)
>> -{
>> - if (!s)
>> - return 0;
>> -
>> - return __parse_numa_zonelist_order(s);
>> -}
>> -early_param("numa_zonelist_order", setup_numa_zonelist_order);
>> -
>> char numa_zonelist_order[] = "Node";
>>
>> /*
>> --
>> 2.25.1
>
On Wed 25-03-20 15:36:23, Vlastimil Babka wrote:
> On 3/25/20 3:29 PM, Michal Hocko wrote:
> > Both patches look really great to me. I haven't really checked all the
> > details but from a quick glance they both seem ok.
>
> Thanks.
>
> > I would just add a small clarification here. Unless I am mistaken
> > early_param is called earlier than it would be now. But that shouldn't
> > cause any problems because the underlying implementation is just a noop
> > for backward compatibility.
>
> Yeah, indeed worth noting somewhere explicitly. The conversion can't be done
> blindly, one has to consider whether the delay compared to early_param can be a
> disadvantage or not. For example the nmi_watchdog parameter is probably best
> left as it is?
I wouldn't mind moving nmi_watchdog timeout initialization to later. If
there is a use case that relies on early initialization then the patch can
be reverted, but I struggle to think of anything reasonable. If the early
init code needs a longer timeout to prevent false positives then
there is clearly a bug that would be better fixed. And a necessary shorter
timeout sounds quite exotic to me TBH.
--
Michal Hocko
SUSE Labs
On Wed, Mar 25, 2020 at 01:03:45PM +0100, Vlastimil Babka wrote:
> We can now handle sysctl parameters on kernel command line, but historically
> some parameters introduced their own command line equivalent, which we don't
> want to remove for compatibility reasons. We can however convert them to the
> generic infrastructure with a table translating the legacy command line
> parameters to their sysctl names, and removing the one-off param handlers.
>
> This patch adds the support and makes the first conversion to demonstrate it,
> on the (deprecated) numa_zonelist_order parameter.
>
> Signed-off-by: Vlastimil Babka <[email protected]>
> ---
> kernel/sysctl.c | 39 +++++++++++++++++++++++++++++++++++----
> mm/page_alloc.c | 9 ---------
> 2 files changed, 35 insertions(+), 13 deletions(-)
>
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 18c7f5606d55..fd72853396f9 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -1971,6 +1971,22 @@ static struct ctl_table dev_table[] = {
> { }
> };
>
> +struct sysctl_alias {
> + char *kernel_param;
const char ...
> + char *sysctl_param;
> +};
> +
> +/*
> + * Historically some settings had both sysctl and a command line parameter.
> + * With the generic sysctl. parameter support, we can handle them at a single
> + * place and only keep the historical name for compatibility. This is not meant
> + * to add brand new aliases.
> + */
> +static struct sysctl_alias sysctl_aliases[] = {
static const ...
> + {"numa_zonelist_order", "vm.numa_zonelist_order" },
> + { }
> +};
> +
> int __init sysctl_init(void)
> {
> struct ctl_table_header *hdr;
> @@ -1980,6 +1996,18 @@ int __init sysctl_init(void)
> return 0;
> }
>
> +char *sysctl_find_alias(char *param)
> +{
> + struct sysctl_alias *alias;
> +
> + for (alias = &sysctl_aliases[0]; alias->kernel_param != NULL; alias++) {
> + if (strcmp(alias->kernel_param, param) == 0)
> + return alias->sysctl_param;
> + }
> +
> + return NULL;
> +}
> +
> /* Set sysctl value passed on kernel command line. */
> int process_sysctl_arg(char *param, char *val,
> const char *unused, void *arg)
> @@ -1990,10 +2018,13 @@ int process_sysctl_arg(char *param, char *val,
> loff_t ppos = 0;
> struct ctl_table *ctl, *found = NULL;
>
> - if (strncmp(param, "sysctl.", sizeof("sysctl.") - 1))
> - return 0;
> -
> - param += sizeof("sysctl.") - 1;
> + if (strncmp(param, "sysctl.", sizeof("sysctl.") - 1) == 0) {
> + param += sizeof("sysctl.") - 1;
> + } else {
> + param = sysctl_find_alias(param);
> + if (!param)
> + return 0;
> + }
>
> remaining = param;
> ctl = &sysctl_base_table[0];
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 3c4eb750a199..de7a134b1b8a 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -5460,15 +5460,6 @@ static int __parse_numa_zonelist_order(char *s)
> return 0;
> }
>
> -static __init int setup_numa_zonelist_order(char *s)
> -{
> - if (!s)
> - return 0;
> -
> - return __parse_numa_zonelist_order(s);
> -}
> -early_param("numa_zonelist_order", setup_numa_zonelist_order);
> -
> char numa_zonelist_order[] = "Node";
Nice. :) Effectively: -9 lines, +1 line for using the aliasing. I think
it would be worth identifying the specific requirements for a sysctl
alias to be safe to use, and likely in a comment before the alias table:
- boot param parsing must be identical to the sysctl parsing
- temporal changes must be tolerable: i.e. early_param() runs earlier
than when the sysctl-in-boot-param runs -- must the variable be set
before the code's other __init functions run?
- must be for non-module code (since we don't have the dynamic support
yet)
As it turns out, "numa_zonelist_order" has literally no effect on
anything -- it's a parsed but ignored setting:
static int __parse_numa_zonelist_order(char *s)
{
/*
* We used to support different zonlists modes but they turned
* out to be just not useful. Let's keep the warning in place
* if somebody still use the cmd line parameter so that we do
* not fail it silently
*/
if (!(*s == 'd' || *s == 'D' || *s == 'n' || *s == 'N')) {
pr_warn("Ignoring unsupported numa_zonelist_order value: %s\n", s);
return -EINVAL;
}
return 0;
}
But anyway, do you have a way to generate a list of potential candidates?
--
Kees Cook
On Wed, Mar 25, 2020 at 01:03:45PM +0100, Vlastimil Babka wrote:
> We can now handle sysctl parameters on kernel command line, but historically
> some parameters introduced their own command line equivalent, which we don't
> want to remove for compatibility reasons. We can however convert them to the
> generic infrastructure with a table translating the legacy command line
> parameters to their sysctl names, and removing the one-off param handlers.
>
> This patch adds the support and makes the first conversion to demonstrate it,
> on the (deprecated) numa_zonelist_order parameter.
>
> Signed-off-by: Vlastimil Babka <[email protected]>
> ---
> kernel/sysctl.c | 39 +++++++++++++++++++++++++++++++++++----
> mm/page_alloc.c | 9 ---------
> 2 files changed, 35 insertions(+), 13 deletions(-)
>
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 18c7f5606d55..fd72853396f9 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -1971,6 +1971,22 @@ static struct ctl_table dev_table[] = {
> { }
> };
>
> +struct sysctl_alias {
> + char *kernel_param;
> + char *sysctl_param;
> +};
> +
> +/*
> + * Historically some settings had both sysctl and a command line parameter.
> + * With the generic sysctl. parameter support, we can handle them at a single
> + * place and only keep the historical name for compatibility. This is not meant
> + * to add brand new aliases.
> + */
> +static struct sysctl_alias sysctl_aliases[] = {
> + {"numa_zonelist_order", "vm.numa_zonelist_order" },
> + { }
> +};
> +
> int __init sysctl_init(void)
> {
> struct ctl_table_header *hdr;
> @@ -1980,6 +1996,18 @@ int __init sysctl_init(void)
> return 0;
> }
>
> +char *sysctl_find_alias(char *param)
This function should probably be declared static?
> +{
> + struct sysctl_alias *alias;
> +
> + for (alias = &sysctl_aliases[0]; alias->kernel_param != NULL; alias++) {
> + if (strcmp(alias->kernel_param, param) == 0)
> + return alias->sysctl_param;
> + }
> +
> + return NULL;
> +}
> +
> /* Set sysctl value passed on kernel command line. */
> int process_sysctl_arg(char *param, char *val,
> const char *unused, void *arg)
> @@ -1990,10 +2018,13 @@ int process_sysctl_arg(char *param, char *val,
> loff_t ppos = 0;
> struct ctl_table *ctl, *found = NULL;
>
> - if (strncmp(param, "sysctl.", sizeof("sysctl.") - 1))
> - return 0;
> -
> - param += sizeof("sysctl.") - 1;
> + if (strncmp(param, "sysctl.", sizeof("sysctl.") - 1) == 0) {
> + param += sizeof("sysctl.") - 1;
> + } else {
> + param = sysctl_find_alias(param);
> + if (!param)
> + return 0;
> + }
>
> remaining = param;
> ctl = &sysctl_base_table[0];
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 3c4eb750a199..de7a134b1b8a 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -5460,15 +5460,6 @@ static int __parse_numa_zonelist_order(char *s)
> return 0;
> }
>
> -static __init int setup_numa_zonelist_order(char *s)
> -{
> - if (!s)
> - return 0;
> -
> - return __parse_numa_zonelist_order(s);
> -}
> -early_param("numa_zonelist_order", setup_numa_zonelist_order);
> -
> char numa_zonelist_order[] = "Node";
>
> /*
> --
> 2.25.1
>