Some hardware, such as certain AMD variants, doesn't have cross-HT MDS/L1TF
issues. Detect this and don't enable core scheduling, as it can
needlessly slow those devices down.

However, some users may want core scheduling even if the hardware is
secure. To support them, add a coresched= option which defaults to
'secure' and can be overridden to 'on' if the user wants to enable
coresched even if the HW is not vulnerable. 'off' disables
core scheduling in any case.

Also add a sched_debug entry to indicate whether core scheduling is
turned on.
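
For example (the path and output shown here are illustrative and may
differ by kernel version and config), forcing core scheduling on
regardless of hardware vulnerability and then checking the new entry
might look like:

    coresched=on                    # on the kernel command line

    $ grep core_sched_enabled /proc/sched_debug
    .core_sched_enabled                     : 1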
Reviewed-by: Alexander Graf <[email protected]>
Signed-off-by: Joel Fernandes (Google) <[email protected]>
---
.../admin-guide/kernel-parameters.txt | 14 ++++++
arch/x86/kernel/cpu/bugs.c | 19 ++++++++
include/linux/cpu.h | 1 +
include/linux/sched/smt.h | 4 ++
kernel/cpu.c | 43 +++++++++++++++++++
kernel/sched/core.c | 6 +++
kernel/sched/debug.c | 4 ++
7 files changed, 91 insertions(+)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index b185c6ed4aba..9cd2cf7c18d4 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -698,6 +698,20 @@
/proc/<pid>/coredump_filter.
See also Documentation/filesystems/proc.rst.
+ coresched= [SCHED_CORE] This feature allows the Linux scheduler
+ to ensure that tasks running concurrently on the hyperthread
+ siblings of a core all belong to the same core scheduling
+ group.
+ Possible values are:
+ 'on' - Enable scheduler capability to core schedule.
+ By default, no tasks will be core scheduled, but the coresched
+ interface can be used to form groups of tasks that are forced
+ to share a core.
+ 'off' - Disable scheduler capability to core schedule.
+ 'secure' - Like 'on' but only enabled on systems affected by
+ MDS or L1TF vulnerabilities; 'off' otherwise.
+ Default: 'secure'.
+
coresight_cpu_debug.enable
[ARM,ARM64]
Format: <bool>
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index dece79e4d1e9..f3163f4a805c 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -43,6 +43,7 @@ static void __init mds_select_mitigation(void);
static void __init mds_print_mitigation(void);
static void __init taa_select_mitigation(void);
static void __init srbds_select_mitigation(void);
+static void __init coresched_select(void);
/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
u64 x86_spec_ctrl_base;
@@ -103,6 +104,9 @@ void __init check_bugs(void)
if (boot_cpu_has(X86_FEATURE_STIBP))
x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;
+ /* Update whether core-scheduling is needed. */
+ coresched_select();
+
/* Select the proper CPU mitigations before patching alternatives: */
spectre_v1_select_mitigation();
spectre_v2_select_mitigation();
@@ -1808,4 +1812,19 @@ ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *
{
return cpu_show_common(dev, attr, buf, X86_BUG_SRBDS);
}
+
+/*
+ * When coresched=secure command line option is passed (default), disable core
+ * scheduling if CPU does not have MDS/L1TF vulnerability.
+ */
+static void __init coresched_select(void)
+{
+#ifdef CONFIG_SCHED_CORE
+ if (coresched_cmd_secure() &&
+ !boot_cpu_has_bug(X86_BUG_MDS) &&
+ !boot_cpu_has_bug(X86_BUG_L1TF))
+ static_branch_disable(&sched_coresched_supported);
+#endif
+}
+
#endif
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index d6428aaf67e7..d1f1e64316d6 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -228,4 +228,5 @@ static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0;
extern bool cpu_mitigations_off(void);
extern bool cpu_mitigations_auto_nosmt(void);
+extern bool coresched_cmd_secure(void);
#endif /* _LINUX_CPU_H_ */
diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h
index 59d3736c454c..561064eb3268 100644
--- a/include/linux/sched/smt.h
+++ b/include/linux/sched/smt.h
@@ -17,4 +17,8 @@ static inline bool sched_smt_active(void) { return false; }
void arch_smt_update(void);
+#ifdef CONFIG_SCHED_CORE
+extern struct static_key_true sched_coresched_supported;
+#endif
+
#endif
diff --git a/kernel/cpu.c b/kernel/cpu.c
index fa535eaa4826..f22330c3ab4c 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2559,3 +2559,46 @@ bool cpu_mitigations_auto_nosmt(void)
return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
}
EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
+
+/*
+ * These are used for a global "coresched=" cmdline option for controlling
+ * core scheduling. Note that core sched may be needed for use cases other
+ * than security as well.
+ */
+enum coresched_cmds {
+ CORE_SCHED_OFF,
+ CORE_SCHED_SECURE,
+ CORE_SCHED_ON,
+};
+
+static enum coresched_cmds coresched_cmd __ro_after_init = CORE_SCHED_SECURE;
+
+static int __init coresched_parse_cmdline(char *arg)
+{
+ if (!strcmp(arg, "off"))
+ coresched_cmd = CORE_SCHED_OFF;
+ else if (!strcmp(arg, "on"))
+ coresched_cmd = CORE_SCHED_ON;
+ else if (!strcmp(arg, "secure"))
+ /*
+ * On x86, coresched=secure means coresched is enabled only if
+ * system has MDS/L1TF vulnerability (see x86/bugs.c).
+ */
+ coresched_cmd = CORE_SCHED_SECURE;
+ else
+ pr_crit("Unsupported coresched=%s, defaulting to secure.\n",
+ arg);
+
+ if (coresched_cmd == CORE_SCHED_OFF)
+ static_branch_disable(&sched_coresched_supported);
+
+ return 0;
+}
+early_param("coresched", coresched_parse_cmdline);
+
+/* Returns true when coresched=secure (the default) was requested. */
+bool coresched_cmd_secure(void)
+{
+ return coresched_cmd == CORE_SCHED_SECURE;
+}
+EXPORT_SYMBOL_GPL(coresched_cmd_secure);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5ef04bdc849f..01938a2154fd 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -325,8 +325,12 @@ static void __sched_core_disable(void)
static_branch_disable(&__sched_core_enabled);
}
+DEFINE_STATIC_KEY_TRUE(sched_coresched_supported);
+
void sched_core_get(void)
{
+ if (!static_branch_likely(&sched_coresched_supported))
+ return;
mutex_lock(&sched_core_mutex);
if (!sched_core_count++)
__sched_core_enable();
@@ -335,6 +339,8 @@ void sched_core_get(void)
void sched_core_put(void)
{
+ if (!static_branch_likely(&sched_coresched_supported))
+ return;
mutex_lock(&sched_core_mutex);
if (!--sched_core_count)
__sched_core_disable();
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 8c452b8010ad..cffdfab7478e 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -782,6 +782,10 @@ static void sched_debug_header(struct seq_file *m)
"sysctl_sched_tunable_scaling",
sysctl_sched_tunable_scaling,
sched_tunable_scaling_names[sysctl_sched_tunable_scaling]);
+#ifdef CONFIG_SCHED_CORE
+ SEQ_printf(m, " .%-40s: %d\n", "core_sched_enabled",
+ !!static_branch_likely(&__sched_core_enabled));
+#endif
SEQ_printf(m, "\n");
}
--
2.29.2.299.gdc1121823c-goog
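
The enable/disable pattern in the core.c hunk above is a refcounted
gate: the first sched_core_get() flips the feature on, the last
sched_core_put() flips it back off, and the static key acts as a global
kill switch checked before either path is taken. A minimal user-space C
analogue of that pattern (illustrative only: the names below are made
up, and the kernel uses jump-label based static keys plus a mutex, not
a plain bool):

    #include <stdbool.h>
    #include <stdio.h>

    /* Analogue of DEFINE_STATIC_KEY_TRUE(sched_coresched_supported). */
    static bool coresched_supported = true;
    static int core_count;            /* analogue of sched_core_count */

    static void core_enable(void)  { puts("core scheduling enabled");  }
    static void core_disable(void) { puts("core scheduling disabled"); }

    static void core_get(void)
    {
            if (!coresched_supported) /* static_branch_likely() check */
                    return;
            if (!core_count++)        /* 0 -> 1: first user enables   */
                    core_enable();
    }

    static void core_put(void)
    {
            if (!coresched_supported)
                    return;
            if (!--core_count)        /* 1 -> 0: last user disables   */
                    core_disable();
    }

    int main(void)
    {
            core_get();  /* prints "core scheduling enabled"  */
            core_get();  /* refcount only, no output          */
            core_put();
            core_put();  /* prints "core scheduling disabled" */
            return 0;
    }
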
Hi Joel,
On 11/17/20 3:20 PM, Joel Fernandes (Google) wrote:
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index b185c6ed4aba..9cd2cf7c18d4 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -698,6 +698,20 @@
> /proc/<pid>/coredump_filter.
> See also Documentation/filesystems/proc.rst.
>
> + coresched= [SCHED_CORE] This feature allows the Linux scheduler
Unless I missed it somewhere else, this "SCHED_CORE" string should be
added to Documentation/admin-guide/kernel-parameters.rst, where there is
a list of "qualifiers" for kernel parameters.
(It looks like you are using it as the name of a Kconfig option, which
makes some sense, but that's not how it's [currently] done. :)
> + to ensure that tasks running concurrently on the hyperthread
> + siblings of a core all belong to the same core scheduling
> + group.
> + Possible values are:
> + 'on' - Enable scheduler capability to core schedule.
> + By default, no tasks will be core scheduled, but the coresched
> + interface can be used to form groups of tasks that are forced
> + to share a core.
> + 'off' - Disable scheduler capability to core schedule.
> + 'secure' - Like 'on' but only enabled on systems affected by
> + MDS or L1TF vulnerabilities; 'off' otherwise.
> + Default: 'secure'.
> +
> coresight_cpu_debug.enable
> [ARM,ARM64]
> Format: <bool>
thanks.
--
~Randy
On Tue, Nov 17, 2020 at 06:20:01PM -0500, Joel Fernandes (Google) wrote:
> Some hardware, such as certain AMD variants, doesn't have cross-HT MDS/L1TF
> issues. Detect this and don't enable core scheduling, as it can
> needlessly slow those devices down.
>
> However, some users may want core scheduling even if the hardware is
> secure. To support them, add a coresched= option which defaults to
> 'secure' and can be overridden to 'on' if the user wants to enable
> coresched even if the HW is not vulnerable. 'off' disables
> core scheduling in any case.
This is all sorts of wrong, and the reason is because you hard-coded
that stupid policy.
Core scheduling should always be available on SMT (provided you did that
CONFIG_ thing). Even on AMD systems RT tasks might want to claim the
core exclusively.
On Wed, Nov 25, 2020 at 02:45:37PM +0100, Peter Zijlstra wrote:
> On Tue, Nov 17, 2020 at 06:20:01PM -0500, Joel Fernandes (Google) wrote:
> > Some hardware, such as certain AMD variants, doesn't have cross-HT MDS/L1TF
> > issues. Detect this and don't enable core scheduling, as it can
> > needlessly slow those devices down.
> >
> > However, some users may want core scheduling even if the hardware is
> > secure. To support them, add a coresched= option which defaults to
> > 'secure' and can be overridden to 'on' if the user wants to enable
> > coresched even if the HW is not vulnerable. 'off' disables
> > core scheduling in any case.
>
> This is all sorts of wrong, and the reason is because you hard-coded
> that stupid policy.
>
> Core scheduling should always be available on SMT (provided you did that
> CONFIG_ thing). Even on AMD systems RT tasks might want to claim the
> core exclusively.
Agreed, specifically if we need to have special cgroup tag/association to
enable it.
Balbir Singh.