These are the remaining patches built on top of
https://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git/log/?h=x86/mm
for the series posted at
https://lore.kernel.org/lkml/[email protected]/
Changelog v7
- Split patch 5 from the previous series into two patches
- Reuse the prctl PR_GET/SET_SPECULATION_CTRL API with PR_SPEC_L1D_FLUSH_OUT
as the speculation misfeature ("which") argument
- Add back the missing BUILD_BUG_ON that checks the placement of
TIF_SPEC_L1D_FLUSH
- Update the documentation to reflect the changes
Balbir Singh (3):
x86/mm: Optionally flush L1D on context switch
prctl: Hook L1D flushing in via prctl
Documentation: Add L1D flushing Documentation
Documentation/admin-guide/hw-vuln/index.rst | 1 +
.../admin-guide/hw-vuln/l1d_flush.rst | 51 +++++++++++++++++++
Documentation/userspace-api/spec_ctrl.rst | 7 +++
arch/x86/include/asm/thread_info.h | 9 +++-
arch/x86/kernel/cpu/bugs.c | 28 ++++++++++
arch/x86/mm/tlb.c | 39 ++++++++++++--
include/uapi/linux/prctl.h | 1 +
7 files changed, 131 insertions(+), 5 deletions(-)
create mode 100644 Documentation/admin-guide/hw-vuln/l1d_flush.rst
--
2.17.1
Use the existing PR_GET/SET_SPECULATION_CTRL API to expose the L1D
flush capability. For L1D flushing, PR_SPEC_FORCE_DISABLE and
PR_SPEC_DISABLE_NOEXEC are not supported.
There is also no seccomp integration for the feature.
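As an illustration only (not part of this patch), a minimal userspace
opt-in could look like the sketch below. It assumes headers that carry the
PR_SPEC_* definitions (Linux >= 4.17); the PR_SPEC_L1D_FLUSH_OUT fallback
define mirrors the uapi value added by this patch.

  #include <stdio.h>
  #include <string.h>
  #include <errno.h>
  #include <sys/prctl.h>

  #ifndef PR_SPEC_L1D_FLUSH_OUT
  #define PR_SPEC_L1D_FLUSH_OUT 2   /* uapi value added by this patch */
  #endif

  int main(void)
  {
          /* Opt the current task into L1D flushing on switch out */
          if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_L1D_FLUSH_OUT,
                    PR_SPEC_ENABLE, 0, 0)) {
                  /* e.g. ENODEV if the kernel lacks the feature,
                     ERANGE for an unsupported ctrl value */
                  fprintf(stderr, "L1D flush opt-in failed: %s\n",
                          strerror(errno));
                  return 1;
          }
          return 0;
  }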
Suggested-by: Thomas Gleixner <[email protected]>
Signed-off-by: Balbir Singh <[email protected]>
---
arch/x86/kernel/cpu/bugs.c | 28 ++++++++++++++++++++++++++++
include/uapi/linux/prctl.h | 1 +
2 files changed, 29 insertions(+)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index ed54b3b21c39..3eb9139fcf50 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1121,6 +1121,19 @@ static void task_update_spec_tif(struct task_struct *tsk)
speculation_ctrl_update_current();
}
+static int l1d_flush_out_prctl_set(struct task_struct *task, unsigned long ctrl)
+{
+ switch (ctrl) {
+ case PR_SPEC_ENABLE:
+ return enable_l1d_flush_for_task(task);
+ case PR_SPEC_DISABLE:
+ return disable_l1d_flush_for_task(task);
+ default:
+ return -ERANGE;
+ }
+ return 0;
+}
+
static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
{
if (ssb_mode != SPEC_STORE_BYPASS_PRCTL &&
@@ -1206,6 +1219,8 @@ int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
return ssb_prctl_set(task, ctrl);
case PR_SPEC_INDIRECT_BRANCH:
return ib_prctl_set(task, ctrl);
+ case PR_SPEC_L1D_FLUSH_OUT:
+ return l1d_flush_out_prctl_set(task, ctrl);
default:
return -ENODEV;
}
@@ -1221,6 +1236,17 @@ void arch_seccomp_spec_mitigate(struct task_struct *task)
}
#endif
+static int l1d_flush_out_prctl_get(struct task_struct *task)
+{
+ int ret;
+
+ ret = test_ti_thread_flag(&task->thread_info, TIF_SPEC_L1D_FLUSH);
+ if (ret)
+ return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
+ else
+ return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
+}
+
static int ssb_prctl_get(struct task_struct *task)
{
switch (ssb_mode) {
@@ -1272,6 +1298,8 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
return ssb_prctl_get(task);
case PR_SPEC_INDIRECT_BRANCH:
return ib_prctl_get(task);
+ case PR_SPEC_L1D_FLUSH_OUT:
+ return l1d_flush_out_prctl_get(task);
default:
return -ENODEV;
}
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 07b4f8131e36..1e864867a367 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -213,6 +213,7 @@ struct prctl_mm_map {
/* Speculation control variants */
# define PR_SPEC_STORE_BYPASS 0
# define PR_SPEC_INDIRECT_BRANCH 1
+# define PR_SPEC_L1D_FLUSH_OUT 2
/* Return and control values for PR_SET/GET_SPECULATION_CTRL */
# define PR_SPEC_NOT_AFFECTED 0
# define PR_SPEC_PRCTL (1UL << 0)
--
2.17.1
Implement a mechanism to selectively flush the L1D cache. The goal is to
allow tasks that are paranoid, due to the recent snoop-assisted data
sampling vulnerabilities, to flush their L1D on being switched out. This
protects their data from being snooped or leaked via side channels after
the task has context switched out.
There are two scenarios we might want to protect against: a task leaving
the CPU with data still in L1D (the main concern of this patch), and a
malicious, less trusted task coming in, for which we want to clean up the
cache before it starts. Only the former case is addressed here.
A new thread_info flag TIF_SPEC_L1D_FLUSH is added to track tasks which
opt into L1D flushing. cpu_tlbstate.last_user_mm_spec is used to convert
the TIF flags into per-CPU mm state in cond_mitigation(), which is then
used to decide when to flush the L1D cache.
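For reference, a standalone illustration (not part of the patch) of the
bit mangling done by mm_mangle_tif_spec_bits() in the diff below: shifting
the TIF word right by TIF_SPEC_IB lands TIF_SPEC_IB on LAST_USER_MM_IBPB
(0x1) and TIF_SPEC_L1D_FLUSH on LAST_USER_MM_L1D_FLUSH (0x2). This only
works because the two TIF bits are adjacent, which is exactly what the
BUILD_BUG_ON asserts.

  #include <assert.h>

  #define TIF_SPEC_IB             9
  #define TIF_SPEC_L1D_FLUSH      10      /* must be TIF_SPEC_IB + 1 */
  #define LAST_USER_MM_IBPB       0x1UL
  #define LAST_USER_MM_L1D_FLUSH  0x2UL
  #define LAST_USER_MM_SPEC_MASK  (LAST_USER_MM_IBPB | LAST_USER_MM_L1D_FLUSH)

  int main(void)
  {
          /* Both mitigations requested by the incoming task */
          unsigned long tif = (1UL << TIF_SPEC_IB) | (1UL << TIF_SPEC_L1D_FLUSH);
          unsigned long spec = (tif >> TIF_SPEC_IB) & LAST_USER_MM_SPEC_MASK;

          assert(spec == (LAST_USER_MM_IBPB | LAST_USER_MM_L1D_FLUSH));
          return 0;
  }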
Suggested-by: Thomas Gleixner <[email protected]>
Signed-off-by: Balbir Singh <[email protected]>
---
arch/x86/include/asm/thread_info.h | 9 +++++--
arch/x86/mm/tlb.c | 39 +++++++++++++++++++++++++++---
2 files changed, 43 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 8de8ceccb8bc..1655347f11b9 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -84,7 +84,7 @@ struct thread_info {
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */
-#define TIF_SPEC_FORCE_UPDATE 10 /* Force speculation MSR update in context switch */
+#define TIF_SPEC_L1D_FLUSH 10 /* Flush L1D on mm switches (processes) */
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
#define TIF_UPROBE 12 /* breakpointed or singlestepping */
#define TIF_PATCH_PENDING 13 /* pending live patching update */
@@ -96,6 +96,7 @@ struct thread_info {
#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */
#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
+#define TIF_SPEC_FORCE_UPDATE 23 /* Force speculation MSR update in context switch */
#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
#define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */
#define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */
@@ -114,7 +115,7 @@ struct thread_info {
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_SPEC_IB (1 << TIF_SPEC_IB)
-#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
+#define _TIF_SPEC_L1D_FLUSH (1 << TIF_SPEC_L1D_FLUSH)
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING)
@@ -125,6 +126,7 @@ struct thread_info {
#define _TIF_SLD (1 << TIF_SLD)
#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
+#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
#define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP)
#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES)
@@ -235,6 +237,9 @@ static inline int arch_within_stack_frames(const void * const stack,
current_thread_info()->status & TS_COMPAT)
#endif
+extern int enable_l1d_flush_for_task(struct task_struct *tsk);
+extern int disable_l1d_flush_for_task(struct task_struct *tsk);
+
extern void arch_task_cache_init(void);
extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
extern void arch_release_task_struct(struct task_struct *tsk);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 35017a040ec9..03166f7c2f16 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -13,6 +13,7 @@
#include <asm/mmu_context.h>
#include <asm/nospec-branch.h>
#include <asm/cache.h>
+#include <asm/cacheflush.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>
@@ -43,11 +44,12 @@
*/
/*
- * Bits to mangle the TIF_SPEC_IB state into the mm pointer which is
+ * Bits to mangle the TIF_SPEC_* state into the mm pointer which is
* stored in cpu_tlb_state.last_user_mm_spec.
*/
#define LAST_USER_MM_IBPB 0x1UL
-#define LAST_USER_MM_SPEC_MASK (LAST_USER_MM_IBPB)
+#define LAST_USER_MM_L1D_FLUSH 0x2UL
+#define LAST_USER_MM_SPEC_MASK (LAST_USER_MM_IBPB | LAST_USER_MM_L1D_FLUSH)
/* Bits to set when tlbstate and flush is (re)initialized */
#define LAST_USER_MM_INIT LAST_USER_MM_IBPB
@@ -311,6 +313,23 @@ void leave_mm(int cpu)
}
EXPORT_SYMBOL_GPL(leave_mm);
+int enable_l1d_flush_for_task(struct task_struct *tsk)
+{
+ int ret = l1d_flush_init_once();
+
+ if (ret < 0)
+ return ret;
+
+ set_ti_thread_flag(&tsk->thread_info, TIF_SPEC_L1D_FLUSH);
+ return ret;
+}
+
+int disable_l1d_flush_for_task(struct task_struct *tsk)
+{
+ clear_ti_thread_flag(&tsk->thread_info, TIF_SPEC_L1D_FLUSH);
+ return 0;
+}
+
void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
@@ -354,6 +373,7 @@ static inline unsigned long mm_mangle_tif_spec_bits(struct task_struct *next)
unsigned long next_tif = task_thread_info(next)->flags;
unsigned long spec_bits = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_SPEC_MASK;
+ BUILD_BUG_ON(TIF_SPEC_L1D_FLUSH != TIF_SPEC_IB + 1);
return (unsigned long)next->mm | spec_bits;
}
@@ -431,6 +451,13 @@ static void cond_mitigation(struct task_struct *next)
indirect_branch_prediction_barrier();
}
+ if (prev_mm & LAST_USER_MM_L1D_FLUSH)
+ /*
+ * Don't populate the TLB for the software fallback flush.
+ * Populating the TLB is not needed for this use case.
+ */
+ arch_l1d_flush(0);
+
this_cpu_write(cpu_tlbstate.last_user_mm_spec, next_mm);
}
@@ -681,7 +708,13 @@ void initialize_tlbstate_and_flush(void)
/* Force ASID 0 and force a TLB flush. */
write_cr3(build_cr3(mm->pgd, 0));
- /* Reinitialize tlbstate. */
+ /*
+ * Reinitialize tlbstate.
+ *
+ * Don't add LAST_USER_MM_L1D_FLUSH to last_user_mm_spec, as
+ * this is also called during early boot, when l1d_flush_pages
+ * are not yet allocated.
+ */
this_cpu_write(cpu_tlbstate.last_user_mm_spec, LAST_USER_MM_INIT);
this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
this_cpu_write(cpu_tlbstate.next_asid, 1);
--
2.17.1
Add documentation for L1D flushing, explaining the need for the
feature and how it can be used.
[tglx: Reword the documentation]
Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Balbir Singh <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
Documentation/admin-guide/hw-vuln/index.rst | 1 +
.../admin-guide/hw-vuln/l1d_flush.rst | 51 +++++++++++++++++++
Documentation/userspace-api/spec_ctrl.rst | 7 +++
3 files changed, 59 insertions(+)
create mode 100644 Documentation/admin-guide/hw-vuln/l1d_flush.rst
diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst
index 0795e3c2643f..35633b299d45 100644
--- a/Documentation/admin-guide/hw-vuln/index.rst
+++ b/Documentation/admin-guide/hw-vuln/index.rst
@@ -14,3 +14,4 @@ are configurable at compile, boot or run time.
mds
tsx_async_abort
multihit.rst
+ l1d_flush
diff --git a/Documentation/admin-guide/hw-vuln/l1d_flush.rst b/Documentation/admin-guide/hw-vuln/l1d_flush.rst
new file mode 100644
index 000000000000..530a1e0ffbd3
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/l1d_flush.rst
@@ -0,0 +1,51 @@
+L1D Flushing for the paranoid
+=============================
+
+With an increasing number of vulnerabilities being reported around data
+leaks from the Level 1 Data cache (L1D), the kernel provides an opt-in
+mechanism to flush the L1D cache on context switch.
+
+This mechanism can be used to address e.g. CVE-2020-0550. For paranoid
+applications, the mechanism keeps them safe from any yet-to-be-discovered
+vulnerabilities related to leaks from the L1D cache.
+
+
+Related CVEs
+------------
+At present, the following CVEs can be addressed by this
+mechanism:
+
+ ============= ======================== ==================
+ CVE-2020-0550 Improper Data Forwarding OS related aspects
+ ============= ======================== ==================
+
+Usage Guidelines
+----------------
+
+Please see :ref:`Documentation/userspace-api/spec_ctrl.rst` for
+details.
+
+**NOTE**: The feature is disabled by default; applications need to
+specifically opt in to enable it.
+
+Mitigation
+----------
+
+When L1D flushing is enabled for a task (PR_SPEC_L1D_FLUSH_OUT), the L1D
+cache is flushed when the task is scheduled out and the incoming task
+belongs to a different process and therefore to a different address space.
+
+If the underlying CPU supports L1D flushing in hardware, the hardware
+mechanism is used, otherwise a software fallback, similar to the L1TF
+mitigation, is invoked.
+
+Limitations
+-----------
+
+The mechanism does not mitigate L1D data leaks between tasks belonging to
+different processes which are concurrently executing on sibling threads of
+a physical CPU core when SMT is enabled on the system.
+
+This can be addressed by controlled placement of processes on physical CPU
+cores or by disabling SMT. See the relevant chapter in the L1TF mitigation
+document: :ref:`Documentation/admin-guide/hw-vuln/l1tf.rst <smt_control>`.
diff --git a/Documentation/userspace-api/spec_ctrl.rst b/Documentation/userspace-api/spec_ctrl.rst
index 7ddd8f667459..b40afe97dbfb 100644
--- a/Documentation/userspace-api/spec_ctrl.rst
+++ b/Documentation/userspace-api/spec_ctrl.rst
@@ -106,3 +106,10 @@ Speculation misfeature controls
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0);
+
+- PR_SPEC_L1D_FLUSH_OUT: Flush L1D Cache on context switch out of the task
+
+ Invocations:
+ * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_L1D_FLUSH_OUT, 0, 0, 0);
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_L1D_FLUSH_OUT, PR_SPEC_ENABLE, 0, 0);
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_L1D_FLUSH_OUT, PR_SPEC_DISABLE, 0, 0);
--
2.17.1
The following commit has been merged into the x86/mm branch of tip:
Commit-ID: edf7ce0b231cb6cdc170125588cf71c70358fc74
Gitweb: https://git.kernel.org/tip/edf7ce0b231cb6cdc170125588cf71c70358fc74
Author: Balbir Singh <[email protected]>
AuthorDate: Sat, 16 May 2020 20:34:29 +10:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Fri, 22 May 2020 10:36:48 +02:00
prctl: Hook L1D flushing in via prctl
Use the existing PR_GET/SET_SPECULATION_CTRL API to expose the L1D
flush capability. For L1D flushing, PR_SPEC_FORCE_DISABLE and
PR_SPEC_DISABLE_NOEXEC are not supported.
There is also no seccomp integration for the feature.
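To complement the opt-in side, a hypothetical query sketch (not part of
this patch, assuming headers with the PR_SPEC_* definitions) reads the
state back via PR_GET_SPECULATION_CTRL; the kernel returns
PR_SPEC_PRCTL | PR_SPEC_ENABLE/PR_SPEC_DISABLE, or fails with ENODEV when
the misfeature is unknown:

  #include <stdio.h>
  #include <sys/prctl.h>

  #ifndef PR_SPEC_L1D_FLUSH_OUT
  #define PR_SPEC_L1D_FLUSH_OUT 2   /* uapi value added by this patch */
  #endif

  int main(void)
  {
          int ret = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_L1D_FLUSH_OUT,
                          0, 0, 0);

          if (ret < 0) {
                  perror("PR_GET_SPECULATION_CTRL");  /* e.g. ENODEV */
                  return 1;
          }
          printf("L1D flush on switch-out: %s\n",
                 ret & PR_SPEC_ENABLE ? "enabled" : "disabled");
          return 0;
  }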
Suggested-by: Thomas Gleixner <[email protected]>
Signed-off-by: Balbir Singh <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
arch/x86/kernel/cpu/bugs.c | 28 ++++++++++++++++++++++++++++
include/uapi/linux/prctl.h | 1 +
2 files changed, 29 insertions(+)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index ed54b3b..3eb9139 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1121,6 +1121,19 @@ static void task_update_spec_tif(struct task_struct *tsk)
speculation_ctrl_update_current();
}
+static int l1d_flush_out_prctl_set(struct task_struct *task, unsigned long ctrl)
+{
+ switch (ctrl) {
+ case PR_SPEC_ENABLE:
+ return enable_l1d_flush_for_task(task);
+ case PR_SPEC_DISABLE:
+ return disable_l1d_flush_for_task(task);
+ default:
+ return -ERANGE;
+ }
+ return 0;
+}
+
static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
{
if (ssb_mode != SPEC_STORE_BYPASS_PRCTL &&
@@ -1206,6 +1219,8 @@ int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
return ssb_prctl_set(task, ctrl);
case PR_SPEC_INDIRECT_BRANCH:
return ib_prctl_set(task, ctrl);
+ case PR_SPEC_L1D_FLUSH_OUT:
+ return l1d_flush_out_prctl_set(task, ctrl);
default:
return -ENODEV;
}
@@ -1221,6 +1236,17 @@ void arch_seccomp_spec_mitigate(struct task_struct *task)
}
#endif
+static int l1d_flush_out_prctl_get(struct task_struct *task)
+{
+ int ret;
+
+ ret = test_ti_thread_flag(&task->thread_info, TIF_SPEC_L1D_FLUSH);
+ if (ret)
+ return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
+ else
+ return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
+}
+
static int ssb_prctl_get(struct task_struct *task)
{
switch (ssb_mode) {
@@ -1272,6 +1298,8 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
return ssb_prctl_get(task);
case PR_SPEC_INDIRECT_BRANCH:
return ib_prctl_get(task);
+ case PR_SPEC_L1D_FLUSH_OUT:
+ return l1d_flush_out_prctl_get(task);
default:
return -ENODEV;
}
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 07b4f81..1e86486 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -213,6 +213,7 @@ struct prctl_mm_map {
/* Speculation control variants */
# define PR_SPEC_STORE_BYPASS 0
# define PR_SPEC_INDIRECT_BRANCH 1
+# define PR_SPEC_L1D_FLUSH_OUT 2
/* Return and control values for PR_SET/GET_SPECULATION_CTRL */
# define PR_SPEC_NOT_AFFECTED 0
# define PR_SPEC_PRCTL (1UL << 0)
The following commit has been merged into the x86/mm branch of tip:
Commit-ID: 20fc9f6f9f2fefefb694c9e447f80b4772021e0a
Gitweb: https://git.kernel.org/tip/20fc9f6f9f2fefefb694c9e447f80b4772021e0a
Author: Balbir Singh <[email protected]>
AuthorDate: Sat, 16 May 2020 20:34:28 +10:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Fri, 22 May 2020 10:36:48 +02:00
x86/mm: Optionally flush L1D on context switch
Implement a mechanism to selectively flush the L1D cache. The goal is to
allow tasks that are paranoid, due to the recent snoop-assisted data
sampling vulnerabilities, to flush their L1D on being switched out. This
protects their data from being snooped or leaked via side channels after
the task has context switched out.
There are two scenarios we might want to protect against: a task leaving
the CPU with data still in L1D (the main concern of this patch), and a
malicious, less trusted task coming in, for which we want to clean up the
cache before it starts. Only the former case is addressed here.
A new thread_info flag TIF_SPEC_L1D_FLUSH is added to track tasks which
opt into L1D flushing. cpu_tlbstate.last_user_mm_spec is used to convert
the TIF flags into per-CPU mm state in cond_mitigation(), which is then
used to decide when to flush the L1D cache.
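Conceptually the flush is latched: the outgoing task's request is recorded
in a per-CPU word and acted on at the next mm switch, i.e. the flush runs
after the opted-in task has left the CPU. A standalone model (illustration
only, stubbing out the real arch_l1d_flush()):

  #include <assert.h>

  #define LAST_USER_MM_L1D_FLUSH 0x2UL

  static unsigned long last_user_mm_spec;  /* stands in for cpu_tlbstate */
  static int flushes;

  static void arch_l1d_flush_stub(void) { flushes++; }

  /* Models the relevant part of cond_mitigation() */
  static void switch_mm_model(unsigned long next_mm_spec)
  {
          if (last_user_mm_spec & LAST_USER_MM_L1D_FLUSH)
                  arch_l1d_flush_stub();  /* flush on behalf of prev task */
          last_user_mm_spec = next_mm_spec;
  }

  int main(void)
  {
          switch_mm_model(LAST_USER_MM_L1D_FLUSH);  /* opted-in task in: no flush */
          assert(flushes == 0);
          switch_mm_model(0);                       /* it leaves: flush now */
          assert(flushes == 1);
          return 0;
  }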
Suggested-by: Thomas Gleixner <[email protected]>
Signed-off-by: Balbir Singh <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
arch/x86/include/asm/thread_info.h | 9 +++++--
arch/x86/mm/tlb.c | 40 ++++++++++++++++++++++++++---
2 files changed, 44 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 8de8cec..1655347 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -84,7 +84,7 @@ struct thread_info {
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */
-#define TIF_SPEC_FORCE_UPDATE 10 /* Force speculation MSR update in context switch */
+#define TIF_SPEC_L1D_FLUSH 10 /* Flush L1D on mm switches (processes) */
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
#define TIF_UPROBE 12 /* breakpointed or singlestepping */
#define TIF_PATCH_PENDING 13 /* pending live patching update */
@@ -96,6 +96,7 @@ struct thread_info {
#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */
#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
+#define TIF_SPEC_FORCE_UPDATE 23 /* Force speculation MSR update in context switch */
#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
#define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */
#define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */
@@ -114,7 +115,7 @@ struct thread_info {
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_SPEC_IB (1 << TIF_SPEC_IB)
-#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
+#define _TIF_SPEC_L1D_FLUSH (1 << TIF_SPEC_L1D_FLUSH)
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING)
@@ -125,6 +126,7 @@ struct thread_info {
#define _TIF_SLD (1 << TIF_SLD)
#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
+#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
#define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP)
#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES)
@@ -235,6 +237,9 @@ static inline int arch_within_stack_frames(const void * const stack,
current_thread_info()->status & TS_COMPAT)
#endif
+extern int enable_l1d_flush_for_task(struct task_struct *tsk);
+extern int disable_l1d_flush_for_task(struct task_struct *tsk);
+
extern void arch_task_cache_init(void);
extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
extern void arch_release_task_struct(struct task_struct *tsk);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 35017a0..c8524c5 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -13,6 +13,7 @@
#include <asm/mmu_context.h>
#include <asm/nospec-branch.h>
#include <asm/cache.h>
+#include <asm/cacheflush.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>
@@ -43,13 +44,19 @@
*/
/*
- * Bits to mangle the TIF_SPEC_IB state into the mm pointer which is
+ * Bits to mangle the TIF_SPEC_* state into the mm pointer which is
* stored in cpu_tlb_state.last_user_mm_spec.
*/
#define LAST_USER_MM_IBPB 0x1UL
-#define LAST_USER_MM_SPEC_MASK (LAST_USER_MM_IBPB)
+#define LAST_USER_MM_L1D_FLUSH 0x2UL
+#define LAST_USER_MM_SPEC_MASK (LAST_USER_MM_IBPB | LAST_USER_MM_L1D_FLUSH)
-/* Bits to set when tlbstate and flush is (re)initialized */
+/*
+ * Bits to set when tlbstate and flush is (re)initialized
+ *
+ * Don't add LAST_USER_MM_L1D_FLUSH to the init bits as the initialization
+ * is done during early boot and l1d_flush_pages are not yet allocated.
+ */
#define LAST_USER_MM_INIT LAST_USER_MM_IBPB
/*
@@ -311,6 +318,23 @@ void leave_mm(int cpu)
}
EXPORT_SYMBOL_GPL(leave_mm);
+int enable_l1d_flush_for_task(struct task_struct *tsk)
+{
+ int ret = l1d_flush_init_once();
+
+ if (ret < 0)
+ return ret;
+
+ set_ti_thread_flag(&tsk->thread_info, TIF_SPEC_L1D_FLUSH);
+ return ret;
+}
+
+int disable_l1d_flush_for_task(struct task_struct *tsk)
+{
+ clear_ti_thread_flag(&tsk->thread_info, TIF_SPEC_L1D_FLUSH);
+ return 0;
+}
+
void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
@@ -354,6 +378,8 @@ static inline unsigned long mm_mangle_tif_spec_bits(struct task_struct *next)
unsigned long next_tif = task_thread_info(next)->flags;
unsigned long spec_bits = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_SPEC_MASK;
+ BUILD_BUG_ON(TIF_SPEC_L1D_FLUSH != TIF_SPEC_IB + 1);
+
return (unsigned long)next->mm | spec_bits;
}
@@ -431,6 +457,14 @@ static void cond_mitigation(struct task_struct *next)
indirect_branch_prediction_barrier();
}
+ if (prev_mm & LAST_USER_MM_L1D_FLUSH) {
+ /*
+ * Don't populate the TLB for the software fallback flush.
+ * Populating the TLB is not needed for this use case.
+ */
+ arch_l1d_flush(0);
+ }
+
this_cpu_write(cpu_tlbstate.last_user_mm_spec, next_mm);
}
The following commit has been merged into the x86/mm branch of tip:
Commit-ID: 0fcfdf55db9e1ecf85edd6aa8d0bc78a448cb96a
Gitweb: https://git.kernel.org/tip/0fcfdf55db9e1ecf85edd6aa8d0bc78a448cb96a
Author: Balbir Singh <[email protected]>
AuthorDate: Sat, 16 May 2020 20:34:30 +10:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Fri, 22 May 2020 11:30:08 +02:00
Documentation: Add L1D flushing Documentation
Add documentation for L1D flushing, explaining the need for the
feature and how it can be used.
[tglx: Reword the documentation]
Signed-off-by: Balbir Singh <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
Documentation/admin-guide/hw-vuln/index.rst | 1 +-
Documentation/admin-guide/hw-vuln/l1d_flush.rst | 51 ++++++++++++++++-
Documentation/userspace-api/spec_ctrl.rst | 7 ++-
3 files changed, 59 insertions(+)
create mode 100644 Documentation/admin-guide/hw-vuln/l1d_flush.rst
diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst
index 0795e3c..35633b2 100644
--- a/Documentation/admin-guide/hw-vuln/index.rst
+++ b/Documentation/admin-guide/hw-vuln/index.rst
@@ -14,3 +14,4 @@ are configurable at compile, boot or run time.
mds
tsx_async_abort
multihit.rst
+ l1d_flush
diff --git a/Documentation/admin-guide/hw-vuln/l1d_flush.rst b/Documentation/admin-guide/hw-vuln/l1d_flush.rst
new file mode 100644
index 0000000..530a1e0
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/l1d_flush.rst
@@ -0,0 +1,51 @@
+L1D Flushing for the paranoid
+=============================
+
+With an increasing number of vulnerabilities being reported around data
+leaks from the Level 1 Data cache (L1D), the kernel provides an opt-in
+mechanism to flush the L1D cache on context switch.
+
+This mechanism can be used to address e.g. CVE-2020-0550. For paranoid
+applications, the mechanism keeps them safe from any yet-to-be-discovered
+vulnerabilities related to leaks from the L1D cache.
+
+
+Related CVEs
+------------
+At present, the following CVEs can be addressed by this
+mechanism:
+
+ ============= ======================== ==================
+ CVE-2020-0550 Improper Data Forwarding OS related aspects
+ ============= ======================== ==================
+
+Usage Guidelines
+----------------
+
+Please see :ref:`Documentation/userspace-api/spec_ctrl.rst` for
+details.
+
+**NOTE**: The feature is disabled by default; applications need to
+specifically opt in to enable it.
+
+Mitigation
+----------
+
+When L1D flushing is enabled for a task (PR_SPEC_L1D_FLUSH_OUT), the L1D
+cache is flushed when the task is scheduled out and the incoming task
+belongs to a different process and therefore to a different address space.
+
+If the underlying CPU supports L1D flushing in hardware, the hardware
+mechanism is used, otherwise a software fallback, similar to the L1TF
+mitigation, is invoked.
+
+Limitations
+-----------
+
+The mechanism does not mitigate L1D data leaks between tasks belonging to
+different processes which are concurrently executing on sibling threads of
+a physical CPU core when SMT is enabled on the system.
+
+This can be addressed by controlled placement of processes on physical CPU
+cores or by disabling SMT. See the relevant chapter in the L1TF mitigation
+document: :ref:`Documentation/admin-guide/hw-vuln/l1tf.rst <smt_control>`.
diff --git a/Documentation/userspace-api/spec_ctrl.rst b/Documentation/userspace-api/spec_ctrl.rst
index 7ddd8f6..b40afe9 100644
--- a/Documentation/userspace-api/spec_ctrl.rst
+++ b/Documentation/userspace-api/spec_ctrl.rst
@@ -106,3 +106,10 @@ Speculation misfeature controls
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0);
+
+- PR_SPEC_L1D_FLUSH_OUT: Flush L1D Cache on context switch out of the task
+
+ Invocations:
+ * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_L1D_FLUSH_OUT, 0, 0, 0);
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_L1D_FLUSH_OUT, PR_SPEC_ENABLE, 0, 0);
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_L1D_FLUSH_OUT, PR_SPEC_DISABLE, 0, 0);