2021-06-22 22:26:25

by Dave Hansen

Subject: [RFC][PATCH 0/8] x86/pkeys: remove PKRU from kernel XSAVE buffer

This is a rework of the kernel's Protection Keys Register code. It severs
the connection between PKRU and XSAVE as thoroughly as possible without
affecting the existing ABIs.

This compiles in a few configurations and passes the pkeys selftest, but
that's about it. It's not been pummeled enough yet for merging anywhere.

This is on top of the current:

git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git x86/fpu

specifically:

3d168301c78d ("x86/fpu/signal: Let xrstor handle the features to init")

--

PKRU is currently a strange beast. It can be XSAVE-managed and it has
space allocated in the thread 'fpstate' buffer. However, it is
switched more eagerly than other FPU state because PKRU affects things
like copy_to/from_user(). This is because PKRU affects user *PERMISSION*
accesses, not just accesses made from user *MODE* itself.
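
To make the copy_to/from_user() angle concrete, here is a small
user-space sketch (not part of this series; error handling omitted,
and it assumes glibc's pkey_alloc()/pkey_mprotect() wrappers). A pkey
with access disabled also blocks the kernel's accesses made on the
task's behalf, so the read() below is expected to fail with EFAULT:

#define _GNU_SOURCE
#include <sys/mman.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <stdio.h>

int main(void)
{
	char *buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	int pkey = pkey_alloc(0, PKEY_DISABLE_ACCESS);
	int fd = open("/dev/zero", O_RDONLY);

	pkey_mprotect(buf, 4096, PROT_READ | PROT_WRITE, pkey);

	/* The kernel's copy_to_user() into 'buf' honors PKRU: */
	if (read(fd, buf, 4096) < 0)
		printf("read() failed: %d (EFAULT expected)\n", errno);

	return 0;
}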

This leaves PKRU in a very odd position. It is stored in the kernel
XSAVE buffer but the XSAVE architecture is not used to manage it.

Move PKRU out of the 'fpstate' buffer. Instead, allocate space in the
thread_struct for it and save/restore it in the context-switch path
separately from the XSAVE-managed features. This removes the ambiguity
of having PKRU state in two places for each task.

include/asm/fpu/internal.h | 2 -
include/asm/fpu/xstate.h | 2 -
include/asm/pkru.h | 10 +++--
kernel/cpu/common.c | 19 +++++++++-
kernel/fpu/core.c | 8 ++--
kernel/fpu/signal.c | 12 +++++-
kernel/fpu/xstate.c | 83 ++++++++++++++++++++++++++++++---------------
kernel/process_64.c | 9 ++--
kernel/signal.c | 1
kvm/x86.c | 8 ++--
mm/pkeys.c | 21 ++---------
11 files changed, 113 insertions(+), 62 deletions(-)

Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: [email protected]
Cc: Andy Lutomirski <[email protected]>


2021-06-22 22:26:33

by Dave Hansen

Subject: [RFC][PATCH 1/8] x86/pkeys: add PKRU storage outside of task XSAVE buffer


From: Dave Hansen <[email protected]>

PKRU has space in the task XSAVE buffer, but is not context-switched by
XSAVE/XRSTOR. It is switched more eagerly than other FPU state because
PKRU affects things like copy_to/from_user(). This is because PKRU
affects user *PERMISSION* accesses, not just accesses made from user
*MODE* itself.

Prepare to move PKRU away from being XSAVE-managed. Allocate space in
the thread_struct for it and save/restore it in the context-switch path
separately from the XSAVE-managed features.

Leave the XSAVE storage in place for now to ensure bisectability.

Signed-off-by: Dave Hansen <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: [email protected]
Cc: Andy Lutomirski <[email protected]>
---

b/arch/x86/include/asm/pkru.h | 5 +++++
b/arch/x86/kernel/cpu/common.c | 3 +++
b/arch/x86/kernel/process_64.c | 9 ++++-----
b/arch/x86/mm/pkeys.c | 2 ++
4 files changed, 14 insertions(+), 5 deletions(-)

diff -puN arch/x86/include/asm/pkru.h~pkru-stash-thread-value arch/x86/include/asm/pkru.h
--- a/arch/x86/include/asm/pkru.h~pkru-stash-thread-value 2021-06-22 14:49:06.594051763 -0700
+++ b/arch/x86/include/asm/pkru.h 2021-06-22 14:49:06.607051763 -0700
@@ -44,11 +44,16 @@ static inline void write_pkru(u32 pkru)
if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
return;
/*
+ * Update the actual register.
+ *
* WRPKRU is relatively expensive compared to RDPKRU.
* Avoid WRPKRU when it would not change the value.
*/
if (pkru != rdpkru())
wrpkru(pkru);
+
+ /* Update the thread-local, context-switched value: */
+ current->thread.pkru = pkru;
}

static inline void pkru_write_default(void)
diff -puN arch/x86/kernel/cpu/common.c~pkru-stash-thread-value arch/x86/kernel/cpu/common.c
--- a/arch/x86/kernel/cpu/common.c~pkru-stash-thread-value 2021-06-22 14:49:06.596051763 -0700
+++ b/arch/x86/kernel/cpu/common.c 2021-06-22 14:49:06.608051763 -0700
@@ -482,6 +482,9 @@ static __always_inline void setup_pku(st
cr4_set_bits(X86_CR4_PKE);
/* Load the default PKRU value */
pkru_write_default();
+
+ /* Establish the default value for future tasks: */
+ init_task.thread.pkru = init_pkru_value;
}

#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
diff -puN arch/x86/kernel/process_64.c~pkru-stash-thread-value arch/x86/kernel/process_64.c
--- a/arch/x86/kernel/process_64.c~pkru-stash-thread-value 2021-06-22 14:49:06.599051763 -0700
+++ b/arch/x86/kernel/process_64.c 2021-06-22 14:49:06.608051763 -0700
@@ -349,15 +349,14 @@ static __always_inline void load_seg_leg
static __always_inline void x86_pkru_load(struct thread_struct *prev,
struct thread_struct *next)
{
- if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
- return;
+ u32 pkru = read_pkru();

/* Stash the prev task's value: */
- prev->pkru = rdpkru();
+ prev->pkru = pkru;

/*
- * PKRU writes are slightly expensive. Avoid them when not
- * strictly necessary:
+ * PKRU writes are slightly expensive. Avoid
+ * them when not strictly necessary:
*/
if (prev->pkru != next->pkru)
wrpkru(next->pkru);
diff -puN arch/x86/mm/pkeys.c~pkru-stash-thread-value arch/x86/mm/pkeys.c
--- a/arch/x86/mm/pkeys.c~pkru-stash-thread-value 2021-06-22 14:49:06.604051763 -0700
+++ b/arch/x86/mm/pkeys.c 2021-06-22 14:49:06.609051763 -0700
@@ -159,6 +159,8 @@ static ssize_t init_pkru_write_file(stru
return -EINVAL;

WRITE_ONCE(init_pkru_value, new_init_pkru);
+ WRITE_ONCE(init_task.thread.pkru, new_init_pkru);
+
return count;
}

_

2021-06-22 22:26:41

by Dave Hansen

Subject: [RFC][PATCH 4/8] x86/fpu: remove PKRU from FPU user state clearing


From: Dave Hansen <[email protected]>

The signal code has a few points where it initializes user FPU state.
Remove PKRU from the set of features which are initialized in the FPU
code. Use write_pkru() to explicitly initialize PKRU instead of using
the fpstate/XSAVE infrastructure.

Signed-off-by: Dave Hansen <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: [email protected]
Cc: Andy Lutomirski <[email protected]>
---

b/arch/x86/kernel/fpu/core.c | 8 +++++---
b/arch/x86/kernel/fpu/signal.c | 5 ++++-
b/arch/x86/kernel/signal.c | 1 +
3 files changed, 10 insertions(+), 4 deletions(-)

diff -puN arch/x86/kernel/fpu/core.c~no-pkru-in-fpu__clear_user_states arch/x86/kernel/fpu/core.c
--- a/arch/x86/kernel/fpu/core.c~no-pkru-in-fpu__clear_user_states 2021-06-22 14:49:10.026051754 -0700
+++ b/arch/x86/kernel/fpu/core.c 2021-06-22 14:49:10.039051754 -0700
@@ -209,7 +209,8 @@ static inline void fpstate_init_xstate(s
* XRSTORS requires these bits set in xcomp_bv, or it will
* trigger #GP:
*/
- xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask_all;
+ xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT |
+ xfeatures_mask_fpstate();
}

static inline void fpstate_init_fxstate(struct fxregs_state *fx)
@@ -389,8 +390,9 @@ void fpu__clear_user_states(struct fpu *
os_xrstor(&fpu->state.xsave, xfeatures_mask_supervisor());
}

- /* Reset user states in registers. */
- load_fpregs_from_init_fpstate(xfeatures_mask_restore_user());
+ /* Reset user registers maintained in fpstate. */
+ load_fpregs_from_init_fpstate(xfeatures_mask_fpstate() &
+ xfeatures_mask_restore_user());

/*
* Now all FPU registers have their desired values. Inform the FPU
diff -puN arch/x86/kernel/fpu/signal.c~no-pkru-in-fpu__clear_user_states arch/x86/kernel/fpu/signal.c
--- a/arch/x86/kernel/fpu/signal.c~no-pkru-in-fpu__clear_user_states 2021-06-22 14:49:10.031051754 -0700
+++ b/arch/x86/kernel/fpu/signal.c 2021-06-22 14:49:10.040051754 -0700
@@ -437,6 +437,7 @@ int fpu__restore_sig(void __user *buf, i
int ret;

if (unlikely(!buf)) {
+ write_pkru(pkru_get_init_value());
fpu__clear_user_states(&current->thread.fpu);
return 0;
}
@@ -468,8 +469,10 @@ int fpu__restore_sig(void __user *buf, i
ret = __fpu_restore_sig(buf, buf_fx, ia32_fxstate);

out:
- if (unlikely(ret))
+ if (unlikely(ret)) {
+ write_pkru(pkru_get_init_value());
fpu__clear_user_states(&current->thread.fpu);
+ }

return ret;
}
diff -puN arch/x86/kernel/signal.c~no-pkru-in-fpu__clear_user_states arch/x86/kernel/signal.c
--- a/arch/x86/kernel/signal.c~no-pkru-in-fpu__clear_user_states 2021-06-22 14:49:10.033051754 -0700
+++ b/arch/x86/kernel/signal.c 2021-06-22 14:49:10.043051754 -0700
@@ -835,6 +835,7 @@ handle_signal(struct ksignal *ksig, stru
/*
* Ensure the signal handler starts with the new fpu state.
*/
+ write_pkru(pkru_get_init_value());
fpu__clear_user_states(fpu);
}
signal_setup_done(failed, ksig, stepping);
_

2021-06-22 22:26:41

by Dave Hansen

Subject: [RFC][PATCH 2/8] x86/fpu: hook up PKRU into signal user ABIs


From: Dave Hansen <[email protected]>

One nice thing about having PKRU be XSAVE-managed is that it gets naturally
exposed into the XSAVE-using ABIs. Now that XSAVE will not be used to
manage PKRU, these ABIs need to be manually enabled to deal with PKRU.

For signals (the restore_hwregs_from_user() path), it's quite
straightforward. restore_hwregs_from_user() will update PKRU from
the user signal buffer. Ensure that the updated value is also
shuffled into the thread storage.

Signed-off-by: Dave Hansen <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: [email protected]
Cc: Andy Lutomirski <[email protected]>
---

b/arch/x86/kernel/fpu/signal.c | 7 +++++++
1 file changed, 7 insertions(+)

diff -puN arch/x86/kernel/fpu/signal.c~fpu-sig-stash-pkru arch/x86/kernel/fpu/signal.c
--- a/arch/x86/kernel/fpu/signal.c~fpu-sig-stash-pkru 2021-06-22 14:49:07.899051760 -0700
+++ b/arch/x86/kernel/fpu/signal.c 2021-06-22 14:49:07.903051760 -0700
@@ -233,6 +233,13 @@ static int restore_hwregs_from_user(void

if (!ret && unlikely(init_bv))
os_xrstor(&init_fpstate.xsave, init_bv);
+
+ /*
+ * PKRU may have been modified by XRSTOR,
+ * save the possibly updated value:
+ */
+ current->thread.pkru = read_pkru();
+
return ret;
} else if (use_fxsr()) {
return fxrstor_from_user_sigframe(buf);
_

2021-06-22 22:26:48

by Dave Hansen

Subject: [RFC][PATCH 3/8] x86/fpu: separate the setup of xfeatures not in fpstate


From: Dave Hansen <[email protected]>

The goal of this series is to create a new class of xfeature: one which
is enabled in XCR0 so that XSAVE/XRSTOR continue to work on it, but
where the kernel does not use XSAVE*/XRSTOR* to manage the state or to
maintain it in the thread FPU buffer (fpstate).

Create a new helper in the XSAVE setup code: xfeature_fpstate_enabled().
This helper returns whether or not an xfeature is being maintained
inside of the thread's fpstate.

For now, make xfeature_fpstate_enabled() behave identically to
xfeature_enabled(). This ensures that this series is bisectable
between this point and the patch where PKRU is actually removed from
xfeature_fpstate_enabled().

This series originally introduced xfeatures_mask_fpstate() to mean:
"the set of features found in the kernel fpstate and managed by
XSAVE". However, upstream ripped me off and made an identically-
named function which refers to the features managed by XSAVE, a
set which *excludes* PKRU at this juncture.

That means that xfeature_fpstate_enabled() and
xfeatures_mask_fpstate() will diverge until the end of the series.
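
Once the final patch flips the helper over, the intended split for
PKRU looks roughly like this (an illustrative sketch only, not a real
hunk; check_pkru_split() is a made-up name, and both helpers are
static to xstate.c, so a check like this would have to live there):

/* Sketch of the end-of-series invariant for PKRU: */
static void __init check_pkru_split(void)
{
	/* Still enabled in XCR0, so the XSAVE architecture *could* manage it: */
	WARN_ON(!xfeature_enabled(XFEATURE_PKRU));

	/* ...but no longer allocated in the task fpstate buffer: */
	WARN_ON(xfeature_fpstate_enabled(XFEATURE_PKRU));
}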

Signed-off-by: Dave Hansen <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: [email protected]
Cc: Andy Lutomirski <[email protected]>
---

b/arch/x86/kernel/fpu/xstate.c | 40 ++++++++++++++++++++++++++++++----------
1 file changed, 30 insertions(+), 10 deletions(-)

diff -puN arch/x86/kernel/fpu/xstate.c~xfeature-setup-fpstate arch/x86/kernel/fpu/xstate.c
--- a/arch/x86/kernel/fpu/xstate.c~xfeature-setup-fpstate 2021-06-22 14:49:08.956051757 -0700
+++ b/arch/x86/kernel/fpu/xstate.c 2021-06-22 14:49:08.965051757 -0700
@@ -156,16 +156,32 @@ void fpu__init_cpu_xstate(void)
}
}

+/* Can the XSAVE architecture be used to manage this feature? */
static bool xfeature_enabled(enum xfeature xfeature)
{
return xfeatures_mask_all & BIT_ULL(xfeature);
}

/*
+ * Is space for the feature present in the task->thread.fpu
+ * fpstate buffer and is the kernel using XSAVE to context-switch it?
+ */
+static bool xfeature_fpstate_enabled(enum xfeature xfeature)
+{
+ // For bisectability, mirror xfeature_enabled() for now.
+ //return xfeatures_mask_fpstate() & BIT_ULL(xfeature);
+ return xfeature_enabled(xfeature);
+}
+
+/*
* Record the offsets and sizes of various xstates contained
- * in the XSAVE state memory layout.
+ * in the non-compacted XSAVE state memory layout.
+ *
+ * These are always used in the XSAVE ABIs and are used for
+ * the kernel xstate buffer in cases where XSAVES (and thus
+ * the compacted format) is not supported.
*/
-static void __init setup_xstate_features(void)
+static void __init setup_xfeature_offsets(void)
{
u32 eax, ebx, ecx, edx, i;
/* start at the beginning of the "extended state" */
@@ -185,6 +201,10 @@ static void __init setup_xstate_features
xmm_space);

for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
+ /*
+ * Set up any features enabled that are in the kernel
+ * xstate buffer *or* the user XSAVE ABIs.
+ */
if (!xfeature_enabled(i))
continue;

@@ -257,7 +277,7 @@ static int xfeature_is_aligned(int xfeat

CHECK_XFEATURE(xfeature_nr);

- if (!xfeature_enabled(xfeature_nr)) {
+ if (!xfeature_fpstate_enabled(xfeature_nr)) {
WARN_ONCE(1, "Checking alignment of disabled xfeature %d\n",
xfeature_nr);
return 0;
@@ -293,7 +313,7 @@ static void __init setup_xstate_comp_off

if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
- if (xfeature_enabled(i))
+ if (xfeature_fpstate_enabled(i))
xstate_comp_offsets[i] = xstate_offsets[i];
}
return;
@@ -302,7 +322,7 @@ static void __init setup_xstate_comp_off
next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE;

for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
- if (!xfeature_enabled(i))
+ if (!xfeature_fpstate_enabled(i))
continue;

if (xfeature_is_aligned(i))
@@ -329,7 +349,7 @@ static void __init setup_supervisor_only
next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE;

for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
- if (!xfeature_enabled(i) || !xfeature_is_supervisor(i))
+ if (!xfeature_fpstate_enabled(i) || !xfeature_is_supervisor(i))
continue;

if (xfeature_is_aligned(i))
@@ -348,7 +368,7 @@ static void __init print_xstate_offset_s
int i;

for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
- if (!xfeature_enabled(i))
+ if (!xfeature_fpstate_enabled(i))
continue;
pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n",
i, xstate_comp_offsets[i], i, xstate_sizes[i]);
@@ -391,7 +411,7 @@ static void __init setup_init_fpu_buf(vo
if (!boot_cpu_has(X86_FEATURE_XSAVE))
return;

- setup_xstate_features();
+ setup_xfeature_offsets();
print_xstate_features();

if (boot_cpu_has(X86_FEATURE_XSAVES))
@@ -562,7 +582,7 @@ static void do_extra_xstate_size_checks(
int i;

for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
- if (!xfeature_enabled(i))
+ if (!xfeature_fpstate_enabled(i))
continue;

check_xstate_against_struct(i);
@@ -849,7 +869,7 @@ void fpu__resume_cpu(void)
*/
static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
{
- if (!xfeature_enabled(xfeature_nr)) {
+ if (!xfeature_fpstate_enabled(xfeature_nr)) {
WARN_ON_FPU(1);
return NULL;
}
_

2021-06-22 22:27:10

by Dave Hansen

Subject: [RFC][PATCH 6/8] x86/fpu: update xstate size calculations for non-XSAVE-managed features


From: Dave Hansen <[email protected]>

Now that PKRU will no longer be XSAVE-managed, it needs to be removed
from the XSAVE size calculations. get_xsaves_size_no_independent()
currently masks independent supervisor features out of XSS, but PKRU
must be masked out of XCR0 instead.

Also, instead of recalculating XSS (and XCR0), just save and restore
them. This will be more durable in case there are any future changes
to how they are calculated. The way it is now, the values must be
recalculated exactly the same way in two separate places.

The save/restore approach also makes the code more obvious. For
instance, the old code does:

/* Disable independent features. */
wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());

but the new code does:

/* Disable independent features. */
wrmsrl(MSR_IA32_XSS, old_xss & ~xfeatures_mask_independent());

The second is much more obviously correct and the comment could
probably even be removed; it's basically self-documenting.

There is a minor, temporary hack in here. PKRU is currently not in
xfeatures_mask_fpstate(), even though it is allocated in the fpstate.
To avoid size mismatch warnings, hack it into XCR0 for the size
calculation.

Signed-off-by: Dave Hansen <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: [email protected]
Cc: Andy Lutomirski <[email protected]>
---

b/arch/x86/kernel/fpu/xstate.c | 25 +++++++++++++++++++------
1 file changed, 19 insertions(+), 6 deletions(-)

diff -puN arch/x86/kernel/fpu/xstate.c~xsave-checks arch/x86/kernel/fpu/xstate.c
--- a/arch/x86/kernel/fpu/xstate.c~xsave-checks 2021-06-22 14:49:12.547051748 -0700
+++ b/arch/x86/kernel/fpu/xstate.c 2021-06-22 14:49:12.556051748 -0700
@@ -643,14 +643,26 @@ static unsigned int __init get_xsaves_si
*/
static unsigned int __init get_xsaves_size_no_independent(void)
{
- u64 mask = xfeatures_mask_independent();
unsigned int size;
+ u64 xfeatures_in_xcr0;
+ u64 old_xss;
+ u64 old_xcr0;

- if (!mask)
- return get_xsaves_size();
+ /* Stash the old XSAVE control register values: */
+ rdmsrl(MSR_IA32_XSS, old_xss);
+ old_xcr0 = xgetbv(0);

/* Disable independent features. */
- wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
+ wrmsrl(MSR_IA32_XSS, old_xss & ~xfeatures_mask_independent());
+
+ /*
+ * *Temporarily* (to be removed in a later patch), ensure there
+ * is still space for PKRU in the fpstate buffer even though it's
+ * essentially unused.
+ */
+ xfeatures_in_xcr0 = xfeatures_mask_fpstate() | XFEATURE_MASK_PKRU;
+ /* Disable user features which are not kept in the fpstate: */
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, old_xcr0 & xfeatures_in_xcr0);

/*
* Ask the hardware what size is required of the buffer.
@@ -658,8 +670,9 @@ static unsigned int __init get_xsaves_si
*/
size = get_xsaves_size();

- /* Re-enable independent features so XSAVES will work on them again. */
- wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);
+ /* Re-enable original features so XSAVES will work on them again. */
+ wrmsrl(MSR_IA32_XSS, old_xss);
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, old_xcr0);

return size;
}
_

2021-06-22 22:27:32

by Dave Hansen

Subject: [RFC][PATCH 8/8] x86/pkeys: remove init_pkru_value variable


From: Dave Hansen <[email protected]>

The kernel maintains a "default" PKRU value. This is logically similar
to the hardware "init state", but the kernel chose a more restrictive
value.

This default is stored in a variable: 'init_pkru_value'. The default
is also mirrored into the 'init_task' so that the value is picked up
by things like new kernel threads.

There is no need to keep both copies. Remove the variable and depend
instead on the copy inside the 'init_task'.
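
For reference, a quick user-space sketch (not part of this patch) of
what that restrictive default works out to, using the same
PKRU_AD_KEY() encoding this patch moves into setup_pku():

#include <stdio.h>

#define PKRU_AD_BIT		0x1
#define PKRU_BITS_PER_PKEY	2
#define PKRU_AD_KEY(pkey)	(PKRU_AD_BIT << ((pkey) * PKRU_BITS_PER_PKEY))

int main(void)
{
	unsigned int pkru = 0;
	int pkey;

	/* Access-disable keys 1-15; leave pkey 0 (the default key) usable: */
	for (pkey = 1; pkey <= 15; pkey++)
		pkru |= PKRU_AD_KEY(pkey);

	printf("default PKRU: 0x%x\n", pkru);	/* prints 0x55555554 */
	return 0;
}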

Signed-off-by: Dave Hansen <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: [email protected]
Cc: Andy Lutomirski <[email protected]>
---

b/arch/x86/include/asm/pkru.h | 5 ++---
b/arch/x86/kernel/cpu/common.c | 22 ++++++++++++++++++----
b/arch/x86/mm/pkeys.c | 19 ++-----------------
3 files changed, 22 insertions(+), 24 deletions(-)

diff -puN arch/x86/include/asm/pkru.h~axe-init_pkru_value arch/x86/include/asm/pkru.h
--- a/arch/x86/include/asm/pkru.h~axe-init_pkru_value 2021-06-22 14:49:14.772051742 -0700
+++ b/arch/x86/include/asm/pkru.h 2021-06-22 14:49:14.781051742 -0700
@@ -2,6 +2,7 @@
#ifndef _ASM_X86_PKRU_H
#define _ASM_X86_PKRU_H

+#include <linux/sched/task.h>
#include <asm/fpu/xstate.h>

#define PKRU_AD_BIT 0x1
@@ -9,10 +10,8 @@
#define PKRU_BITS_PER_PKEY 2

#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-extern u32 init_pkru_value;
-#define pkru_get_init_value() READ_ONCE(init_pkru_value)
+#define pkru_get_init_value() READ_ONCE(init_task.thread.pkru)
#else
-#define init_pkru_value 0
#define pkru_get_init_value() 0
#endif

diff -puN arch/x86/kernel/cpu/common.c~axe-init_pkru_value arch/x86/kernel/cpu/common.c
--- a/arch/x86/kernel/cpu/common.c~axe-init_pkru_value 2021-06-22 14:49:14.774051742 -0700
+++ b/arch/x86/kernel/cpu/common.c 2021-06-22 14:49:14.782051742 -0700
@@ -464,6 +464,8 @@ __setup("nofsgsbase", x86_nofsgsbase_set
*/
static bool pku_disabled;

+#define PKRU_AD_KEY(pkey) (PKRU_AD_BIT << ((pkey) * PKRU_BITS_PER_PKEY))
+
static __always_inline void setup_pku(struct cpuinfo_x86 *c)
{
if (c == &boot_cpu_data) {
@@ -480,11 +482,23 @@ static __always_inline void setup_pku(st
}

cr4_set_bits(X86_CR4_PKE);
- /* Load the default PKRU value */
- pkru_write_default();

- /* Establish the default value for future tasks: */
- init_task.thread.pkru = init_pkru_value;
+ /*
+ * Establish the default value for future tasks.
+ *
+ * This is as restrictive as possible. It ensures that any threads
+ * clone()'d early in a process's lifetime will not accidentally
+ * get access to data which is pkey-protected later on.
+ */
+ init_task.thread.pkru =
+ PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) |
+ PKRU_AD_KEY( 4) | PKRU_AD_KEY( 5) | PKRU_AD_KEY( 6) |
+ PKRU_AD_KEY( 7) | PKRU_AD_KEY( 8) | PKRU_AD_KEY( 9) |
+ PKRU_AD_KEY(10) | PKRU_AD_KEY(11) | PKRU_AD_KEY(12) |
+ PKRU_AD_KEY(13) | PKRU_AD_KEY(14) | PKRU_AD_KEY(15);
+
+ /* Load the default PKRU value into this CPU's register: */
+ pkru_write_default();
}

#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
diff -puN arch/x86/mm/pkeys.c~axe-init_pkru_value arch/x86/mm/pkeys.c
--- a/arch/x86/mm/pkeys.c~axe-init_pkru_value 2021-06-22 14:49:14.778051742 -0700
+++ b/arch/x86/mm/pkeys.c 2021-06-22 14:49:14.782051742 -0700
@@ -110,27 +110,13 @@ int __arch_override_mprotect_pkey(struct
return vma_pkey(vma);
}

-#define PKRU_AD_KEY(pkey) (PKRU_AD_BIT << ((pkey) * PKRU_BITS_PER_PKEY))
-
-/*
- * Make the default PKRU value (at execve() time) as restrictive
- * as possible. This ensures that any threads clone()'d early
- * in the process's lifetime will not accidentally get access
- * to data which is pkey-protected later on.
- */
-u32 init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) |
- PKRU_AD_KEY( 4) | PKRU_AD_KEY( 5) | PKRU_AD_KEY( 6) |
- PKRU_AD_KEY( 7) | PKRU_AD_KEY( 8) | PKRU_AD_KEY( 9) |
- PKRU_AD_KEY(10) | PKRU_AD_KEY(11) | PKRU_AD_KEY(12) |
- PKRU_AD_KEY(13) | PKRU_AD_KEY(14) | PKRU_AD_KEY(15);
-
static ssize_t init_pkru_read_file(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
{
char buf[32];
unsigned int len;

- len = sprintf(buf, "0x%x\n", init_pkru_value);
+ len = sprintf(buf, "0x%x\n", init_task.thread.pkru);
return simple_read_from_buffer(user_buf, count, ppos, buf, len);
}

@@ -158,7 +144,6 @@ static ssize_t init_pkru_write_file(stru
if (new_init_pkru & (PKRU_AD_BIT|PKRU_WD_BIT))
return -EINVAL;

- WRITE_ONCE(init_pkru_value, new_init_pkru);
WRITE_ONCE(init_task.thread.pkru, new_init_pkru);

return count;
@@ -185,7 +170,7 @@ static __init int setup_init_pkru(char *
if (kstrtouint(opt, 0, &new_init_pkru))
return 1;

- WRITE_ONCE(init_pkru_value, new_init_pkru);
+ WRITE_ONCE(init_task.thread.pkru, new_init_pkru);

return 1;
}
_

2021-06-22 22:27:55

by Dave Hansen

Subject: [RFC][PATCH 5/8] x86/fpu: XSAVE buffer access routine rename


From: Dave Hansen <[email protected]>

get_xsave_addr() sounds like it works on generic XSAVE buffers. It
does not. It only works on kernel XSAVE buffers which are part of the
FPU fpstate.

Give it a better name: get_fpstate_addr(). Also add warnings to it in
case non-fpstate features are requested (NULL should be returned for
these, but WARN() anyway).

Signed-off-by: Dave Hansen <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: [email protected]
Cc: Andy Lutomirski <[email protected]>
---

b/arch/x86/include/asm/fpu/xstate.h | 2 +-
b/arch/x86/kernel/fpu/xstate.c | 11 ++++++-----
b/arch/x86/kvm/x86.c | 8 ++++----
3 files changed, 11 insertions(+), 10 deletions(-)

diff -puN arch/x86/include/asm/fpu/xstate.h~get_xsave_addr-warning arch/x86/include/asm/fpu/xstate.h
--- a/arch/x86/include/asm/fpu/xstate.h~get_xsave_addr-warning 2021-06-22 14:49:11.268051751 -0700
+++ b/arch/x86/include/asm/fpu/xstate.h 2021-06-22 14:49:11.279051751 -0700
@@ -134,7 +134,7 @@ extern u64 xstate_fx_sw_bytes[USER_XSTAT
extern void __init update_regset_xstate_info(unsigned int size,
u64 xstate_mask);

-void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
+void *get_fpstate_addr(struct xregs_state *xsave, int xfeature_nr);
int xfeature_size(int xfeature_nr);
int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
int copy_sigframe_from_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf);
diff -puN arch/x86/kernel/fpu/xstate.c~get_xsave_addr-warning arch/x86/kernel/fpu/xstate.c
--- a/arch/x86/kernel/fpu/xstate.c~get_xsave_addr-warning 2021-06-22 14:49:11.270051751 -0700
+++ b/arch/x86/kernel/fpu/xstate.c 2021-06-22 14:49:11.279051751 -0700
@@ -878,7 +878,8 @@ static void *__raw_xsave_addr(struct xre
}
/*
* Given the xsave area and a state inside, this function returns the
- * address of the state.
+ * address of the state. This only works on kernel fpstate, not on
+ * generic buffers created with XSAVE*.
*
* This is the API that is called to get xstate address in either
* standard format or compacted format of xsave area.
@@ -894,7 +895,7 @@ static void *__raw_xsave_addr(struct xre
* address of the state in the xsave area, or NULL if the
* field is not present in the xsave buffer.
*/
-void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
+void *get_fpstate_addr(struct xregs_state *xsave, int xfeature_nr)
{
/*
* Do we even *have* xsave state?
@@ -906,8 +907,8 @@ void *get_xsave_addr(struct xregs_state
* We should not ever be requesting features that we
* have not enabled.
*/
- WARN_ONCE(!(xfeatures_mask_all & BIT_ULL(xfeature_nr)),
- "get of unsupported state");
+ WARN_ONCE(!(xfeatures_mask_fpstate() & BIT_ULL(xfeature_nr)),
+ "get of unsupported fpstate");
/*
* This assumes the last 'xsave*' instruction to
* have requested that 'xfeature_nr' be saved.
@@ -924,7 +925,7 @@ void *get_xsave_addr(struct xregs_state

return __raw_xsave_addr(xsave, xfeature_nr);
}
-EXPORT_SYMBOL_GPL(get_xsave_addr);
+EXPORT_SYMBOL_GPL(get_fpstate_addr);

#ifdef CONFIG_ARCH_HAS_PKEYS

diff -puN arch/x86/kvm/x86.c~get_xsave_addr-warning arch/x86/kvm/x86.c
--- a/arch/x86/kvm/x86.c~get_xsave_addr-warning 2021-06-22 14:49:11.274051751 -0700
+++ b/arch/x86/kvm/x86.c 2021-06-22 14:49:11.284051751 -0700
@@ -4602,7 +4602,7 @@ static void fill_xsave(u8 *dest, struct
memcpy(dest + offset, &vcpu->arch.pkru,
sizeof(vcpu->arch.pkru));
} else {
- src = get_xsave_addr(xsave, xfeature_nr);
+ src = get_fpstate_addr(xsave, xfeature_nr);
if (src)
memcpy(dest + offset, src, size);
}
@@ -4645,7 +4645,7 @@ static void load_xsave(struct kvm_vcpu *
memcpy(&vcpu->arch.pkru, src + offset,
sizeof(vcpu->arch.pkru));
} else {
- void *dest = get_xsave_addr(xsave, xfeature_nr);
+ void *dest = get_fpstate_addr(xsave, xfeature_nr);

if (dest)
memcpy(dest, src + offset, size);
@@ -10479,11 +10479,11 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcp
*/
if (init_event)
kvm_put_guest_fpu(vcpu);
- mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
+ mpx_state_buffer = get_fpstate_addr(&vcpu->arch.guest_fpu->state.xsave,
XFEATURE_BNDREGS);
if (mpx_state_buffer)
memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state));
- mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
+ mpx_state_buffer = get_fpstate_addr(&vcpu->arch.guest_fpu->state.xsave,
XFEATURE_BNDCSR);
if (mpx_state_buffer)
memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
_

2021-06-22 22:27:57

by Dave Hansen

Subject: [RFC][PATCH 7/8] x86/fpu: actually stop using XSAVE on PKRU


From: Dave Hansen <[email protected]>

Previously, 'xfeatures_mask_all' represented all of the features that
might be found in a kernel XSAVE buffer. It also happened to be
(XCR0|XSS): the set of XSAVE features enabled in the hardware. In
other words, if a feature was XSAVE-enabled in the hardware, it was
guaranteed to be found in the kernel XSAVE buffer.

The goal of this series is to remove PKRU from the kernel XSAVE
buffer, but to also leave it XSAVE-enabled in the hardware. This ensures
that applications which read the hardware enabling status of PKRU directly
with xgetbv(0) will not notice a thing.
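
As an illustration of that ABI, the kind of user-space check that must
keep working looks like this (a sketch; it assumes a compiler providing
the _xgetbv() intrinsic and a build with -mxsave; bit 9 of XCR0 is the
PKRU xfeature):

#include <stdio.h>
#include <stdint.h>
#include <immintrin.h>

int main(void)
{
	uint64_t xcr0 = _xgetbv(0);

	/* Stays "yes" after this series, even with PKRU out of the fpstate: */
	printf("PKRU XSAVE-enabled: %s\n",
	       (xcr0 & (1ULL << 9)) ? "yes" : "no");
	return 0;
}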

Locate all the places where 'xfeatures_mask_all' is used to represent
the set of features in a kernel XSAVE buffer and replace it with
xfeatures_mask_fpstate(). Update the fpstate access function
(__raw_xsave_addr()) to WARN() if a caller attempts to access a
non-present feature. Lastly, remove the get_xsaves_size() hack now
that PKRU can be removed from the fpstate size calculations.

The most visible effect of this is an 8-byte drop in the "context size":

Before:
x86/fpu: Enabled xstate features 0x2ff, context size is 2568 bytes, using 'compacted' format.
After:
x86/fpu: XSAVE managing features 0xff, context size is 2560 bytes, using 'compacted' format.
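
Those two deltas line up with PKRU's xfeature bit and its state size.
A tiny user-space sketch, mirroring the kernel's pkru_state layout
purely for reference:

#include <stdio.h>
#include <stdint.h>

#define XFEATURE_PKRU		9
#define XFEATURE_MASK_PKRU	(1ULL << XFEATURE_PKRU)

struct pkru_state {
	uint32_t pkru;
	uint32_t pad;
};

int main(void)
{
	/* 0x2ff - 0xff == 0x200, and the buffer shrinks by sizeof(struct pkru_state): */
	printf("mask 0x%llx, size %zu bytes\n",
	       (unsigned long long)XFEATURE_MASK_PKRU, sizeof(struct pkru_state));
	return 0;
}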

Signed-off-by: Dave Hansen <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: [email protected]
Cc: Andy Lutomirski <[email protected]>
---

b/arch/x86/include/asm/fpu/internal.h | 2 +-
b/arch/x86/kernel/fpu/xstate.c | 29 +++++++++++++----------------
2 files changed, 14 insertions(+), 17 deletions(-)

diff -puN arch/x86/include/asm/fpu/internal.h~warn-on-seeing-pkey-xfeature arch/x86/include/asm/fpu/internal.h
--- a/arch/x86/include/asm/fpu/internal.h~warn-on-seeing-pkey-xfeature 2021-06-22 14:49:13.609051745 -0700
+++ b/arch/x86/include/asm/fpu/internal.h 2021-06-22 14:49:13.622051745 -0700
@@ -286,7 +286,7 @@ static inline void os_xrstor_booting(str
*/
static inline void os_xsave(struct xregs_state *xstate)
{
- u64 mask = xfeatures_mask_all;
+ u64 mask = xfeatures_mask_fpstate();
u32 lmask = mask;
u32 hmask = mask >> 32;
int err;
diff -puN arch/x86/kernel/fpu/xstate.c~warn-on-seeing-pkey-xfeature arch/x86/kernel/fpu/xstate.c
--- a/arch/x86/kernel/fpu/xstate.c~warn-on-seeing-pkey-xfeature 2021-06-22 14:49:13.613051745 -0700
+++ b/arch/x86/kernel/fpu/xstate.c 2021-06-22 14:49:13.628051745 -0700
@@ -168,9 +168,7 @@ static bool xfeature_enabled(enum xfeatu
*/
static bool xfeature_fpstate_enabled(enum xfeature xfeature)
{
- // For bisectability, mirror xfeature_enabled() for now.
- //return xfeatures_mask_fpstate() & BIT_ULL(xfeature);
- return xfeature_enabled(xfeature);
+ return xfeatures_mask_fpstate() & BIT_ULL(xfeature);
}

/*
@@ -416,7 +414,7 @@ static void __init setup_init_fpu_buf(vo

if (boot_cpu_has(X86_FEATURE_XSAVES))
init_fpstate.xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT |
- xfeatures_mask_all;
+ xfeatures_mask_fpstate();

/*
* Init all the features state with header.xfeatures being 0x0
@@ -644,7 +642,6 @@ static unsigned int __init get_xsaves_si
static unsigned int __init get_xsaves_size_no_independent(void)
{
unsigned int size;
- u64 xfeatures_in_xcr0;
u64 old_xss;
u64 old_xcr0;

@@ -655,14 +652,8 @@ static unsigned int __init get_xsaves_si
/* Disable independent features. */
wrmsrl(MSR_IA32_XSS, old_xss & ~xfeatures_mask_independent());

- /*
- * *Temporarily* (to be removed in a later patch), ensure there
- * is still space for PKRU in the fpstate buffer even though it's
- * essentially unused.
- */
- xfeatures_in_xcr0 = xfeatures_mask_fpstate() | XFEATURE_MASK_PKRU;
/* Disable user features which are not kept in the fpstate: */
- xsetbv(XCR_XFEATURE_ENABLED_MASK, old_xcr0 & xfeatures_in_xcr0);
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, old_xcr0 & xfeatures_mask_fpstate());

/*
* Ask the hardware what size is required of the buffer.
@@ -843,8 +834,8 @@ void __init fpu__init_system_xstate(void
}

print_xstate_offset_size();
- pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
- xfeatures_mask_all,
+ pr_info("x86/fpu: XSAVE managing features 0x%llx, context size is %d bytes, using '%s' format.\n",
+ xfeatures_mask_fpstate(),
fpu_kernel_xstate_size,
boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
return;
@@ -879,9 +870,15 @@ void fpu__resume_cpu(void)
* Given an xstate feature nr, calculate where in the xsave
* buffer the state is. Callers should ensure that the buffer
* is valid.
+ *
+ * This only works on kernel FPU buffers, like task->thread.fpu.
*/
static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
{
+ if (!(xfeatures_mask_fpstate() & BIT_ULL(xfeature_nr))) {
+ WARN_ON_FPU(1);
+ return NULL;
+ }
if (!xfeature_fpstate_enabled(xfeature_nr)) {
WARN_ON_FPU(1);
return NULL;
@@ -1235,7 +1232,7 @@ void xsaves(struct xregs_state *xstate,
if (mask & xfeatures_mask_independent())
xchk = ~xfeatures_mask_independent();
else
- xchk = ~xfeatures_mask_all;
+ xchk = ~xfeatures_mask_fpstate();

if (WARN_ON_ONCE(!mask || mask & xchk))
return;
@@ -1272,7 +1269,7 @@ void xrstors(struct xregs_state *xstate,
if (mask & xfeatures_mask_independent())
xchk = ~xfeatures_mask_independent();
else
- xchk = ~xfeatures_mask_all;
+ xchk = ~xfeatures_mask_fpstate();

if (WARN_ON_ONCE(!mask || mask & xchk))
return;
_