2023-04-10 23:12:40

by Chang S. Bae

[permalink] [raw]
Subject: [PATCH v6 07/12] x86/cpu/keylocker: Load an internal wrapping key at boot-time

The Internal Wrapping Key (IWKey) is an entity of Key Locker to encode a
clear text key into a key handle. This key is a pivot in protecting user
keys. So the value has to be randomized before being loaded in the
software-invisible CPU state.

IWKey needs to be established before the first user. Given that the only
proposed Linux use case for Key Locker is dm-crypt, the feature could be
lazily enabled when the first dm-crypt user arrives, but there is no
precedent for late enabling of CPU features and it adds maintenance burden
without demonstrative benefit outside of minimizing the visibility of
Key Locker to userspace.

The kernel generates random bytes and load them at boot time. These bytes
are flushed out immediately.

Setting the CR4.KL bit does not always enable the feature so ensure the
dynamic CPU bit (CPUID.AESKLE) is set before loading the key.

Given that the Linux Key Locker support is only intended for bare metal
dm-crypt consumption, and that switching IWKey per VM is untenable,
explicitly skip Key Locker setup in the X86_FEATURE_HYPERVISOR case.

Signed-off-by: Chang S. Bae <[email protected]>
Reviewed-by: Dan Williams <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: [email protected]
Cc: [email protected]
---
Changes from v5:
* Call out the disabling when the feature is available on a virtual
machine. Then, it will turn off the feature flag

Changes from RFC v2:
* Make bare metal only.
* Clean up the code (e.g. dynamically allocate the key cache).
(Dan Williams)
* Massage the changelog.
* Move out the LOADIWKEY wrapper and the Key Locker CPUID defines.

Note, Dan wonders that given that the only proposed Linux use case for
Key Locker is dm-crypt, the feature could be lazily enabled when the
first dm-crypt user arrives, but as Dave notes there is no precedent
for late enabling of CPU features and it adds maintenance burden
without demonstrative benefit outside of minimizing the visibility of
Key Locker to userspace.
---
arch/x86/include/asm/keylocker.h | 9 ++++
arch/x86/kernel/Makefile | 1 +
arch/x86/kernel/cpu/common.c | 5 +-
arch/x86/kernel/keylocker.c | 82 ++++++++++++++++++++++++++++++++
arch/x86/kernel/smpboot.c | 2 +
5 files changed, 98 insertions(+), 1 deletion(-)
create mode 100644 arch/x86/kernel/keylocker.c

diff --git a/arch/x86/include/asm/keylocker.h b/arch/x86/include/asm/keylocker.h
index e85dfb6c1524..820ac29c06d9 100644
--- a/arch/x86/include/asm/keylocker.h
+++ b/arch/x86/include/asm/keylocker.h
@@ -5,6 +5,7 @@

#ifndef __ASSEMBLY__

+#include <asm/processor.h>
#include <linux/bits.h>
#include <asm/fpu/types.h>

@@ -28,5 +29,13 @@ struct iwkey {
#define KEYLOCKER_CPUID_EBX_WIDE BIT(2)
#define KEYLOCKER_CPUID_EBX_BACKUP BIT(4)

+#ifdef CONFIG_X86_KEYLOCKER
+void setup_keylocker(struct cpuinfo_x86 *c);
+void destroy_keylocker_data(void);
+#else
+#define setup_keylocker(c) do { } while (0)
+#define destroy_keylocker_data() do { } while (0)
+#endif
+
#endif /*__ASSEMBLY__ */
#endif /* _ASM_KEYLOCKER_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index b366641703e3..20babbaf8c0c 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -133,6 +133,7 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
obj-$(CONFIG_TRACING) += tracepoint.o
obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o
obj-$(CONFIG_X86_UMIP) += umip.o
+obj-$(CONFIG_X86_KEYLOCKER) += keylocker.o

obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o
obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 3ea06b0b4570..a1edd5997e0a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -58,6 +58,8 @@
#include <asm/microcode_intel.h>
#include <asm/intel-family.h>
#include <asm/cpu_device_id.h>
+#include <asm/keylocker.h>
+
#include <asm/uv/uv.h>
#include <asm/sigframe.h>
#include <asm/traps.h>
@@ -1874,10 +1876,11 @@ static void identify_cpu(struct cpuinfo_x86 *c)
/* Disable the PN if appropriate */
squash_the_stupid_serial_number(c);

- /* Set up SMEP/SMAP/UMIP */
+ /* Setup various Intel-specific CPU security features */
setup_smep(c);
setup_smap(c);
setup_umip(c);
+ setup_keylocker(c);

/* Enable FSGSBASE instructions if available. */
if (cpu_has(c, X86_FEATURE_FSGSBASE)) {
diff --git a/arch/x86/kernel/keylocker.c b/arch/x86/kernel/keylocker.c
new file mode 100644
index 000000000000..2519102f72f1
--- /dev/null
+++ b/arch/x86/kernel/keylocker.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Setup Key Locker feature and support internal wrapping key
+ * management.
+ */
+
+#include <linux/random.h>
+#include <linux/poison.h>
+
+#include <asm/fpu/api.h>
+#include <asm/keylocker.h>
+#include <asm/tlbflush.h>
+
+static __initdata struct keylocker_setup_data {
+ struct iwkey key;
+} kl_setup;
+
+static void __init generate_keylocker_data(void)
+{
+ get_random_bytes(&kl_setup.key.integrity_key, sizeof(kl_setup.key.integrity_key));
+ get_random_bytes(&kl_setup.key.encryption_key, sizeof(kl_setup.key.encryption_key));
+}
+
+void __init destroy_keylocker_data(void)
+{
+ memset(&kl_setup.key, KEY_DESTROY, sizeof(kl_setup.key));
+}
+
+static void __init load_keylocker(void)
+{
+ kernel_fpu_begin();
+ load_xmm_iwkey(&kl_setup.key);
+ kernel_fpu_end();
+}
+
+/**
+ * setup_keylocker - Enable the feature.
+ * @c: A pointer to struct cpuinfo_x86
+ */
+void __ref setup_keylocker(struct cpuinfo_x86 *c)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_KEYLOCKER))
+ goto out;
+
+ if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) {
+ pr_debug("x86/keylocker: Not compatible with a hypervisor.\n");
+ goto disable;
+ }
+
+ cr4_set_bits(X86_CR4_KEYLOCKER);
+
+ if (c == &boot_cpu_data) {
+ u32 eax, ebx, ecx, edx;
+
+ cpuid_count(KEYLOCKER_CPUID, 0, &eax, &ebx, &ecx, &edx);
+ /*
+ * Check the feature readiness via CPUID. Note that the
+ * CPUID AESKLE bit is conditionally set only when CR4.KL
+ * is set.
+ */
+ if (!(ebx & KEYLOCKER_CPUID_EBX_AESKLE) ||
+ !(eax & KEYLOCKER_CPUID_EAX_SUPERVISOR)) {
+ pr_debug("x86/keylocker: Not fully supported.\n");
+ goto disable;
+ }
+
+ generate_keylocker_data();
+ }
+
+ load_keylocker();
+
+ pr_info_once("x86/keylocker: Enabled.\n");
+ return;
+
+disable:
+ setup_clear_cpu_cap(X86_FEATURE_KEYLOCKER);
+ pr_info_once("x86/keylocker: Disabled.\n");
+out:
+ /* Make sure the feature disabled for kexec-reboot. */
+ cr4_clear_bits(X86_CR4_KEYLOCKER);
+}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 851477f7d728..880ed9cc8c53 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -84,6 +84,7 @@
#include <asm/hw_irq.h>
#include <asm/stackprotector.h>
#include <asm/sev.h>
+#include <asm/keylocker.h>

/* representing HT siblings of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
@@ -1500,6 +1501,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
nmi_selftest();
impress_friends();
cache_aps_init();
+ destroy_keylocker_data();
}

static int __initdata setup_possible_cpus = -1;
--
2.17.1


2023-05-05 23:18:54

by Eric Biggers

[permalink] [raw]
Subject: Re: [PATCH v6 07/12] x86/cpu/keylocker: Load an internal wrapping key at boot-time

On Mon, Apr 10, 2023 at 03:59:31PM -0700, Chang S. Bae wrote:
> diff --git a/arch/x86/include/asm/keylocker.h b/arch/x86/include/asm/keylocker.h
> index e85dfb6c1524..820ac29c06d9 100644
> --- a/arch/x86/include/asm/keylocker.h
> +++ b/arch/x86/include/asm/keylocker.h
> @@ -5,6 +5,7 @@
>
> #ifndef __ASSEMBLY__
>
> +#include <asm/processor.h>
> #include <linux/bits.h>
> #include <asm/fpu/types.h>
>
> @@ -28,5 +29,13 @@ struct iwkey {
> #define KEYLOCKER_CPUID_EBX_WIDE BIT(2)
> #define KEYLOCKER_CPUID_EBX_BACKUP BIT(4)
>
> +#ifdef CONFIG_X86_KEYLOCKER
> +void setup_keylocker(struct cpuinfo_x86 *c);
> +void destroy_keylocker_data(void);
> +#else
> +#define setup_keylocker(c) do { } while (0)
> +#define destroy_keylocker_data() do { } while (0)
> +#endif

Shouldn't the !CONFIG_X86_KEYLOCKER stubs be static inline functions instead of
macros, so that type checking works?

- Eric

2023-05-08 18:32:16

by Chang S. Bae

[permalink] [raw]
Subject: Re: [PATCH v6 07/12] x86/cpu/keylocker: Load an internal wrapping key at boot-time

On 5/5/2023 4:05 PM, Eric Biggers wrote:
> On Mon, Apr 10, 2023 at 03:59:31PM -0700, Chang S. Bae wrote:
>>
>> +#ifdef CONFIG_X86_KEYLOCKER
>> +void setup_keylocker(struct cpuinfo_x86 *c);
>> +void destroy_keylocker_data(void);
>> +#else
>> +#define setup_keylocker(c) do { } while (0)
>> +#define destroy_keylocker_data() do { } while (0)
>> +#endif
>
> Shouldn't the !CONFIG_X86_KEYLOCKER stubs be static inline functions instead of
> macros, so that type checking works?

I think either way works here. This macro is just for nothing.

Thanks,
Chang

Subject: RE: [PATCH v6 07/12] x86/cpu/keylocker: Load an internal wrapping key at boot-time


> diff --git a/arch/x86/kernel/keylocker.c b/arch/x86/kernel/keylocker.c
...
> +void __init destroy_keylocker_data(void)
> +{
> + memset(&kl_setup.key, KEY_DESTROY, sizeof(kl_setup.key));
> +}

That's a special value for garbage collected keyring keys assigned
a keytype of ".dead". memzero() or memzero_explicit() might be better
for this use case.


2023-05-08 20:28:07

by Chang S. Bae

[permalink] [raw]
Subject: Re: [PATCH v6 07/12] x86/cpu/keylocker: Load an internal wrapping key at boot-time

On 5/8/2023 12:18 PM, Elliott, Robert (Servers) wrote:
>
>> diff --git a/arch/x86/kernel/keylocker.c b/arch/x86/kernel/keylocker.c
> ...
>> +void __init destroy_keylocker_data(void)
>> +{
>> + memset(&kl_setup.key, KEY_DESTROY, sizeof(kl_setup.key));
>> +}
>
> That's a special value for garbage collected keyring keys assigned
> a keytype of ".dead". memzero() or memzero_explicit() might be better
> for this use case.
memzero() looks to be the same as memset() in x86:

$ git grep memzero arch/x86/ | grep define
arch/x86/boot/compressed/misc.c:#define memzero(s, n) memset((s), 0, (n))

Instead, memzero_explicit() looks to be about the right call here:

/**
* memzero_explicit - Fill a region of memory (e.g. sensitive
* keying data) with 0s.
...
* Note: usually using memset() is just fine (!), but in cases
* where clearing out _local_ data at the end of a scope is
* necessary, memzero_explicit() should be used instead in
* order to prevent the compiler from optimising away zeroing.
...

Then,

void __init destroy_keylocker_data(void)
{
memzero_explicit(&kl_setup.key, sizeof(kl_setup.key));
}

Thanks,
Chang

2023-05-09 00:33:48

by Chang S. Bae

[permalink] [raw]
Subject: Re: [PATCH v6 07/12] x86/cpu/keylocker: Load an internal wrapping key at boot-time

On 5/8/2023 2:56 PM, Dave Hansen wrote:
> On 5/8/23 11:18, Chang S. Bae wrote:
>> On 5/5/2023 4:05 PM, Eric Biggers wrote:
>>> On Mon, Apr 10, 2023 at 03:59:31PM -0700, Chang S. Bae wrote:
>>>>   +#ifdef CONFIG_X86_KEYLOCKER
>>>> +void setup_keylocker(struct cpuinfo_x86 *c);
>>>> +void destroy_keylocker_data(void);
>>>> +#else
>>>> +#define setup_keylocker(c) do { } while (0)
>>>> +#define destroy_keylocker_data() do { } while (0)
>>>> +#endif
>>>
>>> Shouldn't the !CONFIG_X86_KEYLOCKER stubs be static inline functions
>>> instead of
>>> macros, so that type checking works?
>>
>> I think either way works here. This macro is just for nothing.
>
> Chang, I do prefer the 'static inline' as a general rule. Think of this:
>
> static inline void setup_keylocker(struct cpuinfo_x86 *c) {}
>
> versus:
>
> #define setup_keylocker(c) do { } while (0)
>
> Imagine some dope does:
>
> char c;
> ...
> setup_keylocker(c);
>
> With the macro, they'll get no type warning. The inline actually makes
> it easier to find bugs because folks will get _some_ type checking no
> matter how they compile the code.

Ah, when the prototype with one or more arguments, 'static inline'
allows the check. Then it is not an 'either-way' thing.

Looking at the x86 code, there are some seemingly related:

$ git grep "do { } while (0)" arch/x86 | grep -v "()"
arch/x86/include/asm/kprobes.h:#define flush_insn_slot(p) do { }
while (0)
arch/x86/include/asm/mc146818rtc.h:#define lock_cmos(reg) do { } while (0)
arch/x86/include/asm/pgtable.h:#define flush_tlb_fix_spurious_fault(vma,
address, ptep) do { } while (0)
arch/x86/include/asm/preempt.h:#define init_task_preempt_count(p) do { }
while (0)
arch/x86/kvm/ioapic.h:#define ASSERT(x) do { } while (0)
arch/x86/kvm/mmu/mmu_internal.h:#define pgprintk(x...) do { } while (0)
arch/x86/kvm/mmu/mmu_internal.h:#define rmap_printk(x...) do { } while (0)
arch/x86/kvm/mmu/mmu_internal.h:#define MMU_WARN_ON(x) do { } while (0)

Now I feel owed for some potential cleanup work.

Thanks,
Chang

2023-05-09 00:59:15

by Dave Hansen

[permalink] [raw]
Subject: Re: [PATCH v6 07/12] x86/cpu/keylocker: Load an internal wrapping key at boot-time

On 5/8/23 17:31, Chang S. Bae wrote:
>> With the macro, they'll get no type warning.  The inline actually makes
>> it easier to find bugs because folks will get _some_ type checking no
>> matter how they compile the code.
>
> Ah, when the prototype with one or more arguments, 'static inline'
> allows the check. Then it is not an 'either-way' thing.
>
> Looking at the x86 code, there are some seemingly related:
>
> $ git grep "do { } while (0)" arch/x86 | grep -v "()"
...

Right. It's not a hard and fast rule. We certainly take code either
way and there can be real reasons to do it one way versus the other.