The SVSM Calling Area (CA) is used to communicate between Linux and the
SVSM. Since the firmware-supplied CA for the BSP is likely to be in
reserved memory, switch from that CA to a kernel-provided CA so that the
CA remains accessible and usable throughout boot. The CA switch is done using
the SVSM core protocol SVSM_CORE_REMAP_CA call.
An SVSM call is executed by filling out the SVSM CA and setting the proper
register state as documented by the SVSM protocol. The SVSM is invoked
by requesting the hypervisor to run VMPL0.
Once it is safe to allocate/reserve memory, allocate a CA for each CPU.
After allocating the new CAs, the BSP will switch from the boot CA to the
per-CPU CA. The CA for an AP is identified to the SVSM when creating the
VMSA in preparation for booting the AP.
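For reference, the remap boils down to filling in a struct svsm_call and
handing it to the SVSM protocol helper added in this patch; a minimal sketch
(new_ca_pa stands in for the physical address of the kernel-provided CA):

	struct svsm_call call = {};
	int ret;

	/* SVSM_CORE_REMAP_CA: RAX = protocol 0 / call-id 0, RCX = new CA GPA */
	call.caa = __svsm_get_caa();
	call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
	call.rcx = new_ca_pa;
	ret = svsm_protocol(&call);
	if (ret != SVSM_SUCCESS)
		panic("Can't remap the SVSM CA, ret=%#x (%d)\n", ret, ret);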
Signed-off-by: Tom Lendacky <[email protected]>
---
arch/x86/include/asm/sev-common.h | 13 ++
arch/x86/include/asm/sev.h | 32 +++++
arch/x86/include/uapi/asm/svm.h | 1 +
arch/x86/kernel/sev-shared.c | 94 +++++++++++++-
arch/x86/kernel/sev.c | 207 +++++++++++++++++++++++++-----
arch/x86/mm/mem_encrypt_amd.c | 8 +-
6 files changed, 320 insertions(+), 35 deletions(-)
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index 1225744a069b..4cc716660d4b 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -96,6 +96,19 @@ enum psc_op {
/* GHCBData[63:32] */ \
(((u64)(val) & GENMASK_ULL(63, 32)) >> 32)
+/* GHCB Run at VMPL Request/Response */
+#define GHCB_MSR_VMPL_REQ 0x016
+#define GHCB_MSR_VMPL_REQ_LEVEL(v) \
+ /* GHCBData[39:32] */ \
+ (((u64)(v) & GENMASK_ULL(7, 0) << 32) | \
+ /* GHCBDdata[11:0] */ \
+ GHCB_MSR_VMPL_REQ)
+
+#define GHCB_MSR_VMPL_RESP 0x017
+#define GHCB_MSR_VMPL_RESP_VAL(v) \
+ /* GHCBData[63:32] */ \
+ (((u64)(v) & GENMASK_ULL(63, 32)) >> 32)
+
/* GHCB Hypervisor Feature Request/Response */
#define GHCB_MSR_HV_FT_REQ 0x080
#define GHCB_MSR_HV_FT_RESP 0x081
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 56f7d843f7a4..8f180fd3cbf0 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -178,6 +178,36 @@ struct svsm_ca {
u8 svsm_buffer[PAGE_SIZE - 8];
};
+#define SVSM_SUCCESS 0
+#define SVSM_ERR_INCOMPLETE 0x80000000
+#define SVSM_ERR_UNSUPPORTED_PROTOCOL 0x80000001
+#define SVSM_ERR_UNSUPPORTED_CALL 0x80000002
+#define SVSM_ERR_INVALID_ADDRESS 0x80000003
+#define SVSM_ERR_INVALID_FORMAT 0x80000004
+#define SVSM_ERR_INVALID_PARAMETER 0x80000005
+#define SVSM_ERR_INVALID_REQUEST 0x80000006
+#define SVSM_ERR_BUSY 0x80000007
+
+/*
+ * SVSM protocol structure
+ */
+struct svsm_call {
+ struct svsm_ca *caa;
+ u64 rax;
+ u64 rcx;
+ u64 rdx;
+ u64 r8;
+ u64 r9;
+ u64 rax_out;
+ u64 rcx_out;
+ u64 rdx_out;
+ u64 r8_out;
+ u64 r9_out;
+};
+
+#define SVSM_CORE_CALL(x) ((0ULL << 32) | (x))
+#define SVSM_CORE_REMAP_CA 0
+
#ifdef CONFIG_AMD_MEM_ENCRYPT
extern void __sev_es_ist_enter(struct pt_regs *regs);
extern void __sev_es_ist_exit(void);
@@ -252,6 +282,7 @@ void snp_accept_memory(phys_addr_t start, phys_addr_t end);
u64 snp_get_unsupported_features(u64 status);
u64 sev_get_status(void);
void sev_show_status(void);
+void snp_remap_svsm_ca(void);
#else
static inline void sev_es_ist_enter(struct pt_regs *regs) { }
static inline void sev_es_ist_exit(void) { }
@@ -281,6 +312,7 @@ static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { }
static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
static inline u64 sev_get_status(void) { return 0; }
static inline void sev_show_status(void) { }
+static inline void snp_remap_svsm_ca(void) { }
#endif
#ifdef CONFIG_KVM_AMD_SEV
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 80e1df482337..1814b413fd57 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -115,6 +115,7 @@
#define SVM_VMGEXIT_AP_CREATE_ON_INIT 0
#define SVM_VMGEXIT_AP_CREATE 1
#define SVM_VMGEXIT_AP_DESTROY 2
+#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018
#define SVM_VMGEXIT_HV_FEATURES 0x8000fffd
#define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe
#define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
index 46ea4e5e118a..6f57eb804e70 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -18,9 +18,11 @@
#define sev_printk_rtl(fmt, ...) printk_ratelimited(fmt, ##__VA_ARGS__)
#else
#undef WARN
-#define WARN(condition, format...) (!!(condition))
+#define WARN(condition, format...) (!!(condition))
#define sev_printk(fmt, ...)
#define sev_printk_rtl(fmt, ...)
+#undef vc_forward_exception
+#define vc_forward_exception(c) panic("SNP: Hypervisor requested exception\n")
#endif
/*
@@ -244,6 +246,96 @@ static enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt
return ES_VMM_ERROR;
}
+static __always_inline void issue_svsm_call(struct svsm_call *call, u8 *pending)
+{
+ /*
+ * Issue the VMGEXIT to run the SVSM:
+ * - Load the SVSM register state (RAX, RCX, RDX, R8 and R9)
+ * - Set the CA call pending field to 1
+ * - Issue VMGEXIT
+ * - Save the SVSM return register state (RAX, RCX, RDX, R8 and R9)
+ * - Perform atomic exchange of the CA call pending field
+ */
+ asm volatile("mov %9, %%r8\n\t"
+ "mov %10, %%r9\n\t"
+ "movb $1, %11\n\t"
+ "rep; vmmcall\n\t"
+ "mov %%r8, %3\n\t"
+ "mov %%r9, %4\n\t"
+ "xchgb %5, %11\n\t"
+ : "=a" (call->rax_out), "=c" (call->rcx_out), "=d" (call->rdx_out),
+ "=m" (call->r8_out), "=m" (call->r9_out),
+ "+r" (*pending)
+ : "a" (call->rax), "c" (call->rcx), "d" (call->rdx),
+ "r" (call->r8), "r" (call->r9),
+ "m" (call->caa->call_pending)
+ : "r8", "r9", "memory");
+}
+
+static int __svsm_msr_protocol(struct svsm_call *call)
+{
+ u64 val, resp;
+ u8 pending;
+
+ val = sev_es_rd_ghcb_msr();
+
+ sev_es_wr_ghcb_msr(GHCB_MSR_VMPL_REQ_LEVEL(0));
+
+ pending = 0;
+ issue_svsm_call(call, &pending);
+
+ resp = sev_es_rd_ghcb_msr();
+
+ sev_es_wr_ghcb_msr(val);
+
+ if (pending)
+ return -EINVAL;
+
+ if (GHCB_RESP_CODE(resp) != GHCB_MSR_VMPL_RESP)
+ return -EINVAL;
+
+ if (GHCB_MSR_VMPL_RESP_VAL(resp) != 0)
+ return -EINVAL;
+
+ return call->rax_out;
+}
+
+static int __svsm_ghcb_protocol(struct ghcb *ghcb, struct svsm_call *call)
+{
+ struct es_em_ctxt ctxt;
+ u8 pending;
+
+ vc_ghcb_invalidate(ghcb);
+
+ /* Fill in protocol and format specifiers */
+ ghcb->protocol_version = ghcb_version;
+ ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;
+
+ ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_SNP_RUN_VMPL);
+ ghcb_set_sw_exit_info_1(ghcb, 0);
+ ghcb_set_sw_exit_info_2(ghcb, 0);
+
+ sev_es_wr_ghcb_msr(__pa(ghcb));
+
+ pending = 0;
+ issue_svsm_call(call, &pending);
+
+ if (pending)
+ return -EINVAL;
+
+ switch (verify_exception_info(ghcb, &ctxt)) {
+ case ES_OK:
+ break;
+ case ES_EXCEPTION:
+ vc_forward_exception(&ctxt);
+ fallthrough;
+ default:
+ return -EINVAL;
+ }
+
+ return call->rax_out;
+}
+
static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
struct es_em_ctxt *ctxt,
u64 exit_code, u64 exit_info_1,
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index a500df807e79..21f3cc40d662 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -133,8 +133,13 @@ struct ghcb_state {
struct ghcb *ghcb;
};
+/* For early boot SVSM communication */
+static struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE);
+
static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
+static DEFINE_PER_CPU(struct svsm_ca *, svsm_caa);
+static DEFINE_PER_CPU(u64, svsm_caa_pa);
struct sev_config {
__u64 debug : 1,
@@ -150,6 +155,15 @@ struct sev_config {
*/
ghcbs_initialized : 1,
+ /*
+ * A flag used to indicate when the per-CPU SVSM CA is to be
+ * used instead of the boot SVSM CA.
+ *
+ * For APs, the per-CPU SVSM CA is created as part of the AP
+ * bringup, so this flag can be used globally for the BSP and APs.
+ */
+ cas_initialized : 1,
+
__reserved : 62;
};
@@ -572,9 +586,42 @@ static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t si
return ES_EXCEPTION;
}
+static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
+{
+ long error_code = ctxt->fi.error_code;
+ int trapnr = ctxt->fi.vector;
+
+ ctxt->regs->orig_ax = ctxt->fi.error_code;
+
+ switch (trapnr) {
+ case X86_TRAP_GP:
+ exc_general_protection(ctxt->regs, error_code);
+ break;
+ case X86_TRAP_UD:
+ exc_invalid_op(ctxt->regs);
+ break;
+ case X86_TRAP_PF:
+ write_cr2(ctxt->fi.cr2);
+ exc_page_fault(ctxt->regs, error_code);
+ break;
+ case X86_TRAP_AC:
+ exc_alignment_check(ctxt->regs, error_code);
+ break;
+ default:
+ pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
+ BUG();
+ }
+}
+
/* Include code shared with pre-decompression boot stage */
#include "sev-shared.c"
+static struct svsm_ca *__svsm_get_caa(void)
+{
+ return sev_cfg.cas_initialized ? this_cpu_read(svsm_caa)
+ : boot_svsm_caa;
+}
+
static noinstr void __sev_put_ghcb(struct ghcb_state *state)
{
struct sev_es_runtime_data *data;
@@ -600,6 +647,42 @@ static noinstr void __sev_put_ghcb(struct ghcb_state *state)
}
}
+static int svsm_protocol(struct svsm_call *call)
+{
+ struct ghcb_state state;
+ unsigned long flags;
+ struct ghcb *ghcb;
+ int ret;
+
+ /*
+ * This can be called very early in the boot, use native functions in
+ * order to avoid paravirt issues.
+ */
+ flags = native_save_fl();
+ if (flags & X86_EFLAGS_IF)
+ native_irq_disable();
+
+ if (sev_cfg.ghcbs_initialized)
+ ghcb = __sev_get_ghcb(&state);
+ else if (boot_ghcb)
+ ghcb = boot_ghcb;
+ else
+ ghcb = NULL;
+
+ do {
+ ret = ghcb ? __svsm_ghcb_protocol(ghcb, call)
+ : __svsm_msr_protocol(call);
+ } while (ret == SVSM_ERR_BUSY);
+
+ if (sev_cfg.ghcbs_initialized)
+ __sev_put_ghcb(&state);
+
+ if (flags & X86_EFLAGS_IF)
+ native_irq_enable();
+
+ return ret;
+}
+
void noinstr __sev_es_nmi_complete(void)
{
struct ghcb_state state;
@@ -1346,6 +1429,18 @@ static void __init alloc_runtime_data(int cpu)
panic("Can't allocate SEV-ES runtime data");
per_cpu(runtime_data, cpu) = data;
+
+ if (vmpl) {
+ struct svsm_ca *caa;
+
+ /* Allocate the SVSM CA page if an SVSM is present */
+ caa = memblock_alloc(sizeof(*caa), PAGE_SIZE);
+ if (!caa)
+ panic("Can't allocate SVSM CA page\n");
+
+ per_cpu(svsm_caa, cpu) = caa;
+ per_cpu(svsm_caa_pa, cpu) = __pa(caa);
+ }
}
static void __init init_ghcb(int cpu)
@@ -1395,6 +1490,31 @@ void __init sev_es_init_vc_handling(void)
init_ghcb(cpu);
}
+ /* If running under an SVSM, switch to the per-cpu CA */
+ if (vmpl) {
+ struct svsm_call call = {};
+ unsigned long flags;
+ int ret;
+
+ local_irq_save(flags);
+
+ /*
+ * SVSM_CORE_REMAP_CA call:
+ * RAX = 0 (Protocol=0, CallID=0)
+ * RCX = New CA GPA
+ */
+ call.caa = __svsm_get_caa();
+ call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
+ call.rcx = this_cpu_read(svsm_caa_pa);
+ ret = svsm_protocol(&call);
+ if (ret != SVSM_SUCCESS)
+ panic("Can't remap the SVSM CA, ret=%#x (%d)\n", ret, ret);
+
+ sev_cfg.cas_initialized = true;
+
+ local_irq_restore(flags);
+ }
+
sev_es_setup_play_dead();
/* Secondary CPUs use the runtime #VC handler */
@@ -1819,33 +1939,6 @@ static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
return result;
}
-static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
-{
- long error_code = ctxt->fi.error_code;
- int trapnr = ctxt->fi.vector;
-
- ctxt->regs->orig_ax = ctxt->fi.error_code;
-
- switch (trapnr) {
- case X86_TRAP_GP:
- exc_general_protection(ctxt->regs, error_code);
- break;
- case X86_TRAP_UD:
- exc_invalid_op(ctxt->regs);
- break;
- case X86_TRAP_PF:
- write_cr2(ctxt->fi.cr2);
- exc_page_fault(ctxt->regs, error_code);
- break;
- case X86_TRAP_AC:
- exc_alignment_check(ctxt->regs, error_code);
- break;
- default:
- pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
- BUG();
- }
-}
-
static __always_inline bool is_vc2_stack(unsigned long sp)
{
return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
@@ -2095,6 +2188,50 @@ static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
return cc_info;
}
+static __head void setup_svsm(struct cc_blob_sev_info *cc_info)
+{
+ struct svsm_call call = {};
+ int ret;
+ u64 pa;
+
+ /*
+ * Record the SVSM Calling Area address (CAA) if the guest is not
+ * running at VMPL0. The CA will be used to communicate with the
+ * SVSM to perform the SVSM services.
+ */
+ setup_svsm_ca(cc_info);
+
+ /* Nothing to do if not running under an SVSM. */
+ if (!vmpl)
+ return;
+
+ /*
+ * It is very early in the boot and the kernel is running identity
+ * mapped but without having adjusted the pagetables to where the
+ * kernel was loaded (physbase), so the get the CA address using
+ * RIP-relative addressing.
+ */
+ pa = (u64)&RIP_REL_REF(boot_svsm_ca_page);
+
+ /*
+ * Switch over to the boot SVSM CA while the current CA is still
+ * addressable. There is no GHCB at this point so use the MSR protocol.
+ *
+ * SVSM_CORE_REMAP_CA call:
+ * RAX = 0 (Protocol=0, CallID=0)
+ * RCX = New CA GPA
+ */
+ call.caa = __svsm_get_caa();
+ call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
+ call.rcx = pa;
+ ret = svsm_protocol(&call);
+ if (ret != SVSM_SUCCESS)
+ panic("Can't remap the SVSM CA, ret=%#x (%d)\n", ret, ret);
+
+ boot_svsm_caa = (struct svsm_ca *)pa;
+ boot_svsm_caa_pa = pa;
+}
+
bool __head snp_init(struct boot_params *bp)
{
struct cc_blob_sev_info *cc_info;
@@ -2108,12 +2245,7 @@ bool __head snp_init(struct boot_params *bp)
setup_cpuid_table(cc_info);
- /*
- * Record the SVSM Calling Area address (CAA) if the guest is not
- * running at VMPL0. The CA will be used to communicate with the
- * SVSM to perform the SVSM services.
- */
- setup_svsm_ca(cc_info);
+ setup_svsm(cc_info);
/*
* The CC blob will be used later to access the secrets page. Cache
@@ -2306,3 +2438,12 @@ void sev_show_status(void)
}
pr_cont("\n");
}
+
+void __init snp_remap_svsm_ca(void)
+{
+ if (!vmpl)
+ return;
+
+ /* Update the CAA to a proper kernel address */
+ boot_svsm_caa = &boot_svsm_ca_page;
+}
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index 422602f6039b..6155020e4d2d 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -2,7 +2,7 @@
/*
* AMD Memory Encryption Support
*
- * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2016-2024 Advanced Micro Devices, Inc.
*
* Author: Tom Lendacky <[email protected]>
*/
@@ -510,6 +510,12 @@ void __init sme_early_init(void)
*/
x86_init.resources.dmi_setup = snp_dmi_setup;
}
+
+ /*
+ * Switch the SVSM CA mapping (if active) from identity mapped to
+ * kernel mapped.
+ */
+ snp_remap_svsm_ca();
}
void __init mem_encrypt_free_decrypted_mem(void)
--
2.43.2
This one would need multiple review mails.
Lemme make this part 1.
On Wed, Apr 24, 2024 at 10:58:01AM -0500, Tom Lendacky wrote:
> arch/x86/include/asm/sev-common.h | 13 ++
> arch/x86/include/asm/sev.h | 32 +++++
> arch/x86/include/uapi/asm/svm.h | 1 +
> arch/x86/kernel/sev-shared.c | 94 +++++++++++++-
> arch/x86/kernel/sev.c | 207 +++++++++++++++++++++++++-----
Ok, now would be as good time as any to start moving the SEV guest bits
to where we want them to live:
arch/x86/coco/sev/
so pls add the new SVSM guest support bits there:
arch/x86/coco/sev/svsm.c
arch/x86/coco/sev/svsm-shared.c
I guess.
And things which touch sev.c and sev-shared.c will have to add patches
which move bits to the new location.
> arch/x86/mm/mem_encrypt_amd.c | 8 +-
> 6 files changed, 320 insertions(+), 35 deletions(-)
>
> diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
> index 1225744a069b..4cc716660d4b 100644
> --- a/arch/x86/include/asm/sev-common.h
> +++ b/arch/x86/include/asm/sev-common.h
> @@ -96,6 +96,19 @@ enum psc_op {
> /* GHCBData[63:32] */ \
> (((u64)(val) & GENMASK_ULL(63, 32)) >> 32)
>
> +/* GHCB Run at VMPL Request/Response */
Run?
> +#define GHCB_MSR_VMPL_REQ 0x016
> +#define GHCB_MSR_VMPL_REQ_LEVEL(v) \
> + /* GHCBData[39:32] */ \
> + (((u64)(v) & GENMASK_ULL(7, 0) << 32) | \
> + /* GHCBDdata[11:0] */ \
> + GHCB_MSR_VMPL_REQ)
> +
> +#define GHCB_MSR_VMPL_RESP 0x017
> +#define GHCB_MSR_VMPL_RESP_VAL(v) \
> + /* GHCBData[63:32] */ \
> + (((u64)(v) & GENMASK_ULL(63, 32)) >> 32)
> +
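Side note: because '<<' binds tighter than '&', GHCB_MSR_VMPL_REQ_LEVEL()
as written masks v against the already-shifted GENMASK instead of shifting
the masked value. That happens to be harmless for level 0, the only value
passed in this series, but the intended encoding is presumably:

#define GHCB_MSR_VMPL_REQ_LEVEL(v)			\
	/* GHCBData[39:32] */				\
	((((u64)(v) & GENMASK_ULL(7, 0)) << 32) |	\
	/* GHCBData[11:0] */				\
	GHCB_MSR_VMPL_REQ)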
> /* GHCB Hypervisor Feature Request/Response */
> #define GHCB_MSR_HV_FT_REQ 0x080
> #define GHCB_MSR_HV_FT_RESP 0x081
..
> diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
> index 46ea4e5e118a..6f57eb804e70 100644
> --- a/arch/x86/kernel/sev-shared.c
> +++ b/arch/x86/kernel/sev-shared.c
> @@ -18,9 +18,11 @@
> #define sev_printk_rtl(fmt, ...) printk_ratelimited(fmt, ##__VA_ARGS__)
> #else
> #undef WARN
> -#define WARN(condition, format...) (!!(condition))
> +#define WARN(condition, format...) (!!(condition))
> #define sev_printk(fmt, ...)
> #define sev_printk_rtl(fmt, ...)
> +#undef vc_forward_exception
> +#define vc_forward_exception(c) panic("SNP: Hypervisor requested exception\n")
> #endif
>
> /*
> @@ -244,6 +246,96 @@ static enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt
> return ES_VMM_ERROR;
> }
>
> +static __always_inline void issue_svsm_call(struct svsm_call *call, u8 *pending)
svsm_issue_call()
I guess.
> +{
> + /*
> + * Issue the VMGEXIT to run the SVSM:
.. to call the SVSM:" I guess.
> + * - Load the SVSM register state (RAX, RCX, RDX, R8 and R9)
> + * - Set the CA call pending field to 1
> + * - Issue VMGEXIT
> + * - Save the SVSM return register state (RAX, RCX, RDX, R8 and R9)
> + * - Perform atomic exchange of the CA call pending field
> + */
That goes above the function name.
> + asm volatile("mov %9, %%r8\n\t"
> + "mov %10, %%r9\n\t"
> + "movb $1, %11\n\t"
> + "rep; vmmcall\n\t"
> + "mov %%r8, %3\n\t"
> + "mov %%r9, %4\n\t"
> + "xchgb %5, %11\n\t"
> + : "=a" (call->rax_out), "=c" (call->rcx_out), "=d" (call->rdx_out),
> + "=m" (call->r8_out), "=m" (call->r9_out),
> + "+r" (*pending)
> + : "a" (call->rax), "c" (call->rcx), "d" (call->rdx),
> + "r" (call->r8), "r" (call->r9),
> + "m" (call->caa->call_pending)
> + : "r8", "r9", "memory");
> +}
Btw, where are we documenting this calling convention?
Anyway, I think you can do it this way (pasting the whole thing for
easier review):
static __always_inline void issue_svsm_call(struct svsm_call *call, u8 *pending)
{
	register unsigned long r8 asm("r8") = call->r8;
	register unsigned long r9 asm("r9") = call->r9;

	call->caa->call_pending = 1;

	/*
	 * Issue the VMGEXIT to run the SVSM:
	 * - Load the SVSM register state (RAX, RCX, RDX, R8 and R9)
	 * - Set the CA call pending field to 1
	 * - Issue VMGEXIT
	 * - Save the SVSM return register state (RAX, RCX, RDX, R8 and R9)
	 * - Perform atomic exchange of the CA call pending field
	 */
	asm volatile("rep; vmmcall\n\t"
		     "xchgb %[pending], %[call_pending]"
		     : "=a" (call->rax_out),
		       "=c" (call->rcx_out),
		       "=d" (call->rdx_out),
		       [pending] "+r" (*pending),
		       "+r" (r8),
		       "+r" (r9)
		     : "a" (call->rax),
		       "c" (call->rcx),
		       "d" (call->rdx),
		       [call_pending] "m" (call->caa->call_pending)
		     : "memory");

	call->r8_out = r8;
	call->r9_out = r9;
}
I *think* the asm is the same but it needs more looking in detail. It
probably could be simplified even more.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
Ok,
I think this is very readable and clear what's going on:
static __always_inline void issue_svsm_call(struct svsm_call *call, u8 *pending)
{
	register unsigned long rax asm("rax") = call->rax;
	register unsigned long rcx asm("rcx") = call->rcx;
	register unsigned long rdx asm("rdx") = call->rdx;
	register unsigned long r8 asm("r8") = call->r8;
	register unsigned long r9 asm("r9") = call->r9;

	call->caa->call_pending = 1;

	asm volatile("rep; vmmcall\n\t"
		     : "+r" (rax), "+r" (rcx), "+r" (rdx), "+r" (r8), "+r" (r9));

	xchg(pending, 1);

	call->rax_out = rax;
	call->rcx_out = rcx;
	call->rdx_out = rdx;
	call->r8_out = r8;
	call->r9_out = r9;
}
and the asm looks ok but the devil's in the detail.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On 5/6/24 05:09, Borislav Petkov wrote:
> Ok,
>
> I think this is very readable and clear what's going on:
I'll test it out.
>
> static __always_inline void issue_svsm_call(struct svsm_call *call, u8 *pending)
> {
> register unsigned long rax asm("rax") = call->rax;
> register unsigned long rcx asm("rcx") = call->rcx;
> register unsigned long rdx asm("rdx") = call->rdx;
> register unsigned long r8 asm("r8") = call->r8;
> register unsigned long r9 asm("r9") = call->r9;
>
> call->caa->call_pending = 1;
>
> asm volatile("rep; vmmcall\n\t"
> : "+r" (rax), "+r" (rcx), "+r" (rdx), "+r" (r8), "+r" (r9));
>
> xchg(pending, 1);
This isn't quite right. The xchg has to occur between pending and
call->caa->call_pending.
Thanks,
Tom
>
> call->rax_out = rax;
> call->rcx_out = rcx;
> call->rdx_out = rdx;
> call->r8_out = r8;
> call->r9_out = r9;
> }
>
> and the asm looks ok but the devil's in the detail.
>
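For the record, a sketch of the combined result, i.e. the register-variable
layout above plus the required exchange against call->caa->call_pending
(constraints and clobbers are illustrative only, not final):

static __always_inline void issue_svsm_call(struct svsm_call *call, u8 *pending)
{
	register unsigned long rax asm("rax") = call->rax;
	register unsigned long rcx asm("rcx") = call->rcx;
	register unsigned long rdx asm("rdx") = call->rdx;
	register unsigned long r8 asm("r8") = call->r8;
	register unsigned long r9 asm("r9") = call->r9;

	call->caa->call_pending = 1;

	asm volatile("rep; vmmcall\n\t"
		     : "+r" (rax), "+r" (rcx), "+r" (rdx), "+r" (r8), "+r" (r9)
		     : : "memory");

	/* Atomically swap *pending (0) with the CA call pending byte */
	*pending = xchg(&call->caa->call_pending, *pending);

	call->rax_out = rax;
	call->rcx_out = rcx;
	call->rdx_out = rdx;
	call->r8_out = r8;
	call->r9_out = r9;
}

The xchg() both publishes the guest's 0 and picks up whatever the SVSM left
in the pending byte, so a non-zero *pending afterwards tells the caller the
SVSM never processed the request.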
On Mon, May 06, 2024 at 08:14:34AM -0500, Tom Lendacky wrote:
> This isn't quite right. The xchg has to occur between pending and
> call->caa->call_pending.
Right, you can fix that up in your next revision. :-)
I only wanted to show the idea of how to make this a lot more
readable.
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Wed, Apr 24, 2024 at 10:58:01AM -0500, Tom Lendacky wrote:
> +static int __svsm_msr_protocol(struct svsm_call *call)
All those protocol functions need a verb in the name:
__svsm_do_msr_protocol
__svsm_do_ghcb_protocol
or something slicker than the silly "do".
> +{
> + u64 val, resp;
> + u8 pending;
> +
> + val = sev_es_rd_ghcb_msr();
> +
> + sev_es_wr_ghcb_msr(GHCB_MSR_VMPL_REQ_LEVEL(0));
> +
> + pending = 0;
Do that assignment above, at declaration.
> + issue_svsm_call(call, &pending);
> +
> + resp = sev_es_rd_ghcb_msr();
> +
> + sev_es_wr_ghcb_msr(val);
The MSR SVSM protocol is supposed to restore the MSR? A comment pls.
> +
> + if (pending)
> + return -EINVAL;
> +
> + if (GHCB_RESP_CODE(resp) != GHCB_MSR_VMPL_RESP)
> + return -EINVAL;
> +
> + if (GHCB_MSR_VMPL_RESP_VAL(resp) != 0)
s/ != 0//
> + return -EINVAL;
> +
> + return call->rax_out;
rax_out is u64, your function returns int because of the negative
values. But then it'll truncate the u64 in the success case.
> +}
> +
> +static int __svsm_ghcb_protocol(struct ghcb *ghcb, struct svsm_call *call)
> +{
> + struct es_em_ctxt ctxt;
> + u8 pending;
> +
> + vc_ghcb_invalidate(ghcb);
> +
> + /* Fill in protocol and format specifiers */
> + ghcb->protocol_version = ghcb_version;
> + ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;
> +
> + ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_SNP_RUN_VMPL);
> + ghcb_set_sw_exit_info_1(ghcb, 0);
> + ghcb_set_sw_exit_info_2(ghcb, 0);
> +
> + sev_es_wr_ghcb_msr(__pa(ghcb));
> +
> + pending = 0;
As above.
> + issue_svsm_call(call, &pending);
> +
> + if (pending)
> + return -EINVAL;
> +
> + switch (verify_exception_info(ghcb, &ctxt)) {
> + case ES_OK:
> + break;
> + case ES_EXCEPTION:
> + vc_forward_exception(&ctxt);
> + fallthrough;
> + default:
> + return -EINVAL;
> + }
> +
> + return call->rax_out;
As above.
> +}
> +
> static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
> struct es_em_ctxt *ctxt,
> u64 exit_code, u64 exit_info_1,
> diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
> index a500df807e79..21f3cc40d662 100644
> --- a/arch/x86/kernel/sev.c
> +++ b/arch/x86/kernel/sev.c
> @@ -133,8 +133,13 @@ struct ghcb_state {
> struct ghcb *ghcb;
> };
>
> +/* For early boot SVSM communication */
> +static struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE);
> +
> static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
> static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
> +static DEFINE_PER_CPU(struct svsm_ca *, svsm_caa);
> +static DEFINE_PER_CPU(u64, svsm_caa_pa);
>
> struct sev_config {
> __u64 debug : 1,
> @@ -150,6 +155,15 @@ struct sev_config {
> */
> ghcbs_initialized : 1,
>
> + /*
> + * A flag used to indicate when the per-CPU SVSM CA is to be
s/A flag used //
and above.
> + * used instead of the boot SVSM CA.
> + *
> + * For APs, the per-CPU SVSM CA is created as part of the AP
> + * bringup, so this flag can be used globally for the BSP and APs.
> + */
> + cas_initialized : 1,
> +
> __reserved : 62;
> };
>
> @@ -572,9 +586,42 @@ static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t si
> return ES_EXCEPTION;
> }
>
> +static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
> +{
> + long error_code = ctxt->fi.error_code;
> + int trapnr = ctxt->fi.vector;
> +
> + ctxt->regs->orig_ax = ctxt->fi.error_code;
> +
> + switch (trapnr) {
> + case X86_TRAP_GP:
> + exc_general_protection(ctxt->regs, error_code);
> + break;
> + case X86_TRAP_UD:
> + exc_invalid_op(ctxt->regs);
> + break;
> + case X86_TRAP_PF:
> + write_cr2(ctxt->fi.cr2);
> + exc_page_fault(ctxt->regs, error_code);
> + break;
> + case X86_TRAP_AC:
> + exc_alignment_check(ctxt->regs, error_code);
> + break;
> + default:
> + pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
> + BUG();
> + }
> +}
> +
> /* Include code shared with pre-decompression boot stage */
> #include "sev-shared.c"
>
> +static struct svsm_ca *__svsm_get_caa(void)
Why the "__" prefix?
I don't see a svsm_get_caa() variant...
> +{
> + return sev_cfg.cas_initialized ? this_cpu_read(svsm_caa)
> + : boot_svsm_caa;
> +}
> +
> static noinstr void __sev_put_ghcb(struct ghcb_state *state)
> {
> struct sev_es_runtime_data *data;
> @@ -600,6 +647,42 @@ static noinstr void __sev_put_ghcb(struct ghcb_state *state)
> }
> }
>
> +static int svsm_protocol(struct svsm_call *call)
svsm_issue_protocol_call() or whatnot...
Btw, can all this svsm guest gunk live simply in a separate file? Or is
it going to need a lot of sev.c stuff exported through an internal.h
header?
Either that or prefix all SVSM handling functions with "svsm_" so that
there's at least some visibility which is which.
> +{
> + struct ghcb_state state;
> + unsigned long flags;
> + struct ghcb *ghcb;
> + int ret;
> +
> + /*
> + * This can be called very early in the boot, use native functions in
> + * order to avoid paravirt issues.
> + */
> + flags = native_save_fl();
> + if (flags & X86_EFLAGS_IF)
> + native_irq_disable();
Uff, conditional locking.
What's wrong with using local_irq_save/local_irq_restore?
> +
> + if (sev_cfg.ghcbs_initialized)
> + ghcb = __sev_get_ghcb(&state);
> + else if (boot_ghcb)
> + ghcb = boot_ghcb;
> + else
> + ghcb = NULL;
> +
> + do {
> + ret = ghcb ? __svsm_ghcb_protocol(ghcb, call)
> + : __svsm_msr_protocol(call);
> + } while (ret == SVSM_ERR_BUSY);
> +
> + if (sev_cfg.ghcbs_initialized)
> + __sev_put_ghcb(&state);
> +
> + if (flags & X86_EFLAGS_IF)
> + native_irq_enable();
> +
> + return ret;
> +}
..
> @@ -2095,6 +2188,50 @@ static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
> return cc_info;
> }
>
> +static __head void setup_svsm(struct cc_blob_sev_info *cc_info)
svsm_setup
> +{
> + struct svsm_call call = {};
> + int ret;
> + u64 pa;
> +
> + /*
> + * Record the SVSM Calling Area address (CAA) if the guest is not
> + * running at VMPL0. The CA will be used to communicate with the
> + * SVSM to perform the SVSM services.
> + */
> + setup_svsm_ca(cc_info);
> +
> + /* Nothing to do if not running under an SVSM. */
> + if (!vmpl)
> + return;
You set up stuff above and now you bail out?
Judging by setup_svsm_ca() you don't really need that vmpl var but you
can check
	if (!boot_svsm_caa)
		return;
to determine whether a SVSM was detected.
> +
> + /*
> + * It is very early in the boot and the kernel is running identity
> + * mapped but without having adjusted the pagetables to where the
> + * kernel was loaded (physbase), so the get the CA address using
s/the //
> + * RIP-relative addressing.
> + */
> + pa = (u64)&RIP_REL_REF(boot_svsm_ca_page);
> +
> + /*
> + * Switch over to the boot SVSM CA while the current CA is still
> + * addressable. There is no GHCB at this point so use the MSR protocol.
> + *
> + * SVSM_CORE_REMAP_CA call:
> + * RAX = 0 (Protocol=0, CallID=0)
> + * RCX = New CA GPA
> + */
> + call.caa = __svsm_get_caa();
> + call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
> + call.rcx = pa;
> + ret = svsm_protocol(&call);
> + if (ret != SVSM_SUCCESS)
> + panic("Can't remap the SVSM CA, ret=%#x (%d)\n", ret, ret);
> +
> + boot_svsm_caa = (struct svsm_ca *)pa;
> + boot_svsm_caa_pa = pa;
Huh, setup_svsm_ca() already assigned those...
> bool __head snp_init(struct boot_params *bp)
> {
> struct cc_blob_sev_info *cc_info;
> @@ -2108,12 +2245,7 @@ bool __head snp_init(struct boot_params *bp)
>
> setup_cpuid_table(cc_info);
>
> - /*
> - * Record the SVSM Calling Area address (CAA) if the guest is not
> - * running at VMPL0. The CA will be used to communicate with the
> - * SVSM to perform the SVSM services.
> - */
> - setup_svsm_ca(cc_info);
> + setup_svsm(cc_info);
>
> /*
> * The CC blob will be used later to access the secrets page. Cache
> @@ -2306,3 +2438,12 @@ void sev_show_status(void)
> }
> pr_cont("\n");
> }
> +
> +void __init snp_remap_svsm_ca(void)
> +{
> + if (!vmpl)
> + return;
> +
> + /* Update the CAA to a proper kernel address */
> + boot_svsm_caa = &boot_svsm_ca_page;
> +}
> diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
> index 422602f6039b..6155020e4d2d 100644
> --- a/arch/x86/mm/mem_encrypt_amd.c
> +++ b/arch/x86/mm/mem_encrypt_amd.c
> @@ -2,7 +2,7 @@
> /*
> * AMD Memory Encryption Support
> *
> - * Copyright (C) 2016 Advanced Micro Devices, Inc.
> + * Copyright (C) 2016-2024 Advanced Micro Devices, Inc.
Are we doing that now?
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On 5/8/24 03:05, Borislav Petkov wrote:
> On Wed, Apr 24, 2024 at 10:58:01AM -0500, Tom Lendacky wrote:
>> +static int __svsm_msr_protocol(struct svsm_call *call)
>
> All those protocol functions need a verb in the name:
>
> __svsm_do_msr_protocol
> __svsm_do_ghcb_protocol
>
> or something slicker than the silly "do".
ok, maybe __perform_svsm_msr_protocol or such.
>
>> +{
>> + u64 val, resp;
>> + u8 pending;
>> +
>> + val = sev_es_rd_ghcb_msr();
>> +
>> + sev_es_wr_ghcb_msr(GHCB_MSR_VMPL_REQ_LEVEL(0));
>> +
>> + pending = 0;
>
> Do that assignment above, at declaration.
Ok
>
>> + issue_svsm_call(call, &pending);
>> +
>> + resp = sev_es_rd_ghcb_msr();
>> +
>> + sev_es_wr_ghcb_msr(val);
>
> The MSR SVSM protocol is supposed to restore the MSR? A comment pls.
Ok, I'll put it above the read where val is assigned.
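For instance (possible wording only):

	/*
	 * Save the current GHCB MSR value so it can be restored after the
	 * VMPL request/response sequence below overwrites it.
	 */
	val = sev_es_rd_ghcb_msr();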
>
>> +
>> + if (pending)
>> + return -EINVAL;
>> +
>> + if (GHCB_RESP_CODE(resp) != GHCB_MSR_VMPL_RESP)
>> + return -EINVAL;
>> +
>> + if (GHCB_MSR_VMPL_RESP_VAL(resp) != 0)
>
> s/ != 0//
Ok
>
>> + return -EINVAL;
>> +
>> + return call->rax_out;
>
> rax_out is u64, your function returns int because of the negative
> values. But then it'll truncate the u64 in the success case.
I'll fix that and return 0 here and check the rax_out value on the return.
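I.e. roughly (sketch only):

	if (GHCB_MSR_VMPL_RESP_VAL(resp))
		return -EINVAL;

	return 0;

with the caller then inspecting call->rax_out (e.g. against SVSM_SUCCESS)
for the SVSM-level result instead of squeezing a u64 through the int
return value.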
>
>> +}
>> +
>> +static int __svsm_ghcb_protocol(struct ghcb *ghcb, struct svsm_call *call)
>> +{
>> + struct es_em_ctxt ctxt;
>> + u8 pending;
>> +
>> + vc_ghcb_invalidate(ghcb);
>> +
>> + /* Fill in protocol and format specifiers */
>> + ghcb->protocol_version = ghcb_version;
>> + ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;
>> +
>> + ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_SNP_RUN_VMPL);
>> + ghcb_set_sw_exit_info_1(ghcb, 0);
>> + ghcb_set_sw_exit_info_2(ghcb, 0);
>> +
>> + sev_es_wr_ghcb_msr(__pa(ghcb));
>> +
>> + pending = 0;
>
> As above.
Yep
>
>> + issue_svsm_call(call, &pending);
>> +
>> + if (pending)
>> + return -EINVAL;
>> +
>> + switch (verify_exception_info(ghcb, &ctxt)) {
>> + case ES_OK:
>> + break;
>> + case ES_EXCEPTION:
>> + vc_forward_exception(&ctxt);
>> + fallthrough;
>> + default:
>> + return -EINVAL;
>> + }
>> +
>> + return call->rax_out;
>
> As above.
Ditto.
>
>> +}
>> +
>> static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
>> struct es_em_ctxt *ctxt,
>> u64 exit_code, u64 exit_info_1,
>> diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
>> index a500df807e79..21f3cc40d662 100644
>> --- a/arch/x86/kernel/sev.c
>> +++ b/arch/x86/kernel/sev.c
>> @@ -133,8 +133,13 @@ struct ghcb_state {
>> struct ghcb *ghcb;
>> };
>>
>> +/* For early boot SVSM communication */
>> +static struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE);
>> +
>> static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
>> static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
>> +static DEFINE_PER_CPU(struct svsm_ca *, svsm_caa);
>> +static DEFINE_PER_CPU(u64, svsm_caa_pa);
>>
>> struct sev_config {
>> __u64 debug : 1,
>> @@ -150,6 +155,15 @@ struct sev_config {
>> */
>> ghcbs_initialized : 1,
>>
>> + /*
>> + * A flag used to indicate when the per-CPU SVSM CA is to be
>
> s/A flag used //
>
> and above.
Sure
>
>> + * used instead of the boot SVSM CA.
>> + *
>> + * For APs, the per-CPU SVSM CA is created as part of the AP
>> + * bringup, so this flag can be used globally for the BSP and APs.
>> + */
>> + cas_initialized : 1,
>> +
>> __reserved : 62;
>> };
>>
>> @@ -572,9 +586,42 @@ static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t si
>> return ES_EXCEPTION;
>> }
>>
>> +static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
>> +{
>> + long error_code = ctxt->fi.error_code;
>> + int trapnr = ctxt->fi.vector;
>> +
>> + ctxt->regs->orig_ax = ctxt->fi.error_code;
>> +
>> + switch (trapnr) {
>> + case X86_TRAP_GP:
>> + exc_general_protection(ctxt->regs, error_code);
>> + break;
>> + case X86_TRAP_UD:
>> + exc_invalid_op(ctxt->regs);
>> + break;
>> + case X86_TRAP_PF:
>> + write_cr2(ctxt->fi.cr2);
>> + exc_page_fault(ctxt->regs, error_code);
>> + break;
>> + case X86_TRAP_AC:
>> + exc_alignment_check(ctxt->regs, error_code);
>> + break;
>> + default:
>> + pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
>> + BUG();
>> + }
>> +}
>> +
>> /* Include code shared with pre-decompression boot stage */
>> #include "sev-shared.c"
>>
>> +static struct svsm_ca *__svsm_get_caa(void)
>
> Why the "__" prefix?
>
> I don't see a svsm_get_caa() variant...
There probably was at one point and I missed removing the "__" prefix.
I'll take care of that.
>
>> +{
>> + return sev_cfg.cas_initialized ? this_cpu_read(svsm_caa)
>> + : boot_svsm_caa;
>> +}
>> +
>> static noinstr void __sev_put_ghcb(struct ghcb_state *state)
>> {
>> struct sev_es_runtime_data *data;
>> @@ -600,6 +647,42 @@ static noinstr void __sev_put_ghcb(struct ghcb_state *state)
>> }
>> }
>>
>> +static int svsm_protocol(struct svsm_call *call)
>
> svsm_issue_protocol_call() or whatnot...
>
> Btw, can all this svsm guest gunk live simply in a separate file? Or is
> it going to need a lot of sev.c stuff exported through an internal.h
> header?
>
> Either that or prefix all SVSM handling functions with "svsm_" so that
> there's at least some visibility which is which.
There's quite a bit of interaction so I'll make sure to prefix everything.
>
>> +{
>> + struct ghcb_state state;
>> + unsigned long flags;
>> + struct ghcb *ghcb;
>> + int ret;
>> +
>> + /*
>> + * This can be called very early in the boot, use native functions in
>> + * order to avoid paravirt issues.
>> + */
>> + flags = native_save_fl();
>> + if (flags & X86_EFLAGS_IF)
>> + native_irq_disable();
>
> Uff, conditional locking.
>
> What's wrong with using local_irq_save/local_irq_restore?
The paravirt versions of local_irq_save and local_irq_restore can't be
used as early as this routine is called.
>
>> +
>> + if (sev_cfg.ghcbs_initialized)
>> + ghcb = __sev_get_ghcb(&state);
>> + else if (boot_ghcb)
>> + ghcb = boot_ghcb;
>> + else
>> + ghcb = NULL;
>> +
>> + do {
>> + ret = ghcb ? __svsm_ghcb_protocol(ghcb, call)
>> + : __svsm_msr_protocol(call);
>> + } while (ret == SVSM_ERR_BUSY);
>> +
>> + if (sev_cfg.ghcbs_initialized)
>> + __sev_put_ghcb(&state);
>> +
>> + if (flags & X86_EFLAGS_IF)
>> + native_irq_enable();
>> +
>> + return ret;
>> +}
>
> ...
>
>> @@ -2095,6 +2188,50 @@ static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
>> return cc_info;
>> }
>>
>> +static __head void setup_svsm(struct cc_blob_sev_info *cc_info)
>
> svsm_setup
Yep
>
>> +{
>> + struct svsm_call call = {};
>> + int ret;
>> + u64 pa;
>> +
>> + /*
>> + * Record the SVSM Calling Area address (CAA) if the guest is not
>> + * running at VMPL0. The CA will be used to communicate with the
>> + * SVSM to perform the SVSM services.
>> + */
>> + setup_svsm_ca(cc_info);
>> +
>> + /* Nothing to do if not running under an SVSM. */
>> + if (!vmpl)
>> + return;
>
> You set up stuff above and now you bail out?
setup_svsm_ca() is what sets the vmpl variable. So nothing will have
been setup if the VMPL is zero, in which case we don't continue on.
>
> Judging by setup_svsm_ca() you don't really need that vmpl var but you
> can check
>
> if (!boot_svsm_caa)
> return;
>
> to determine whether a SVSM was detected.
Yes, but the vmpl var will be used for attestation requests, sysfs, etc.
>
>> +
>> + /*
>> + * It is very early in the boot and the kernel is running identity
>> + * mapped but without having adjusted the pagetables to where the
>> + * kernel was loaded (physbase), so the get the CA address using
>
> s/the //
hmmm... CA is Calling Area, so
"get the Calling Area address using RIP-relative"
"get Calling Area address using RIP-relative..."
The first one sounds more correct to me.
>
>> + * RIP-relative addressing.
>> + */
>> + pa = (u64)&RIP_REL_REF(boot_svsm_ca_page);
>> +
>> + /*
>> + * Switch over to the boot SVSM CA while the current CA is still
>> + * addressable. There is no GHCB at this point so use the MSR protocol.
>> + *
>> + * SVSM_CORE_REMAP_CA call:
>> + * RAX = 0 (Protocol=0, CallID=0)
>> + * RCX = New CA GPA
>> + */
>> + call.caa = __svsm_get_caa();
>> + call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
>> + call.rcx = pa;
>> + ret = svsm_protocol(&call);
>> + if (ret != SVSM_SUCCESS)
>> + panic("Can't remap the SVSM CA, ret=%#x (%d)\n", ret, ret);
>> +
>> + boot_svsm_caa = (struct svsm_ca *)pa;
>> + boot_svsm_caa_pa = pa;
>
> Huh, setup_svsm_ca() already assigned those...
setup_svsm_ca() assigned the ones from the secrets page. The kernel now
switches to using its own CA.
>
>> bool __head snp_init(struct boot_params *bp)
>> {
>> struct cc_blob_sev_info *cc_info;
>> @@ -2108,12 +2245,7 @@ bool __head snp_init(struct boot_params *bp)
>>
>> setup_cpuid_table(cc_info);
>>
>> - /*
>> - * Record the SVSM Calling Area address (CAA) if the guest is not
>> - * running at VMPL0. The CA will be used to communicate with the
>> - * SVSM to perform the SVSM services.
>> - */
>> - setup_svsm_ca(cc_info);
>> + setup_svsm(cc_info);
>>
>> /*
>> * The CC blob will be used later to access the secrets page. Cache
>> @@ -2306,3 +2438,12 @@ void sev_show_status(void)
>> }
>> pr_cont("\n");
>> }
>> +
>> +void __init snp_remap_svsm_ca(void)
>> +{
>> + if (!vmpl)
>> + return;
>> +
>> + /* Update the CAA to a proper kernel address */
>> + boot_svsm_caa = &boot_svsm_ca_page;
>> +}
>> diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
>> index 422602f6039b..6155020e4d2d 100644
>> --- a/arch/x86/mm/mem_encrypt_amd.c
>> +++ b/arch/x86/mm/mem_encrypt_amd.c
>> @@ -2,7 +2,7 @@
>> /*
>> * AMD Memory Encryption Support
>> *
>> - * Copyright (C) 2016 Advanced Micro Devices, Inc.
>> + * Copyright (C) 2016-2024 Advanced Micro Devices, Inc.
>
> Are we doing that now?
Looks like a leftover change... will remove it.
Thanks,
Tom
>
On 5/8/24 14:13, Tom Lendacky wrote:
> On 5/8/24 03:05, Borislav Petkov wrote:
>> On Wed, Apr 24, 2024 at 10:58:01AM -0500, Tom Lendacky wrote:
>>> diff --git a/arch/x86/mm/mem_encrypt_amd.c
>>> b/arch/x86/mm/mem_encrypt_amd.c
>>> index 422602f6039b..6155020e4d2d 100644
>>> --- a/arch/x86/mm/mem_encrypt_amd.c
>>> +++ b/arch/x86/mm/mem_encrypt_amd.c
>>> @@ -2,7 +2,7 @@
>>> /*
>>> * AMD Memory Encryption Support
>>> *
>>> - * Copyright (C) 2016 Advanced Micro Devices, Inc.
>>> + * Copyright (C) 2016-2024 Advanced Micro Devices, Inc.
>>
>> Are we doing that now?
>
> Looks like a leftover change... will remove it.
Ah, there is a change in there. I guess I misunderstood your question
because the change was cut off from the reply. Anyway, yes, as far as I
know, we should be doing that.
Thanks,
Tom
>
> Thanks,
> Tom
>
>>
On Wed, May 08, 2024 at 02:13:17PM -0500, Tom Lendacky wrote:
> ok, maybe __perform_svsm_msr_protocol or such.
We'll bikeshed it in the coming weeks.
> There's quite a bit of interaction so I'll make sure to prefix everything.
Ack.
> The paravirt versions of local_irq_save and local_irq_restore can't be used
> as early as this routine is called.
tglx says you should do native_local_irq_save()/.._restore() helpers
just like the arch_local_irq_save()/..._restore() ones but use only
native_ functions without the paravirt gunk.
In a prepatch pls.
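A sketch of what that could look like (helper names and exact placement are
assumptions, mirroring arch_local_irq_save()/..._restore() minus the
paravirt indirection):

static __always_inline unsigned long native_local_irq_save(void)
{
	unsigned long flags = native_save_fl();

	native_irq_disable();

	return flags;
}

static __always_inline void native_local_irq_restore(unsigned long flags)
{
	if (flags & X86_EFLAGS_IF)
		native_irq_enable();
}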
> > > + struct svsm_call call = {};
> > > + int ret;
> > > + u64 pa;
> > > +
> > > + /*
> > > + * Record the SVSM Calling Area address (CAA) if the guest is not
> > > + * running at VMPL0. The CA will be used to communicate with the
> > > + * SVSM to perform the SVSM services.
> > > + */
> > > + setup_svsm_ca(cc_info);
> > > +
> > > + /* Nothing to do if not running under an SVSM. */
> > > + if (!vmpl)
> > > + return;
> >
> > You set up stuff above and now you bail out?
>
> setup_svsm_ca() is what sets the vmpl variable. So nothing will have been
> setup if the VMPL is zero, in which case we don't continue on.
You still assign
/*
* The CA is identity mapped when this routine is called, both by the
* decompressor code and the early kernel code.
*/
boot_svsm_caa = (struct svsm_ca *)caa;
boot_svsm_caa_pa = caa;
regardless of vmpl.
I think you should assign those only when vmpl != 0.
Otherwise the code is confusing.
> > Judging by setup_svsm_ca() you don't really need that vmpl var but you
> > can check
> >
> > if (!boot_svsm_caa)
> > return;
> >
> > to determine whether a SVSM was detected.
>
> Yes, but the vmpl var will be used for attestation requests, sysfs, etc.
I guess that comes later in the patchset...
> > Huh, setup_svsm_ca() already assigned those...
>
> setup_svsm_ca() assigned the ones from the secrets page. The kernel now
> switches to using its own CA.
Comment pls.
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On 5/8/24 14:58, Borislav Petkov wrote:
> On Wed, May 08, 2024 at 02:13:17PM -0500, Tom Lendacky wrote:
>> ok, maybe __perform_svsm_msr_protocol or such.
>
> We'll bikeshed it in the coming weeks.
:)
>
>> There's quite a bit of interaction so I'll make sure to prefix everything.
>
> Ack.
>
>> The paravirt versions of local_irq_save and local_irq_restore can't be used
>> as early as this routine is called.
>
> tglx says you should do native_local_irq_save()/.._restore() helpers
> just like the arch_local_irq_save()/..._restore() ones but use only
> native_ functions without the paravirt gunk.
>
> In a prepatch pls.
Will do.
>
>>>> + struct svsm_call call = {};
>>>> + int ret;
>>>> + u64 pa;
>>>> +
>>>> + /*
>>>> + * Record the SVSM Calling Area address (CAA) if the guest is not
>>>> + * running at VMPL0. The CA will be used to communicate with the
>>>> + * SVSM to perform the SVSM services.
>>>> + */
>>>> + setup_svsm_ca(cc_info);
>>>> +
>>>> + /* Nothing to do if not running under an SVSM. */
>>>> + if (!vmpl)
>>>> + return;
>>>
>>> You set up stuff above and now you bail out?
>>
>> setup_svsm_ca() is what sets the vmpl variable. So nothing will have been
>> setup if the VMPL is zero, in which case we don't continue on.
>
> You still assign
>
> /*
> * The CA is identity mapped when this routine is called, both by the
> * decompressor code and the early kernel code.
> */
> boot_svsm_caa = (struct svsm_ca *)caa;
> boot_svsm_caa_pa = caa;
>
> regardless of vmpl.
If we're not running at VMPL0 (based on the RMPADJUST check) and if the
SVSM doesn't advertise a non-zero VMPL value, we will self-terminate. So
those values are only set if we are not running at VMPL0 and the SVSM
has provided a non-zero value to us.
I'm going to turn the function into a bool function so that the call
becomes:
	if (!svsm_setup_caa(cc_info))
		return;
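A possible shape for that helper (body is a guess; setup_svsm_ca() sets the
vmpl variable as noted above):

static __head bool svsm_setup_caa(struct cc_blob_sev_info *cc_info)
{
	/*
	 * Record the SVSM Calling Area address (CAA) if the guest is not
	 * running at VMPL0; this also sets the vmpl variable.
	 */
	setup_svsm_ca(cc_info);

	return !!vmpl;
}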
>
> I think you should assign those only when vmpl != 0.
I do. I think you're missing the RMPADJUST check that causes the
function to return early if we're running at VMPL0.
>
> Otherwise the code is confusing.
>
>>> Judging by setup_svsm_ca() you don't really need that vmpl var but you
>>> can check
>>>
>>> if (!boot_svsm_caa)
>>> return;
>>>
>>> to determine whether a SVSM was detected.
>>
>> Yes, but the vmpl var will be used for attestation requests, sysfs, etc.
>
> I guess that comes later in the patchset...
>
>>> Huh, setup_svsm_ca() already assigned those...
>>
>> setup_svsm_ca() assigned the ones from the secrets page. The kernel now
>> switches to using its own CA.
>
> Comment pls.
There's a block comment above it all, but maybe it isn't clear enough.
I'll rework it.
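Perhaps something along these lines above the reassignment (wording is only
a suggestion):

	/*
	 * Switch to the kernel-provided CA: setup_svsm_ca() recorded the CA
	 * handed over via the secrets page, remap to the kernel's own CA
	 * page while the old one is still addressable.
	 */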
Thanks,
Tom
>
> Thx.
>
On Wed, May 08, 2024 at 03:09:02PM -0500, Tom Lendacky wrote:
> If we're not running at VMPL0 (based on the RMPADJUST check) and if the SVSM
> doesn't advertise a non-zero VMPL value, we will self-terminate. So those
> values are only set if we are not running at VMPL0 and the SVSM has provided
> a non-zero value to us.
>
> I'm going to turn the function into a bool function so that the call
> becomes:
>
> if (!svsm_setup_caa(cc_info))
> return;
Sure, I guess I'm misled by the
if (bla)
sev_es_terminate()
which is a function call but I need to read its name to realize that
after that point we're either terminated or we have all the stuff
required to run on a SVSM.
> I do. I think you're missing the RMPADJUST check that causes the function to
> return early if we're running at VMPL0.
Yap.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette