2022-11-19 03:56:36

by Tianyu Lan

[permalink] [raw]
Subject: [RFC PATCH V2 01/18] x86/sev: Pvalidate memory gab for decompressing kernel

From: Tianyu Lan <[email protected]>

Pvalidate needed pages for decompressing kernel. The E820_TYPE_RAM
entry includes only validated memory. The kernel expects that the
RAM entry's addr is fixed while the entry size is to be extended
to cover addresses to the start of next entry. This patch increases
the RAM entry size to cover all possilble memory addresses until
init_size.

Signed-off-by: Tianyu Lan <[email protected]>
---
arch/x86/boot/compressed/head_64.S | 8 +++
arch/x86/boot/compressed/sev.c | 84 ++++++++++++++++++++++++++++++
2 files changed, 92 insertions(+)

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index d33f060900d2..818edaf5d0cf 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -348,6 +348,14 @@ SYM_CODE_START(startup_64)
cld
cli

+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ /* pvalidate memory on demand if SNP is enabled. */
+ pushq %rsi
+ movq %rsi, %rdi
+ call pvalidate_for_startup_64
+ popq %rsi
+#endif
+
/* Setup data segments. */
xorl %eax, %eax
movl %eax, %ds
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index 960968f8bf75..3a5a1ab16095 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -12,8 +12,10 @@
*/
#include "misc.h"

+#include <asm/msr-index.h>
#include <asm/pgtable_types.h>
#include <asm/sev.h>
+#include <asm/svm.h>
#include <asm/trapnr.h>
#include <asm/trap_pf.h>
#include <asm/msr-index.h>
@@ -21,6 +23,7 @@
#include <asm/ptrace.h>
#include <asm/svm.h>
#include <asm/cpuid.h>
+#include <asm/e820/types.h>

#include "error.h"
#include "../msr.h"
@@ -117,6 +120,22 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
/* Include code for early handlers */
#include "../../kernel/sev-shared.c"

+/* Check SEV-SNP via MSR */
+static bool sev_snp_runtime_check(void)
+{
+ unsigned long low, high;
+ u64 val;
+
+ asm volatile("rdmsr\n" : "=a" (low), "=d" (high) :
+ "c" (MSR_AMD64_SEV));
+
+ val = (high << 32) | low;
+ if (val & MSR_AMD64_SEV_SNP_ENABLED)
+ return true;
+
+ return false;
+}
+
static inline bool sev_snp_enabled(void)
{
return sev_status & MSR_AMD64_SEV_SNP_ENABLED;
@@ -456,3 +475,68 @@ void sev_prep_identity_maps(unsigned long top_level_pgt)

sev_verify_cbit(top_level_pgt);
}
+
+static void extend_e820_on_demand(struct boot_e820_entry *e820_entry,
+ u64 needed_ram_end)
+{
+ u64 end, paddr;
+ unsigned long eflags;
+ int rc;
+
+ if (!e820_entry)
+ return;
+
+ /* Validated memory must be aligned by PAGE_SIZE. */
+ end = ALIGN(e820_entry->addr + e820_entry->size, PAGE_SIZE);
+ if (needed_ram_end > end && e820_entry->type == E820_TYPE_RAM) {
+ for (paddr = end; paddr < needed_ram_end; paddr += PAGE_SIZE) {
+ rc = pvalidate(paddr, RMP_PG_SIZE_4K, true);
+ if (rc) {
+ error("Failed to validate address.n");
+ return;
+ }
+ }
+ e820_entry->size = needed_ram_end - e820_entry->addr;
+ }
+}
+
+/*
+ * Explicitly pvalidate needed pages for decompressing the kernel.
+ * The E820_TYPE_RAM entry includes only validated memory. The kernel
+ * expects that the RAM entry's addr is fixed while the entry size is to be
+ * extended to cover addresses to the start of next entry.
+ * The function increases the RAM entry size to cover all possible memory
+ * addresses until init_size.
+ * For example, init_end = 0x4000000,
+ * [RAM: 0x0 - 0x0], M[RAM: 0x0 - 0xa0000]
+ * [RSVD: 0xa0000 - 0x10000] [RSVD: 0xa0000 - 0x10000]
+ * [ACPI: 0x10000 - 0x20000] ==> [ACPI: 0x10000 - 0x20000]
+ * [RSVD: 0x800000 - 0x900000] [RSVD: 0x800000 - 0x900000]
+ * [RAM: 0x1000000 - 0x2000000] M[RAM: 0x1000000 - 0x2001000]
+ * [RAM: 0x2001000 - 0x2007000] M[RAM: 0x2001000 - 0x4000000]
+ * Other RAM memory after init_end is pvalidated by ms_hyperv_init_platform
+ */
+__visible void pvalidate_for_startup_64(struct boot_params *boot_params)
+{
+ struct boot_e820_entry *e820_entry;
+ u64 init_end =
+ boot_params->hdr.pref_address + boot_params->hdr.init_size;
+ u8 i, nr_entries = boot_params->e820_entries;
+ u64 needed_end;
+
+ if (!sev_snp_runtime_check())
+ return;
+
+ for (i = 0; i < nr_entries; ++i) {
+ /* Pvalidate memory holes in e820 RAM entries. */
+ e820_entry = &boot_params->e820_table[i];
+ if (i < nr_entries - 1) {
+ needed_end = boot_params->e820_table[i + 1].addr;
+ if (needed_end < e820_entry->addr)
+ error("e820 table is not sorted.\n");
+ } else {
+ needed_end = init_end;
+ }
+ extend_e820_on_demand(e820_entry, needed_end);
+ }
+}
--
2.25.1



2022-11-29 13:12:00

by Borislav Petkov

[permalink] [raw]
Subject: Re: [RFC PATCH V2 01/18] x86/sev: Pvalidate memory gab for decompressing kernel

On Fri, Nov 18, 2022 at 10:46:15PM -0500, Tianyu Lan wrote:
> Subject: Re: [RFC PATCH V2 01/18] x86/sev: Pvalidate memory gab for decompressing kernel

"gab"?

As in gabber? :-)

> From: Tianyu Lan <[email protected]>
>
> Pvalidate needed pages for decompressing kernel. The E820_TYPE_RAM

"Validate" - let's not start inventing new words. We're barely handling
the existing ones. :)

> entry includes only validated memory. The kernel expects that the
> RAM entry's addr is fixed while the entry size is to be extended

"addr"?

Commit message needs to be english - not a code/english hybrid. Pls be
more diligent here. Commit messages are not write-only.

> to cover addresses to the start of next entry. This patch increases

Avoid having "This patch" or "This commit" in the commit message. It is
tautologically useless.

Also, do

$ git grep 'This patch' Documentation/process

for more details.

Pls check your whole set.

Also, to the tone, from Documentation/process/submitting-patches.rst:

"Describe your changes in imperative mood, e.g. "make xyzzy do frotz"
instead of "[This patch] makes xyzzy do frotz" or "[I] changed xyzzy
to do frotz", as if you are giving orders to the codebase to change
its behaviour."

> the RAM entry size to cover all possilble memory addresses until

Unknown word [possilble] in commit message.
Suggestions: ['possible', 'possibly', 'passable', 'plausible', 'assailable', 'pliable', 'passably']

Please introduce a spellchecker into your patch creation workflow.

> init_size.

This whole commit message doesn't tell me a whole lot. Please try
structuring it this way:

Problem is A.

It happens because of B.

Fix it by doing C.

(Potentially do D).

For more detailed info, see
Documentation/process/submitting-patches.rst, Section "2) Describe your
changes".

> Signed-off-by: Tianyu Lan <[email protected]>
> ---
> arch/x86/boot/compressed/head_64.S | 8 +++
> arch/x86/boot/compressed/sev.c | 84 ++++++++++++++++++++++++++++++
> 2 files changed, 92 insertions(+)
>
> diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
> index d33f060900d2..818edaf5d0cf 100644
> --- a/arch/x86/boot/compressed/head_64.S
> +++ b/arch/x86/boot/compressed/head_64.S
> @@ -348,6 +348,14 @@ SYM_CODE_START(startup_64)
> cld
> cli
>
> +#ifdef CONFIG_AMD_MEM_ENCRYPT
> + /* pvalidate memory on demand if SNP is enabled. */

So this is going to be executed unconditionally on *every* SNP guest - not
only Hyper-V ones.

Why is that ok?

> + pushq %rsi
> + movq %rsi, %rdi
> + call pvalidate_for_startup_64
> + popq %rsi
> +#endif
> +
> /* Setup data segments. */
> xorl %eax, %eax
> movl %eax, %ds
> diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
> index 960968f8bf75..3a5a1ab16095 100644
> --- a/arch/x86/boot/compressed/sev.c
> +++ b/arch/x86/boot/compressed/sev.c
> @@ -12,8 +12,10 @@
> */
> #include "misc.h"
>
> +#include <asm/msr-index.h>
> #include <asm/pgtable_types.h>
> #include <asm/sev.h>
> +#include <asm/svm.h>
> #include <asm/trapnr.h>
> #include <asm/trap_pf.h>
> #include <asm/msr-index.h>
> @@ -21,6 +23,7 @@
> #include <asm/ptrace.h>
> #include <asm/svm.h>
> #include <asm/cpuid.h>
> +#include <asm/e820/types.h>
>
> #include "error.h"
> #include "../msr.h"
> @@ -117,6 +120,22 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
> /* Include code for early handlers */
> #include "../../kernel/sev-shared.c"
>
> +/* Check SEV-SNP via MSR */
> +static bool sev_snp_runtime_check(void)

Functions need to have a verb in the name.

> +{
> + unsigned long low, high;
> + u64 val;
> +
> + asm volatile("rdmsr\n" : "=a" (low), "=d" (high) :
> + "c" (MSR_AMD64_SEV));
> +
> + val = (high << 32) | low;
> + if (val & MSR_AMD64_SEV_SNP_ENABLED)
> + return true;

There already is a sev_snp_enabled() in that very same file. Did you not
see it?

Why are you even adding such a function?!

> + return false;
> +}
> +
> static inline bool sev_snp_enabled(void)
> {
> return sev_status & MSR_AMD64_SEV_SNP_ENABLED;
> @@ -456,3 +475,68 @@ void sev_prep_identity_maps(unsigned long top_level_pgt)
>
> sev_verify_cbit(top_level_pgt);
> }
> +
> +static void extend_e820_on_demand(struct boot_e820_entry *e820_entry,
> + u64 needed_ram_end)
> +{
> + u64 end, paddr;
> + unsigned long eflags;
> + int rc;
> +
> + if (!e820_entry)
> + return;
> +
> + /* Validated memory must be aligned by PAGE_SIZE. */
> + end = ALIGN(e820_entry->addr + e820_entry->size, PAGE_SIZE);
> + if (needed_ram_end > end && e820_entry->type == E820_TYPE_RAM) {
> + for (paddr = end; paddr < needed_ram_end; paddr += PAGE_SIZE) {
> + rc = pvalidate(paddr, RMP_PG_SIZE_4K, true);
> + if (rc) {
> + error("Failed to validate address.n");
> + return;
> + }
> + }
> + e820_entry->size = needed_ram_end - e820_entry->addr;
> + }
> +}
> +
> +/*
> + * Explicitly pvalidate needed pages for decompressing the kernel.
> + * The E820_TYPE_RAM entry includes only validated memory. The kernel
> + * expects that the RAM entry's addr is fixed while the entry size is to be
> + * extended to cover addresses to the start of next entry.
> + * The function increases the RAM entry size to cover all possible memory

Similar issue as above: you don't need to say "this function" above this
function. IOW, it should say:

"Increase the RAM entry size..."

I.e., imperative mood above.

> + * addresses until init_size.
> + * For example, init_end = 0x4000000,
> + * [RAM: 0x0 - 0x0], M[RAM: 0x0 - 0xa0000]
> + * [RSVD: 0xa0000 - 0x10000] [RSVD: 0xa0000 - 0x10000]
> + * [ACPI: 0x10000 - 0x20000] ==> [ACPI: 0x10000 - 0x20000]
> + * [RSVD: 0x800000 - 0x900000] [RSVD: 0x800000 - 0x900000]
> + * [RAM: 0x1000000 - 0x2000000] M[RAM: 0x1000000 - 0x2001000]
> + * [RAM: 0x2001000 - 0x2007000] M[RAM: 0x2001000 - 0x4000000]

What is this trying to tell me?

That the end range 0x2007000 gets raised to 0x4000000?

Why?

This all sounds like there is some requirement somewhere but nothing
says what that requirement is and why.

> + * Other RAM memory after init_end is pvalidated by ms_hyperv_init_platform
> + */
> +__visible void pvalidate_for_startup_64(struct boot_params *boot_params)

This doesn't do any validation. And yet it has "pvalidate" in the name.

> +{
> + struct boot_e820_entry *e820_entry;
> + u64 init_end =
> + boot_params->hdr.pref_address + boot_params->hdr.init_size;

Nope, we never break lines like that.

> + u8 i, nr_entries = boot_params->e820_entries;
> + u64 needed_end;

The tip-tree preferred ordering of variable declarations at the
beginning of a function is reverse fir tree order::

struct long_struct_name *descriptive_name;
unsigned long foo, bar;
unsigned int tmp;
int ret;

The above is faster to parse than the reverse ordering::

int ret;
unsigned int tmp;
unsigned long foo, bar;
struct long_struct_name *descriptive_name;

And even more so than random ordering::

unsigned long foo, bar;
int ret;
struct long_struct_name *descriptive_name;
unsigned int tmp;

> + if (!sev_snp_runtime_check())
> + return;
> +
> + for (i = 0; i < nr_entries; ++i) {
> + /* Pvalidate memory holes in e820 RAM entries. */
> + e820_entry = &boot_params->e820_table[i];
> + if (i < nr_entries - 1) {
> + needed_end = boot_params->e820_table[i + 1].addr;
> + if (needed_end < e820_entry->addr)
> + error("e820 table is not sorted.\n");
> + } else {
> + needed_end = init_end;
> + }
> + extend_e820_on_demand(e820_entry, needed_end);

Now *this* function does call pvalidate() and yet it doesn't have
"pvalidate" in the name. This all looks real confused.

So first of all, you need to explain *why* you're doing this.

It looks like it is because the guest needs to do the memory validation
by itself because nobody else does that.

If so, this needs to be explained in detail in the commit message.

Also, why is that ok for SNP guests on other hypervisors which get the
memory validated by the boot loader or firmware?

And so on and so on.

Thx.

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2022-11-29 14:50:15

by Tianyu Lan

[permalink] [raw]
Subject: Re: [RFC PATCH V2 01/18] x86/sev: Pvalidate memory gab for decompressing kernel

On 11/29/2022 8:56 PM, Borislav Petkov wrote:
>> +/* Check SEV-SNP via MSR */
>> +static bool sev_snp_runtime_check(void)
> Functions need to have a verb in the name.
>
>> +{
>> + unsigned long low, high;
>> + u64 val;
>> +
>> + asm volatile("rdmsr\n" : "=a" (low), "=d" (high) :
>> + "c" (MSR_AMD64_SEV));
>> +
>> + val = (high << 32) | low;
>> + if (val & MSR_AMD64_SEV_SNP_ENABLED)
>> + return true;
> There already is a sev_snp_enabled() in that very same file. Did you not
> see it?
>
> Why are you even adding such a function?!
Hi Boris:
Thanks for your review. sev_snp_enabled() is used after
sev_status was initialized in sev_enable() while pvalidate_for_startup_
64() is called before sev_enable(). So add sev_snp_runtime_check() to
check sev snp capability before calling sev_enable().

>
>> + return false;
>> +}
>> +
>> static inline bool sev_snp_enabled(void)
>> {
>> return sev_status & MSR_AMD64_SEV_SNP_ENABLED;
>> @@ -456,3 +475,68 @@ void sev_prep_identity_maps(unsigned long top_level_pgt)
>>
>> sev_verify_cbit(top_level_pgt);
>> }
>> +
>> +static void extend_e820_on_demand(struct boot_e820_entry *e820_entry,
>> + u64 needed_ram_end)
>> +{
>> + u64 end, paddr;
>> + unsigned long eflags;
>> + int rc;
>> +
>> + if (!e820_entry)
>> + return;
>> +
>> + /* Validated memory must be aligned by PAGE_SIZE. */
>> + end = ALIGN(e820_entry->addr + e820_entry->size, PAGE_SIZE);
>> + if (needed_ram_end > end && e820_entry->type == E820_TYPE_RAM) {
>> + for (paddr = end; paddr < needed_ram_end; paddr += PAGE_SIZE) {
>> + rc = pvalidate(paddr, RMP_PG_SIZE_4K, true);
>> + if (rc) {
>> + error("Failed to validate address.n");
>> + return;
>> + }
>> + }
>> + e820_entry->size = needed_ram_end - e820_entry->addr;
>> + }
>> +}
>> +
>> +/*
>> + * Explicitly pvalidate needed pages for decompressing the kernel.
>> + * The E820_TYPE_RAM entry includes only validated memory. The kernel
>> + * expects that the RAM entry's addr is fixed while the entry size is to be
>> + * extended to cover addresses to the start of next entry.
>> + * The function increases the RAM entry size to cover all possible memory
> Similar issue as above: you don't need to say "this function" above this
> function. IOW, it should say:
>
> "Increase the RAM entry size..."
>
> I.e., imperative mood above.
>
>> + * addresses until init_size.
>> + * For example, init_end = 0x4000000,
>> + * [RAM: 0x0 - 0x0], M[RAM: 0x0 - 0xa0000]
>> + * [RSVD: 0xa0000 - 0x10000] [RSVD: 0xa0000 - 0x10000]
>> + * [ACPI: 0x10000 - 0x20000] ==> [ACPI: 0x10000 - 0x20000]
>> + * [RSVD: 0x800000 - 0x900000] [RSVD: 0x800000 - 0x900000]
>> + * [RAM: 0x1000000 - 0x2000000] M[RAM: 0x1000000 - 0x2001000]
>> + * [RAM: 0x2001000 - 0x2007000] M[RAM: 0x2001000 - 0x4000000]
> What is this trying to tell me?
>
> That the end range 0x2007000 gets raised to 0x4000000?
>
> Why?
>
> This all sounds like there is some requirement somewhere but nothing
> says what that requirement is and why.
>
>> + * Other RAM memory after init_end is pvalidated by ms_hyperv_init_platform
>> + */
>> +__visible void pvalidate_for_startup_64(struct boot_params *boot_params)
> This doesn't do any validation. And yet it has "pvalidate" in the name.
>
>> +{
>> + struct boot_e820_entry *e820_entry;
>> + u64 init_end =
>> + boot_params->hdr.pref_address + boot_params->hdr.init_size;
> Nope, we never break lines like that.
>
>> + u8 i, nr_entries = boot_params->e820_entries;
>> + u64 needed_end;
> The tip-tree preferred ordering of variable declarations at the
> beginning of a function is reverse fir tree order::
>
> struct long_struct_name *descriptive_name;
> unsigned long foo, bar;
> unsigned int tmp;
> int ret;
>
> The above is faster to parse than the reverse ordering::
>
> int ret;
> unsigned int tmp;
> unsigned long foo, bar;
> struct long_struct_name *descriptive_name;
>
> And even more so than random ordering::
>
> unsigned long foo, bar;
> int ret;
> struct long_struct_name *descriptive_name;
> unsigned int tmp;
>
>> + if (!sev_snp_runtime_check())
>> + return;
>> +
>> + for (i = 0; i < nr_entries; ++i) {
>> + /* Pvalidate memory holes in e820 RAM entries. */
>> + e820_entry = &boot_params->e820_table[i];
>> + if (i < nr_entries - 1) {
>> + needed_end = boot_params->e820_table[i + 1].addr;
>> + if (needed_end < e820_entry->addr)
>> + error("e820 table is not sorted.\n");
>> + } else {
>> + needed_end = init_end;
>> + }
>> + extend_e820_on_demand(e820_entry, needed_end);
> Now*this* function does call pvalidate() and yet it doesn't have
> "pvalidate" in the name. This all looks real confused.
>
> So first of all, you need to explain*why* you're doing this.
>
> It looks like it is because the guest needs to do the memory validation
> by itself because nobody else does that.
>
> If so, this needs to be explained in detail in the commit message.

Yes, I will update in the next version. Thanks for suggestion.

>
> Also, why is that ok for SNP guests on other hypervisors which get the
> memory validated by the boot loader or firmware?

This is for Linux direct boot mode and so it needs to do such check
here. Will fix this in the next version.

2022-11-29 15:38:09

by Borislav Petkov

[permalink] [raw]
Subject: Re: [RFC PATCH V2 01/18] x86/sev: Pvalidate memory gab for decompressing kernel

On Tue, Nov 29, 2022 at 10:42:48PM +0800, Tianyu Lan wrote:
> Thanks for your review. sev_snp_enabled() is used after sev_status
> was initialized in sev_enable() while pvalidate_for_startup_ 64() is
> called before sev_enable().

Then you're going to have to change the code so that sev_status is
initialized before you need it. And not break others in the process.

And lemme save you some time - I won't accept sloppy code. You need to
integrate the functionality you need in the code paths properly - not
bolt it on in complete disregard of the flow just because it is easier.

> This is for Linux direct boot mode and so it needs to do such check
> here.

I don't know what "Linux direct boot mode" is so until you define it
properly and explain everything in detail, this is not going anywhere.

Thx.

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2022-12-06 10:17:44

by Gupta, Pankaj

[permalink] [raw]
Subject: Re: [RFC PATCH V2 01/18] x86/sev: Pvalidate memory gab for decompressing kernel

Hi Tianyu,

Some minor nits below.

> Pvalidate needed pages for decompressing kernel. The E820_TYPE_RAM
> entry includes only validated memory. The kernel expects that the
> RAM entry's addr is fixed while the entry size is to be extended
> to cover addresses to the start of next entry. This patch increases
> the RAM entry size to cover all possilble memory addresses until

s/possilble/possible

> init_size.
>
> Signed-off-by: Tianyu Lan <[email protected]>
> ---
> arch/x86/boot/compressed/head_64.S | 8 +++
> arch/x86/boot/compressed/sev.c | 84 ++++++++++++++++++++++++++++++
> 2 files changed, 92 insertions(+)
>
> diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
> index d33f060900d2..818edaf5d0cf 100644
> --- a/arch/x86/boot/compressed/head_64.S
> +++ b/arch/x86/boot/compressed/head_64.S
> @@ -348,6 +348,14 @@ SYM_CODE_START(startup_64)
> cld
> cli
>
> +#ifdef CONFIG_AMD_MEM_ENCRYPT
> + /* pvalidate memory on demand if SNP is enabled. */
> + pushq %rsi
> + movq %rsi, %rdi
> + call pvalidate_for_startup_64
> + popq %rsi
> +#endif
> +
> /* Setup data segments. */
> xorl %eax, %eax
> movl %eax, %ds
> diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
> index 960968f8bf75..3a5a1ab16095 100644
> --- a/arch/x86/boot/compressed/sev.c
> +++ b/arch/x86/boot/compressed/sev.c
> @@ -12,8 +12,10 @@
> */
> #include "misc.h"
>
> +#include <asm/msr-index.h>
> #include <asm/pgtable_types.h>
> #include <asm/sev.h>
> +#include <asm/svm.h>
> #include <asm/trapnr.h>
> #include <asm/trap_pf.h>
> #include <asm/msr-index.h>
> @@ -21,6 +23,7 @@
> #include <asm/ptrace.h>
> #include <asm/svm.h>
> #include <asm/cpuid.h>
> +#include <asm/e820/types.h>
>
> #include "error.h"
> #include "../msr.h"
> @@ -117,6 +120,22 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
> /* Include code for early handlers */
> #include "../../kernel/sev-shared.c"
>
> +/* Check SEV-SNP via MSR */
> +static bool sev_snp_runtime_check(void)
> +{
> + unsigned long low, high;
> + u64 val;
> +
> + asm volatile("rdmsr\n" : "=a" (low), "=d" (high) :
> + "c" (MSR_AMD64_SEV));
> +
> + val = (high << 32) | low;
> + if (val & MSR_AMD64_SEV_SNP_ENABLED)
> + return true;
> +
> + return false;
> +}
> +
> static inline bool sev_snp_enabled(void)
> {
> return sev_status & MSR_AMD64_SEV_SNP_ENABLED;
> @@ -456,3 +475,68 @@ void sev_prep_identity_maps(unsigned long top_level_pgt)
>
> sev_verify_cbit(top_level_pgt);
> }
> +
> +static void extend_e820_on_demand(struct boot_e820_entry *e820_entry,
> + u64 needed_ram_end)
> +{
> + u64 end, paddr;
> + unsigned long eflags;
> + int rc;
> +
> + if (!e820_entry)
> + return;
> +
> + /* Validated memory must be aligned by PAGE_SIZE. */
> + end = ALIGN(e820_entry->addr + e820_entry->size, PAGE_SIZE);
> + if (needed_ram_end > end && e820_entry->type == E820_TYPE_RAM) {

maybe "if check" can be used to return from here, would read code better?

> + for (paddr = end; paddr < needed_ram_end; paddr += PAGE_SIZE)
Maybe we could reuse 'pvalidate_pages' here?

> + rc = pvalidate(paddr, RMP_PG_SIZE_4K, true);
> + if (rc) {
> + error("Failed to validate address.n");

Would it make sense to print the cause of failure here?

> + return;
> + }
> + }
> + e820_entry->size = needed_ram_end - e820_entry->addr;
> + }
> +}
> +
> +/*
> + * Explicitly pvalidate needed pages for decompressing the kernel.
> + * The E820_TYPE_RAM entry includes only validated memory. The kernel
> + * expects that the RAM entry's addr is fixed while the entry size is to be
> + * extended to cover addresses to the start of next entry.
> + * The function increases the RAM entry size to cover all possible memory
> + * addresses until init_size.
> + * For example, init_end = 0x4000000,
> + * [RAM: 0x0 - 0x0], M[RAM: 0x0 - 0xa0000]
> + * [RSVD: 0xa0000 - 0x10000] [RSVD: 0xa0000 - 0x10000]
> + * [ACPI: 0x10000 - 0x20000] ==> [ACPI: 0x10000 - 0x20000]
> + * [RSVD: 0x800000 - 0x900000] [RSVD: 0x800000 - 0x900000]
> + * [RAM: 0x1000000 - 0x2000000] M[RAM: 0x1000000 - 0x2001000]
> + * [RAM: 0x2001000 - 0x2007000] M[RAM: 0x2001000 - 0x4000000]
> + * Other RAM memory after init_end is pvalidated by ms_hyperv_init_platform
> + */
> +__visible void pvalidate_for_startup_64(struct boot_params *boot_params)
> +{
> + struct boot_e820_entry *e820_entry;
> + u64 init_end =
> + boot_params->hdr.pref_address + boot_params->hdr.init_size;
> + u8 i, nr_entries = boot_params->e820_entries;
> + u64 needed_end;

Could not very well interpret the name 'needed_end'.

> +
> + if (!sev_snp_runtime_check())
> + return;
> +
> + for (i = 0; i < nr_entries; ++i) {
> + /* Pvalidate memory holes in e820 RAM entries. */

Pvalidate and pvalidate names are used interchangeably in comments.

> + e820_entry = &boot_params->e820_table[i];
> + if (i < nr_entries - 1) {
> + needed_end = boot_params->e820_table[i + 1].addr;
> + if (needed_end < e820_entry->addr)
> + error("e820 table is not sorted.\n");
> + } else {
> + needed_end = init_end;
> + }
> + extend_e820_on_demand(e820_entry, needed_end);
> + }
> +}

2022-12-08 13:43:19

by Tianyu Lan

[permalink] [raw]
Subject: Re: [RFC PATCH V2 01/18] x86/sev: Pvalidate memory gab for decompressing kernel

On 12/6/2022 5:16 PM, Gupta, Pankaj wrote:
> Hi Tianyu,
>
> Some minor nits below.
>
Thanks a lot to review. I will change these in the next version.

>> Pvalidate needed pages for decompressing kernel. The E820_TYPE_RAM
>> entry includes only validated memory. The kernel expects that the
>> RAM entry's addr is fixed while the entry size is to be extended
>> to cover addresses to the start of next entry. This patch increases
>> the RAM entry size to cover all possilble memory addresses until
>
> s/possilble/possible
>
>> init_size.
>>
>> Signed-off-by: Tianyu Lan <[email protected]>
>> ---
>>   arch/x86/boot/compressed/head_64.S |  8 +++
>>   arch/x86/boot/compressed/sev.c     | 84 ++++++++++++++++++++++++++++++
>>   2 files changed, 92 insertions(+)
>>
>> diff --git a/arch/x86/boot/compressed/head_64.S
>> b/arch/x86/boot/compressed/head_64.S
>> index d33f060900d2..818edaf5d0cf 100644
>> --- a/arch/x86/boot/compressed/head_64.S
>> +++ b/arch/x86/boot/compressed/head_64.S
>> @@ -348,6 +348,14 @@ SYM_CODE_START(startup_64)
>>       cld
>>       cli
>> +#ifdef CONFIG_AMD_MEM_ENCRYPT
>> +    /* pvalidate memory on demand if SNP is enabled. */
>> +    pushq    %rsi
>> +    movq    %rsi, %rdi
>> +    call     pvalidate_for_startup_64
>> +    popq    %rsi
>> +#endif
>> +
>>       /* Setup data segments. */
>>       xorl    %eax, %eax
>>       movl    %eax, %ds
>> diff --git a/arch/x86/boot/compressed/sev.c
>> b/arch/x86/boot/compressed/sev.c
>> index 960968f8bf75..3a5a1ab16095 100644
>> --- a/arch/x86/boot/compressed/sev.c
>> +++ b/arch/x86/boot/compressed/sev.c
>> @@ -12,8 +12,10 @@
>>    */
>>   #include "misc.h"
>> +#include <asm/msr-index.h>
>>   #include <asm/pgtable_types.h>
>>   #include <asm/sev.h>
>> +#include <asm/svm.h>
>>   #include <asm/trapnr.h>
>>   #include <asm/trap_pf.h>
>>   #include <asm/msr-index.h>
>> @@ -21,6 +23,7 @@
>>   #include <asm/ptrace.h>
>>   #include <asm/svm.h>
>>   #include <asm/cpuid.h>
>> +#include <asm/e820/types.h>
>>   #include "error.h"
>>   #include "../msr.h"
>> @@ -117,6 +120,22 @@ static enum es_result vc_read_mem(struct
>> es_em_ctxt *ctxt,
>>   /* Include code for early handlers */
>>   #include "../../kernel/sev-shared.c"
>> +/* Check SEV-SNP via MSR */
>> +static bool sev_snp_runtime_check(void)
>> +{
>> +    unsigned long low, high;
>> +    u64 val;
>> +
>> +    asm volatile("rdmsr\n" : "=a" (low), "=d" (high) :
>> +            "c" (MSR_AMD64_SEV));
>> +
>> +    val = (high << 32) | low;
>> +    if (val & MSR_AMD64_SEV_SNP_ENABLED)
>> +        return true;
>> +
>> +    return false;
>> +}
>> +
>>   static inline bool sev_snp_enabled(void)
>>   {
>>       return sev_status & MSR_AMD64_SEV_SNP_ENABLED;
>> @@ -456,3 +475,68 @@ void sev_prep_identity_maps(unsigned long
>> top_level_pgt)
>>       sev_verify_cbit(top_level_pgt);
>>   }
>> +
>> +static void extend_e820_on_demand(struct boot_e820_entry *e820_entry,
>> +                  u64 needed_ram_end)
>> +{
>> +    u64 end, paddr;
>> +    unsigned long eflags;
>> +    int rc;
>> +
>> +    if (!e820_entry)
>> +        return;
>> +
>> +    /* Validated memory must be aligned by PAGE_SIZE. */
>> +    end = ALIGN(e820_entry->addr + e820_entry->size, PAGE_SIZE);
>> +    if (needed_ram_end > end && e820_entry->type == E820_TYPE_RAM) {
>
> maybe "if check" can be used to return from here, would read code better?
>
>> +        for (paddr = end; paddr < needed_ram_end; paddr += PAGE_SIZE)
> Maybe we could reuse 'pvalidate_pages' here?
>
>> +            rc = pvalidate(paddr, RMP_PG_SIZE_4K, true);
>> +            if (rc) {
>> +                error("Failed to validate address.n");
>
> Would it make sense to print the cause of failure here?
>
>> +                return;
>> +            }
>> +        }
>> +        e820_entry->size = needed_ram_end - e820_entry->addr;
>> +    }
>> +}
>> +
>> +/*
>> + * Explicitly pvalidate needed pages for decompressing the kernel.
>> + * The E820_TYPE_RAM entry includes only validated memory. The kernel
>> + * expects that the RAM entry's addr is fixed while the entry size is
>> to be
>> + * extended to cover addresses to the start of next entry.
>> + * The function increases the RAM entry size to cover all possible
>> memory
>> + * addresses until init_size.
>> + * For example,  init_end = 0x4000000,
>> + * [RAM: 0x0 - 0x0],                       M[RAM: 0x0 - 0xa0000]
>> + * [RSVD: 0xa0000 - 0x10000]                [RSVD: 0xa0000 - 0x10000]
>> + * [ACPI: 0x10000 - 0x20000]      ==>       [ACPI: 0x10000 - 0x20000]
>> + * [RSVD: 0x800000 - 0x900000]              [RSVD: 0x800000 - 0x900000]
>> + * [RAM: 0x1000000 - 0x2000000]            M[RAM: 0x1000000 - 0x2001000]
>> + * [RAM: 0x2001000 - 0x2007000]            M[RAM: 0x2001000 - 0x4000000]
>> + * Other RAM memory after init_end is pvalidated by
>> ms_hyperv_init_platform
>> + */
>> +__visible void pvalidate_for_startup_64(struct boot_params *boot_params)
>> +{
>> +    struct boot_e820_entry *e820_entry;
>> +    u64 init_end =
>> +        boot_params->hdr.pref_address + boot_params->hdr.init_size;
>> +    u8 i, nr_entries = boot_params->e820_entries;
>> +    u64 needed_end;
>
> Could not very well interpret the name 'needed_end'.
>
>> +
>> +    if (!sev_snp_runtime_check())
>> +        return;
>> +
>> +    for (i = 0; i < nr_entries; ++i) {
>> +        /* Pvalidate memory holes in e820 RAM entries. */
>
> Pvalidate and pvalidate names are used interchangeably in comments.
>
>> +        e820_entry = &boot_params->e820_table[i];
>> +        if (i < nr_entries - 1) {
>> +            needed_end = boot_params->e820_table[i + 1].addr;
>> +            if (needed_end < e820_entry->addr)
>> +                error("e820 table is not sorted.\n");
>> +        } else {
>> +            needed_end = init_end;
>> +        }
>> +        extend_e820_on_demand(e820_entry, needed_end);
>> +    }
>> +}
>

2022-12-28 19:44:24

by Michael Kelley (LINUX)

[permalink] [raw]
Subject: RE: [RFC PATCH V2 01/18] x86/sev: Pvalidate memory gab for decompressing kernel

From: Tianyu Lan <[email protected]> Sent: Tuesday, November 29, 2022 6:43 AM
>
> On 11/29/2022 8:56 PM, Borislav Petkov wrote:
> >> +/* Check SEV-SNP via MSR */
> >> +static bool sev_snp_runtime_check(void)
> > Functions need to have a verb in the name.
> >
> >> +{
> >> + unsigned long low, high;
> >> + u64 val;
> >> +
> >> + asm volatile("rdmsr\n" : "=a" (low), "=d" (high) :
> >> + "c" (MSR_AMD64_SEV));
> >> +
> >> + val = (high << 32) | low;
> >> + if (val & MSR_AMD64_SEV_SNP_ENABLED)
> >> + return true;
> > There already is a sev_snp_enabled() in that very same file. Did you not
> > see it?
> >
> > Why are you even adding such a function?!
>
> Hi Boris:
> Thanks for your review. sev_snp_enabled() is used after
> sev_status was initialized in sev_enable() while pvalidate_for_startup_
> 64() is called before sev_enable(). So add sev_snp_runtime_check() to
> check sev snp capability before calling sev_enable().

I understand needing to find out if SEV-SNP is enabled before sev_enable()
has set the value of sev_status. Unfortunately, just checking the
SEV_SNP_ENABLED bit in MSR_AMD64_SEV isn't sufficient. sev_enable()
correctly does a more complex check. The CPUID bit is checked first, and
the MSR is read only if the CPUID bit indicating SEV is set. The reason for
this behavior is described in the commit message for 009767dbf42a.
Furthermore, even if the MSR is safe to read, just checking the
SEV_SNP_ENABLED bit isn't sufficient because that bit is "1" in a vTOM VM.
The MSR check would need to be that SEV_SNP_ENABLED is set, and
VTOM_ENABLED is not set.

Michael

>
> >
> >> + return false;
> >> +}
> >> +
> >> static inline bool sev_snp_enabled(void)
> >> {
> >> return sev_status & MSR_AMD64_SEV_SNP_ENABLED;
> >> @@ -456,3 +475,68 @@ void sev_prep_identity_maps(unsigned long top_level_pgt)
> >>
> >> sev_verify_cbit(top_level_pgt);
> >> }
> >> +
> >> +static void extend_e820_on_demand(struct boot_e820_entry *e820_entry,
> >> + u64 needed_ram_end)
> >> +{
> >> + u64 end, paddr;
> >> + unsigned long eflags;
> >> + int rc;
> >> +
> >> + if (!e820_entry)
> >> + return;
> >> +
> >> + /* Validated memory must be aligned by PAGE_SIZE. */
> >> + end = ALIGN(e820_entry->addr + e820_entry->size, PAGE_SIZE);
> >> + if (needed_ram_end > end && e820_entry->type == E820_TYPE_RAM) {
> >> + for (paddr = end; paddr < needed_ram_end; paddr += PAGE_SIZE) {
> >> + rc = pvalidate(paddr, RMP_PG_SIZE_4K, true);
> >> + if (rc) {
> >> + error("Failed to validate address.n");
> >> + return;
> >> + }
> >> + }
> >> + e820_entry->size = needed_ram_end - e820_entry->addr;
> >> + }
> >> +}
> >> +
> >> +/*
> >> + * Explicitly pvalidate needed pages for decompressing the kernel.
> >> + * The E820_TYPE_RAM entry includes only validated memory. The kernel
> >> + * expects that the RAM entry's addr is fixed while the entry size is to be
> >> + * extended to cover addresses to the start of next entry.
> >> + * The function increases the RAM entry size to cover all possible memory
> > Similar issue as above: you don't need to say "this function" above this
> > function. IOW, it should say:
> >
> > "Increase the RAM entry size..."
> >
> > I.e., imperative mood above.
> >
> >> + * addresses until init_size.
> >> + * For example, init_end = 0x4000000,
> >> + * [RAM: 0x0 - 0x0], M[RAM: 0x0 - 0xa0000]
> >> + * [RSVD: 0xa0000 - 0x10000] [RSVD: 0xa0000 - 0x10000]
> >> + * [ACPI: 0x10000 - 0x20000] ==> [ACPI: 0x10000 - 0x20000]
> >> + * [RSVD: 0x800000 - 0x900000] [RSVD: 0x800000 - 0x900000]
> >> + * [RAM: 0x1000000 - 0x2000000] M[RAM: 0x1000000 - 0x2001000]
> >> + * [RAM: 0x2001000 - 0x2007000] M[RAM: 0x2001000 - 0x4000000]
> > What is this trying to tell me?
> >
> > That the end range 0x2007000 gets raised to 0x4000000?
> >
> > Why?
> >
> > This all sounds like there is some requirement somewhere but nothing
> > says what that requirement is and why.
> >
> >> + * Other RAM memory after init_end is pvalidated by ms_hyperv_init_platform
> >> + */
> >> +__visible void pvalidate_for_startup_64(struct boot_params *boot_params)
> > This doesn't do any validation. And yet it has "pvalidate" in the name.
> >
> >> +{
> >> + struct boot_e820_entry *e820_entry;
> >> + u64 init_end =
> >> + boot_params->hdr.pref_address + boot_params->hdr.init_size;
> > Nope, we never break lines like that.
> >
> >> + u8 i, nr_entries = boot_params->e820_entries;
> >> + u64 needed_end;
> > The tip-tree preferred ordering of variable declarations at the
> > beginning of a function is reverse fir tree order::
> >
> > struct long_struct_name *descriptive_name;
> > unsigned long foo, bar;
> > unsigned int tmp;
> > int ret;
> >
> > The above is faster to parse than the reverse ordering::
> >
> > int ret;
> > unsigned int tmp;
> > unsigned long foo, bar;
> > struct long_struct_name *descriptive_name;
> >
> > And even more so than random ordering::
> >
> > unsigned long foo, bar;
> > int ret;
> > struct long_struct_name *descriptive_name;
> > unsigned int tmp;
> >
> >> + if (!sev_snp_runtime_check())
> >> + return;
> >> +
> >> + for (i = 0; i < nr_entries; ++i) {
> >> + /* Pvalidate memory holes in e820 RAM entries. */
> >> + e820_entry = &boot_params->e820_table[i];
> >> + if (i < nr_entries - 1) {
> >> + needed_end = boot_params->e820_table[i + 1].addr;
> >> + if (needed_end < e820_entry->addr)
> >> + error("e820 table is not sorted.\n");
> >> + } else {
> >> + needed_end = init_end;
> >> + }
> >> + extend_e820_on_demand(e820_entry, needed_end);
> > Now*this* function does call pvalidate() and yet it doesn't have
> > "pvalidate" in the name. This all looks real confused.
> >
> > So first of all, you need to explain*why* you're doing this.
> >
> > It looks like it is because the guest needs to do the memory validation
> > by itself because nobody else does that.
> >
> > If so, this needs to be explained in detail in the commit message.
>
> Yes, I will update in the next version. Thanks for suggestion.
>
> >
> > Also, why is that ok for SNP guests on other hypervisors which get the
> > memory validated by the boot loader or firmware?
>
> This is for Linux direct boot mode and so it needs to do such check
> here. Will fix this in the next version.