The firmware will pre-accept the memory used to run the stub. But, the
stub is responsible for accepting the memory into which it decompresses
the main kernel. Accept memory just before decompression starts.
The stub is also responsible for choosing a physical address in which to
place the decompressed kernel image. The KASLR mechanism will randomize
this physical address. Since the accepted memory region is relatively
small, KASLR would be quite ineffective if it only used the pre-accepted
area (EFI_CONVENTIONAL_MEMORY). Ensure that KASLR randomizes among the
entire physical address space by also including EFI_UNACCEPTED_MEMORY.
Signed-off-by: Kirill A. Shutemov <[email protected]>
Reviewed-by: Liam Merwick <[email protected]>
Reviewed-by: Tom Lendacky <[email protected]>
---
arch/x86/boot/compressed/efi.h | 10 +++++++++
arch/x86/boot/compressed/kaslr.c | 35 +++++++++++++++++++++-----------
arch/x86/boot/compressed/mem.c | 31 ++++++++++++++++++++++++++++
arch/x86/boot/compressed/misc.c | 7 +++++++
arch/x86/boot/compressed/misc.h | 6 ++++++
5 files changed, 77 insertions(+), 12 deletions(-)
diff --git a/arch/x86/boot/compressed/efi.h b/arch/x86/boot/compressed/efi.h
index 7db2f41b54cd..866c0af8b5b9 100644
--- a/arch/x86/boot/compressed/efi.h
+++ b/arch/x86/boot/compressed/efi.h
@@ -16,6 +16,7 @@ typedef guid_t efi_guid_t __aligned(__alignof__(u32));
#define ACPI_TABLE_GUID EFI_GUID(0xeb9d2d30, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d)
#define ACPI_20_TABLE_GUID EFI_GUID(0x8868e871, 0xe4f1, 0x11d3, 0xbc, 0x22, 0x00, 0x80, 0xc7, 0x3c, 0x88, 0x81)
#define EFI_CC_BLOB_GUID EFI_GUID(0x067b1f5f, 0xcf26, 0x44c5, 0x85, 0x54, 0x93, 0xd7, 0x77, 0x91, 0x2d, 0x42)
+#define LINUX_EFI_UNACCEPTED_MEM_TABLE_GUID EFI_GUID(0xd5d1de3c, 0x105c, 0x44f9, 0x9e, 0xa9, 0xbc, 0xef, 0x98, 0x12, 0x00, 0x31)
#define EFI32_LOADER_SIGNATURE "EL32"
#define EFI64_LOADER_SIGNATURE "EL64"
@@ -32,6 +33,7 @@ typedef struct {
} efi_table_hdr_t;
#define EFI_CONVENTIONAL_MEMORY 7
+#define EFI_UNACCEPTED_MEMORY 15
#define EFI_MEMORY_MORE_RELIABLE \
((u64)0x0000000000010000ULL) /* higher reliability */
@@ -104,6 +106,14 @@ struct efi_setup_data {
u64 reserved[8];
};
+struct efi_unaccepted_memory {
+ u32 version;
+ u32 unit_size;
+ u64 phys_base;
+ u64 size;
+ unsigned long bitmap[];
+};
+
static inline int efi_guidcmp (efi_guid_t left, efi_guid_t right)
{
return memcmp(&left, &right, sizeof (efi_guid_t));
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 454757fbdfe5..749f0fe7e446 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -672,6 +672,28 @@ static bool process_mem_region(struct mem_vector *region,
}
#ifdef CONFIG_EFI
+
+/*
+ * Only EFI_CONVENTIONAL_MEMORY and EFI_UNACCEPTED_MEMORY (if supported) are
+ * guaranteed to be free.
+ *
+ * It is more conservative in picking free memory than the EFI spec allows:
+ *
+ * According to the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also free memory
+ * and thus available to place the kernel image into, but in practice there's
+ * firmware where using that memory leads to crashes.
+ */
+static inline bool memory_type_is_free(efi_memory_desc_t *md)
+{
+ if (md->type == EFI_CONVENTIONAL_MEMORY)
+ return true;
+
+ if (md->type == EFI_UNACCEPTED_MEMORY)
+ return IS_ENABLED(CONFIG_UNACCEPTED_MEMORY);
+
+ return false;
+}
+
/*
* Returns true if we processed the EFI memmap, which we prefer over the E820
* table if it is available.
@@ -716,18 +738,7 @@ process_efi_entries(unsigned long minimum, unsigned long image_size)
for (i = 0; i < nr_desc; i++) {
md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);
- /*
- * Here we are more conservative in picking free memory than
- * the EFI spec allows:
- *
- * According to the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also
- * free memory and thus available to place the kernel image into,
- * but in practice there's firmware where using that memory leads
- * to crashes.
- *
- * Only EFI_CONVENTIONAL_MEMORY is guaranteed to be free.
- */
- if (md->type != EFI_CONVENTIONAL_MEMORY)
+ if (!memory_type_is_free(md))
continue;
if (efi_soft_reserve_enabled() &&
diff --git a/arch/x86/boot/compressed/mem.c b/arch/x86/boot/compressed/mem.c
index 67594fcb11d9..4ecf26576a77 100644
--- a/arch/x86/boot/compressed/mem.c
+++ b/arch/x86/boot/compressed/mem.c
@@ -1,9 +1,40 @@
// SPDX-License-Identifier: GPL-2.0-only
#include "error.h"
+#include "misc.h"
void arch_accept_memory(phys_addr_t start, phys_addr_t end)
{
/* Platform-specific memory-acceptance call goes here */
error("Cannot accept memory");
}
+
+void init_unaccepted_memory(void)
+{
+ guid_t guid = LINUX_EFI_UNACCEPTED_MEM_TABLE_GUID;
+ struct efi_unaccepted_memory *unaccepted_table;
+ unsigned long cfg_table_pa;
+ unsigned int cfg_table_len;
+ enum efi_type et;
+ int ret;
+
+ et = efi_get_type(boot_params);
+ if (et == EFI_TYPE_NONE)
+ return;
+
+ ret = efi_get_conf_table(boot_params, &cfg_table_pa, &cfg_table_len);
+ if (ret)
+ error("EFI config table not found.");
+
+ unaccepted_table = (void *)efi_find_vendor_table(boot_params,
+ cfg_table_pa,
+ cfg_table_len,
+ guid);
+ if (!unaccepted_table)
+ return;
+
+ if (unaccepted_table->version != 1)
+ error("Unknown version of unaccepted memory table\n");
+
+ set_unaccepted_table(unaccepted_table);
+}
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 014ff222bf4b..36535a3753f5 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -455,6 +455,13 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
#endif
debug_putstr("\nDecompressing Linux... ");
+
+ if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY)) {
+ debug_putstr("Accepting memory... ");
+ init_unaccepted_memory();
+ accept_memory(__pa(output), __pa(output) + needed_size);
+ }
+
__decompress(input_data, input_len, NULL, NULL, output, output_len,
NULL, error);
entry_offset = parse_elf(output);
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 2f155a0e3041..e1a0b49e0ed2 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -247,4 +247,10 @@ static inline unsigned long efi_find_vendor_table(struct boot_params *bp,
}
#endif /* CONFIG_EFI */
+void init_unaccepted_memory(void);
+
+/* Implemented in EFI stub */
+void set_unaccepted_table(struct efi_unaccepted_memory *table);
+void accept_memory(phys_addr_t start, phys_addr_t end);
+
#endif /* BOOT_COMPRESSED_MISC_H */
--
2.39.3
On Thu, Jun 01, 2023 at 09:25:38PM +0300, Kirill A. Shutemov wrote:
> diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
> index 454757fbdfe5..749f0fe7e446 100644
> --- a/arch/x86/boot/compressed/kaslr.c
> +++ b/arch/x86/boot/compressed/kaslr.c
> @@ -672,6 +672,28 @@ static bool process_mem_region(struct mem_vector *region,
> }
>
> #ifdef CONFIG_EFI
> +
> +/*
> + * Only EFI_CONVENTIONAL_MEMORY and EFI_UNACCEPTED_MEMORY (if supported) are
> + * guaranteed to be free.
> + *
> + * It is more conservative in picking free memory than the EFI spec allows:
"Pick free memory more conservatively than the EFI spec allows:
EFI_BOOT_SERVICES_ ..."
> + *
> + * According to the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also free memory
> + * and thus available to place the kernel image into, but in practice there's
> + * firmware where using that memory leads to crashes.
... because that firmware still scratches into that memory or why?
> + */
> +static inline bool memory_type_is_free(efi_memory_desc_t *md)
> +{
> + if (md->type == EFI_CONVENTIONAL_MEMORY)
> + return true;
> +
> + if (md->type == EFI_UNACCEPTED_MEMORY)
> + return IS_ENABLED(CONFIG_UNACCEPTED_MEMORY);
Make it plan and simple:
if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY) &&
md->type == EFI_UNACCEPTED_MEMORY)
return true;
> +
> + return false;
> +}
> +
> /*
> * Returns true if we processed the EFI memmap, which we prefer over the E820
> * table if it is available.
> @@ -716,18 +738,7 @@ process_efi_entries(unsigned long minimum, unsigned long image_size)
> for (i = 0; i < nr_desc; i++) {
> md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);
>
> - /*
> - * Here we are more conservative in picking free memory than
> - * the EFI spec allows:
> - *
> - * According to the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also
> - * free memory and thus available to place the kernel image into,
> - * but in practice there's firmware where using that memory leads
> - * to crashes.
> - *
> - * Only EFI_CONVENTIONAL_MEMORY is guaranteed to be free.
> - */
> - if (md->type != EFI_CONVENTIONAL_MEMORY)
> + if (!memory_type_is_free(md))
> continue;
>
> if (efi_soft_reserve_enabled() &&
> diff --git a/arch/x86/boot/compressed/mem.c b/arch/x86/boot/compressed/mem.c
> index 67594fcb11d9..4ecf26576a77 100644
> --- a/arch/x86/boot/compressed/mem.c
> +++ b/arch/x86/boot/compressed/mem.c
> @@ -1,9 +1,40 @@
> // SPDX-License-Identifier: GPL-2.0-only
>
> #include "error.h"
> +#include "misc.h"
>
> void arch_accept_memory(phys_addr_t start, phys_addr_t end)
> {
> /* Platform-specific memory-acceptance call goes here */
> error("Cannot accept memory");
> }
> +
> +void init_unaccepted_memory(void)
> +{
> + guid_t guid = LINUX_EFI_UNACCEPTED_MEM_TABLE_GUID;
An additional space after the "=".
> + struct efi_unaccepted_memory *unaccepted_table;
> + unsigned long cfg_table_pa;
> + unsigned int cfg_table_len;
> + enum efi_type et;
> + int ret;
> +
> + et = efi_get_type(boot_params);
> + if (et == EFI_TYPE_NONE)
> + return;
> +
> + ret = efi_get_conf_table(boot_params, &cfg_table_pa, &cfg_table_len);
> + if (ret)
> + error("EFI config table not found.");
What's the point in erroring out here?
> + unaccepted_table = (void *)efi_find_vendor_table(boot_params,
> + cfg_table_pa,
> + cfg_table_len,
> + guid);
> + if (!unaccepted_table)
> + return;
> +
> + if (unaccepted_table->version != 1)
> + error("Unknown version of unaccepted memory table\n");
> +
> + set_unaccepted_table(unaccepted_table);
Why is this a function at all and not a simple assignment?
> diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
> index 014ff222bf4b..36535a3753f5 100644
> --- a/arch/x86/boot/compressed/misc.c
> +++ b/arch/x86/boot/compressed/misc.c
> @@ -455,6 +455,13 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
> #endif
>
> debug_putstr("\nDecompressing Linux... ");
> +
> + if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY)) {
> + debug_putstr("Accepting memory... ");
This needs to happen...
> + init_unaccepted_memory();
... after the init, after the init has parsed the config table and has
found unaccepted memory.
If not, you don't need to issue anything as that would be wrong.
> + accept_memory(__pa(output), __pa(output) + needed_size);
> + }
> +
> __decompress(input_data, input_len, NULL, NULL, output, output_len,
> NULL, error);
> entry_offset = parse_elf(output);
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Fri, Jun 02, 2023 at 04:06:41PM +0200, Borislav Petkov wrote:
> On Thu, Jun 01, 2023 at 09:25:38PM +0300, Kirill A. Shutemov wrote:
> > diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
> > index 454757fbdfe5..749f0fe7e446 100644
> > --- a/arch/x86/boot/compressed/kaslr.c
> > +++ b/arch/x86/boot/compressed/kaslr.c
> > @@ -672,6 +672,28 @@ static bool process_mem_region(struct mem_vector *region,
> > }
> >
> > #ifdef CONFIG_EFI
> > +
> > +/*
> > + * Only EFI_CONVENTIONAL_MEMORY and EFI_UNACCEPTED_MEMORY (if supported) are
> > + * guaranteed to be free.
> > + *
> > + * It is more conservative in picking free memory than the EFI spec allows:
>
> "Pick free memory more conservatively than the EFI spec allows:
> EFI_BOOT_SERVICES_ ..."
Okay.
> > + *
> > + * According to the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also free memory
> > + * and thus available to place the kernel image into, but in practice there's
> > + * firmware where using that memory leads to crashes.
>
> ... because that firmware still scratches into that memory or why?
I moved the existing comment. I don't have have any context beyond that.
Relevant commit: 0982adc74673 ("x86/boot/KASLR: Work around firmware bugs
by excluding EFI_BOOT_SERVICES_* and EFI_LOADER_* from KASLR's choice")
Ard, do you have anything to add here?
> > + */
> > +static inline bool memory_type_is_free(efi_memory_desc_t *md)
> > +{
> > + if (md->type == EFI_CONVENTIONAL_MEMORY)
> > + return true;
> > +
> > + if (md->type == EFI_UNACCEPTED_MEMORY)
> > + return IS_ENABLED(CONFIG_UNACCEPTED_MEMORY);
>
> Make it plan and simple:
>
> if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY) &&
> md->type == EFI_UNACCEPTED_MEMORY)
> return true;
I don't see why it is simpler. It looks unnecessary noisy to me.
But okay.
> > +
> > + return false;
> > +}
> > +
> > /*
> > * Returns true if we processed the EFI memmap, which we prefer over the E820
> > * table if it is available.
> > @@ -716,18 +738,7 @@ process_efi_entries(unsigned long minimum, unsigned long image_size)
> > for (i = 0; i < nr_desc; i++) {
> > md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);
> >
> > - /*
> > - * Here we are more conservative in picking free memory than
> > - * the EFI spec allows:
> > - *
> > - * According to the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also
> > - * free memory and thus available to place the kernel image into,
> > - * but in practice there's firmware where using that memory leads
> > - * to crashes.
> > - *
> > - * Only EFI_CONVENTIONAL_MEMORY is guaranteed to be free.
> > - */
> > - if (md->type != EFI_CONVENTIONAL_MEMORY)
> > + if (!memory_type_is_free(md))
> > continue;
> >
> > if (efi_soft_reserve_enabled() &&
> > diff --git a/arch/x86/boot/compressed/mem.c b/arch/x86/boot/compressed/mem.c
> > index 67594fcb11d9..4ecf26576a77 100644
> > --- a/arch/x86/boot/compressed/mem.c
> > +++ b/arch/x86/boot/compressed/mem.c
> > @@ -1,9 +1,40 @@
> > // SPDX-License-Identifier: GPL-2.0-only
> >
> > #include "error.h"
> > +#include "misc.h"
> >
> > void arch_accept_memory(phys_addr_t start, phys_addr_t end)
> > {
> > /* Platform-specific memory-acceptance call goes here */
> > error("Cannot accept memory");
> > }
> > +
> > +void init_unaccepted_memory(void)
> > +{
> > + guid_t guid = LINUX_EFI_UNACCEPTED_MEM_TABLE_GUID;
>
> An additional space after the "=".
Okay.
> > + struct efi_unaccepted_memory *unaccepted_table;
> > + unsigned long cfg_table_pa;
> > + unsigned int cfg_table_len;
> > + enum efi_type et;
> > + int ret;
> > +
> > + et = efi_get_type(boot_params);
> > + if (et == EFI_TYPE_NONE)
> > + return;
> > +
> > + ret = efi_get_conf_table(boot_params, &cfg_table_pa, &cfg_table_len);
> > + if (ret)
> > + error("EFI config table not found.");
>
> What's the point in erroring out here?
Configuration table suppose to be present, even if unaccepted memory is
not supported. Something is very wrong if it is missing.
I will downgrade it warn().
> > + unaccepted_table = (void *)efi_find_vendor_table(boot_params,
> > + cfg_table_pa,
> > + cfg_table_len,
> > + guid);
> > + if (!unaccepted_table)
> > + return;
> > +
> > + if (unaccepted_table->version != 1)
> > + error("Unknown version of unaccepted memory table\n");
> > +
> > + set_unaccepted_table(unaccepted_table);
>
> Why is this a function at all and not a simple assignment?
I wanted to keep unaccepted_table private to the libstub/unaccepted_memory.c.
The setter provides a good spot for documentation to guide unaccepted
memory enablers for other archs.
Still want replace it with direct assignment?
>
> > diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
> > index 014ff222bf4b..36535a3753f5 100644
> > --- a/arch/x86/boot/compressed/misc.c
> > +++ b/arch/x86/boot/compressed/misc.c
> > @@ -455,6 +455,13 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
> > #endif
> >
> > debug_putstr("\nDecompressing Linux... ");
> > +
> > + if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY)) {
> > + debug_putstr("Accepting memory... ");
>
> This needs to happen...
>
> > + init_unaccepted_memory();
>
> ... after the init, after the init has parsed the config table and has
> found unaccepted memory.
>
> If not, you don't need to issue anything as that would be wrong.
Okay, I will make init_unaccepted_memory() return true if unaccepted
memory is present and hide defined it always-false for !UNACCEPTED_MEMORY.
So this hunk will look this way:
if (init_unaccepted_memory()) {
debug_putstr("Accepting memory... ");
accept_memory(__pa(output), __pa(output) + needed_size);
}
--
Kiryl Shutsemau / Kirill A. Shutemov
On Fri, 2 Jun 2023 at 17:37, Kirill A. Shutemov
<[email protected]> wrote:
>
> On Fri, Jun 02, 2023 at 04:06:41PM +0200, Borislav Petkov wrote:
> > On Thu, Jun 01, 2023 at 09:25:38PM +0300, Kirill A. Shutemov wrote:
> > > diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
> > > index 454757fbdfe5..749f0fe7e446 100644
> > > --- a/arch/x86/boot/compressed/kaslr.c
> > > +++ b/arch/x86/boot/compressed/kaslr.c
> > > @@ -672,6 +672,28 @@ static bool process_mem_region(struct mem_vector *region,
> > > }
> > >
> > > #ifdef CONFIG_EFI
> > > +
> > > +/*
> > > + * Only EFI_CONVENTIONAL_MEMORY and EFI_UNACCEPTED_MEMORY (if supported) are
> > > + * guaranteed to be free.
> > > + *
> > > + * It is more conservative in picking free memory than the EFI spec allows:
> >
> > "Pick free memory more conservatively than the EFI spec allows:
> > EFI_BOOT_SERVICES_ ..."
>
> Okay.
>
> > > + *
> > > + * According to the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also free memory
> > > + * and thus available to place the kernel image into, but in practice there's
> > > + * firmware where using that memory leads to crashes.
> >
> > ... because that firmware still scratches into that memory or why?
>
> I moved the existing comment. I don't have have any context beyond that.
>
> Relevant commit: 0982adc74673 ("x86/boot/KASLR: Work around firmware bugs
> by excluding EFI_BOOT_SERVICES_* and EFI_LOADER_* from KASLR's choice")
>
> Ard, do you have anything to add here?
>
The problem is that on x86, there is buggy vendor/OEM EFI code that
registers for internal events that trigger when SetVirtualAddressMap()
is called, and assume that at that point, EfiBootServicesData memory
regions have not been touched by the loader yet, which is probably
true if you are booting Windows.
So on x86, the kernel proper also preserves these regions until after
it calls SetVirtualAddressMap() (efi_free_boot_services() in
arch/x86/platform/efi/quirks.c)
So for the same reason, this code needs to disregard those regions as well.
> > > + */
> > > +static inline bool memory_type_is_free(efi_memory_desc_t *md)
> > > +{
> > > + if (md->type == EFI_CONVENTIONAL_MEMORY)
> > > + return true;
> > > +
> > > + if (md->type == EFI_UNACCEPTED_MEMORY)
> > > + return IS_ENABLED(CONFIG_UNACCEPTED_MEMORY);
> >
> > Make it plan and simple:
> >
> > if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY) &&
> > md->type == EFI_UNACCEPTED_MEMORY)
> > return true;
>
> I don't see why it is simpler. It looks unnecessary noisy to me.
>
> But okay.
>
> > > +
> > > + return false;
> > > +}
> > > +
> > > /*
> > > * Returns true if we processed the EFI memmap, which we prefer over the E820
> > > * table if it is available.
> > > @@ -716,18 +738,7 @@ process_efi_entries(unsigned long minimum, unsigned long image_size)
> > > for (i = 0; i < nr_desc; i++) {
> > > md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);
> > >
> > > - /*
> > > - * Here we are more conservative in picking free memory than
> > > - * the EFI spec allows:
> > > - *
> > > - * According to the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also
> > > - * free memory and thus available to place the kernel image into,
> > > - * but in practice there's firmware where using that memory leads
> > > - * to crashes.
> > > - *
> > > - * Only EFI_CONVENTIONAL_MEMORY is guaranteed to be free.
> > > - */
> > > - if (md->type != EFI_CONVENTIONAL_MEMORY)
> > > + if (!memory_type_is_free(md))
> > > continue;
> > >
> > > if (efi_soft_reserve_enabled() &&
> > > diff --git a/arch/x86/boot/compressed/mem.c b/arch/x86/boot/compressed/mem.c
> > > index 67594fcb11d9..4ecf26576a77 100644
> > > --- a/arch/x86/boot/compressed/mem.c
> > > +++ b/arch/x86/boot/compressed/mem.c
> > > @@ -1,9 +1,40 @@
> > > // SPDX-License-Identifier: GPL-2.0-only
> > >
> > > #include "error.h"
> > > +#include "misc.h"
> > >
> > > void arch_accept_memory(phys_addr_t start, phys_addr_t end)
> > > {
> > > /* Platform-specific memory-acceptance call goes here */
> > > error("Cannot accept memory");
> > > }
> > > +
> > > +void init_unaccepted_memory(void)
> > > +{
> > > + guid_t guid = LINUX_EFI_UNACCEPTED_MEM_TABLE_GUID;
> >
> > An additional space after the "=".
>
> Okay.
>
> > > + struct efi_unaccepted_memory *unaccepted_table;
> > > + unsigned long cfg_table_pa;
> > > + unsigned int cfg_table_len;
> > > + enum efi_type et;
> > > + int ret;
> > > +
> > > + et = efi_get_type(boot_params);
> > > + if (et == EFI_TYPE_NONE)
> > > + return;
> > > +
> > > + ret = efi_get_conf_table(boot_params, &cfg_table_pa, &cfg_table_len);
> > > + if (ret)
> > > + error("EFI config table not found.");
> >
> > What's the point in erroring out here?
>
> Configuration table suppose to be present, even if unaccepted memory is
> not supported. Something is very wrong if it is missing.
>
> I will downgrade it warn().
>
> > > + unaccepted_table = (void *)efi_find_vendor_table(boot_params,
> > > + cfg_table_pa,
> > > + cfg_table_len,
> > > + guid);
> > > + if (!unaccepted_table)
> > > + return;
> > > +
> > > + if (unaccepted_table->version != 1)
> > > + error("Unknown version of unaccepted memory table\n");
> > > +
> > > + set_unaccepted_table(unaccepted_table);
> >
> > Why is this a function at all and not a simple assignment?
>
> I wanted to keep unaccepted_table private to the libstub/unaccepted_memory.c.
> The setter provides a good spot for documentation to guide unaccepted
> memory enablers for other archs.
>
> Still want replace it with direct assignment?
>
> >
> > > diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
> > > index 014ff222bf4b..36535a3753f5 100644
> > > --- a/arch/x86/boot/compressed/misc.c
> > > +++ b/arch/x86/boot/compressed/misc.c
> > > @@ -455,6 +455,13 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
> > > #endif
> > >
> > > debug_putstr("\nDecompressing Linux... ");
> > > +
> > > + if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY)) {
> > > + debug_putstr("Accepting memory... ");
> >
> > This needs to happen...
> >
> > > + init_unaccepted_memory();
> >
> > ... after the init, after the init has parsed the config table and has
> > found unaccepted memory.
> >
> > If not, you don't need to issue anything as that would be wrong.
>
> Okay, I will make init_unaccepted_memory() return true if unaccepted
> memory is present and hide defined it always-false for !UNACCEPTED_MEMORY.
> So this hunk will look this way:
>
> if (init_unaccepted_memory()) {
> debug_putstr("Accepting memory... ");
> accept_memory(__pa(output), __pa(output) + needed_size);
> }
>
> --
> Kiryl Shutsemau / Kirill A. Shutemov
On Fri, Jun 02, 2023 at 06:36:44PM +0300, Kirill A. Shutemov wrote:
> I don't see why it is simpler. It looks unnecessary noisy to me.
Noisy?
I have no clue what you mean.
It is regular:
if (bla && flu)
vs
if (bla)
return flu();
It is about having regular patterns which can be recognized at a quick
glance by those who get to stare at that code constantly.
> Configuration table suppose to be present, even if unaccepted memory is
> not supported. Something is very wrong if it is missing.
I am not sure if it is the decompressor's job to do such validation
- I guess this is something the EFI code should do.
> I will downgrade it warn().
Yes, or simply return here without accepting memory - plain and simple.
> I wanted to keep unaccepted_table private to the libstub/unaccepted_memory.c.
> The setter provides a good spot for documentation to guide unaccepted
> memory enablers for other archs.
>
> Still want replace it with direct assignment?
No clue. Why would you want to keep a variable in the libstub private
which is not even in kernel proper, AFAICT?
> Okay, I will make init_unaccepted_memory() return true if unaccepted
> memory is present and hide defined it always-false for !UNACCEPTED_MEMORY.
> So this hunk will look this way:
>
> if (init_unaccepted_memory()) {
> debug_putstr("Accepting memory... ");
> accept_memory(__pa(output), __pa(output) + needed_size);
> }
Yap, thanks.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Fri, Jun 02, 2023 at 05:59:16PM +0200, Ard Biesheuvel wrote:
> The problem is that on x86, there is buggy vendor/OEM EFI code that
> registers for internal events that trigger when SetVirtualAddressMap()
> is called, and assume that at that point, EfiBootServicesData memory
> regions have not been touched by the loader yet, which is probably
> true if you are booting Windows.
>
> So on x86, the kernel proper also preserves these regions until after
> it calls SetVirtualAddressMap() (efi_free_boot_services() in
> arch/x86/platform/efi/quirks.c)
>
> So for the same reason, this code needs to disregard those regions as well.
I'd like for us to have this explanation in the comment since it is
being touched anyway.
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Fri, 2 Jun 2023 at 18:09, Borislav Petkov <[email protected]> wrote:
>
> On Fri, Jun 02, 2023 at 06:36:44PM +0300, Kirill A. Shutemov wrote:
..
> > Configuration table suppose to be present, even if unaccepted memory is
> > not supported. Something is very wrong if it is missing.
>
> I am not sure if it is the decompressor's job to do such validation
> - I guess this is something the EFI code should do.
>
'EFI code' is ambiguous here.
Most of the decompressor code is constructed in a way that permits
- booting 'native EFI' via the EFI stub
- booting 'pseudo-EFI' where GRUB or another Linux/x86 specific
bootloader populates boot_params with all the EFI specific information
(system table, memory map, etc)
This distinction has been abstracted away here, and so we might be
dealing with the second case, and booting from a GRUB that does not
understand accepted memory, but simply copied the EFI memory map
(including unaccepted regions) as it normally does. (Note that the
second case also covers kexec boot, so we do need to support it)
On Fri, Jun 02, 2023 at 06:17:13PM +0200, Ard Biesheuvel wrote:
> 'EFI code' is ambiguous here.
>
> Most of the decompressor code is constructed in a way that permits
> - booting 'native EFI' via the EFI stub
> - booting 'pseudo-EFI' where GRUB or another Linux/x86 specific
> bootloader populates boot_params with all the EFI specific information
> (system table, memory map, etc)
>
> This distinction has been abstracted away here, and so we might be
> dealing with the second case, and booting from a GRUB that does not
> understand accepted memory, but simply copied the EFI memory map
> (including unaccepted regions) as it normally does. (Note that the
> second case also covers kexec boot, so we do need to support it)
Right, I was hoping there to be some glue which sanity-checks
boot_params.efi_info instead relying on users to do so and thus have
a bunch of duplicated code.
So, yes, right after populating the boot_params pointer...
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
Hi Borislav,
Below is updated patch that suppose to address all your concerns.
I also adjusted 3/9 to remove the setter and make the variable globally
visible.
All changes, including fixup for 9/9 has been pushed to the branch
https://github.com/intel/tdx.git guest-unaccepted-memory
Let me know if you see more problems with the patchset.
From ddb1c92d33217b5acd2ec3278f7ed751afd15bd1 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <[email protected]>
Date: Wed, 25 Aug 2021 21:42:51 +0300
Subject: [PATCH] x86/boot/compressed: Handle unaccepted memory
The firmware will pre-accept the memory used to run the stub. But, the
stub is responsible for accepting the memory into which it decompresses
the main kernel. Accept memory just before decompression starts.
The stub is also responsible for choosing a physical address in which to
place the decompressed kernel image. The KASLR mechanism will randomize
this physical address. Since the accepted memory region is relatively
small, KASLR would be quite ineffective if it only used the pre-accepted
area (EFI_CONVENTIONAL_MEMORY). Ensure that KASLR randomizes among the
entire physical address space by also including EFI_UNACCEPTED_MEMORY.
Signed-off-by: Kirill A. Shutemov <[email protected]>
Reviewed-by: Liam Merwick <[email protected]>
Reviewed-by: Tom Lendacky <[email protected]>
---
arch/x86/boot/compressed/efi.h | 10 ++++++++
arch/x86/boot/compressed/kaslr.c | 40 +++++++++++++++++++++----------
arch/x86/boot/compressed/mem.c | 41 ++++++++++++++++++++++++++++++++
arch/x86/boot/compressed/misc.c | 6 +++++
arch/x86/boot/compressed/misc.h | 10 ++++++++
5 files changed, 95 insertions(+), 12 deletions(-)
diff --git a/arch/x86/boot/compressed/efi.h b/arch/x86/boot/compressed/efi.h
index 7db2f41b54cd..866c0af8b5b9 100644
--- a/arch/x86/boot/compressed/efi.h
+++ b/arch/x86/boot/compressed/efi.h
@@ -16,6 +16,7 @@ typedef guid_t efi_guid_t __aligned(__alignof__(u32));
#define ACPI_TABLE_GUID EFI_GUID(0xeb9d2d30, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d)
#define ACPI_20_TABLE_GUID EFI_GUID(0x8868e871, 0xe4f1, 0x11d3, 0xbc, 0x22, 0x00, 0x80, 0xc7, 0x3c, 0x88, 0x81)
#define EFI_CC_BLOB_GUID EFI_GUID(0x067b1f5f, 0xcf26, 0x44c5, 0x85, 0x54, 0x93, 0xd7, 0x77, 0x91, 0x2d, 0x42)
+#define LINUX_EFI_UNACCEPTED_MEM_TABLE_GUID EFI_GUID(0xd5d1de3c, 0x105c, 0x44f9, 0x9e, 0xa9, 0xbc, 0xef, 0x98, 0x12, 0x00, 0x31)
#define EFI32_LOADER_SIGNATURE "EL32"
#define EFI64_LOADER_SIGNATURE "EL64"
@@ -32,6 +33,7 @@ typedef struct {
} efi_table_hdr_t;
#define EFI_CONVENTIONAL_MEMORY 7
+#define EFI_UNACCEPTED_MEMORY 15
#define EFI_MEMORY_MORE_RELIABLE \
((u64)0x0000000000010000ULL) /* higher reliability */
@@ -104,6 +106,14 @@ struct efi_setup_data {
u64 reserved[8];
};
+struct efi_unaccepted_memory {
+ u32 version;
+ u32 unit_size;
+ u64 phys_base;
+ u64 size;
+ unsigned long bitmap[];
+};
+
static inline int efi_guidcmp (efi_guid_t left, efi_guid_t right)
{
return memcmp(&left, &right, sizeof (efi_guid_t));
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 454757fbdfe5..9193acf0e9cd 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -672,6 +672,33 @@ static bool process_mem_region(struct mem_vector *region,
}
#ifdef CONFIG_EFI
+
+/*
+ * Only EFI_CONVENTIONAL_MEMORY and EFI_UNACCEPTED_MEMORY (if supported) are
+ * guaranteed to be free.
+ *
+ * Pick free memory more conservatively than the EFI spec allows: according to
+ * the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also free memory and thus
+ * available to place the kernel image into, but in practice there's firmware
+ * where using that memory leads to crashes. Buggy vendor EFI code registers
+ * for an event that triggers on SetVirtualAddressMap(). The handler assumes
+ * that EFI_BOOT_SERVICES_DATA memory has not been touched by loader yet, which
+ * is probably true for Windows.
+ *
+ * Preserve EFI_BOOT_SERVICES_* regions until after SetVirtualAddressMap().
+ */
+static inline bool memory_type_is_free(efi_memory_desc_t *md)
+{
+ if (md->type == EFI_CONVENTIONAL_MEMORY)
+ return true;
+
+ if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY) &&
+ md->type == EFI_UNACCEPTED_MEMORY)
+ return true;
+
+ return false;
+}
+
/*
* Returns true if we processed the EFI memmap, which we prefer over the E820
* table if it is available.
@@ -716,18 +743,7 @@ process_efi_entries(unsigned long minimum, unsigned long image_size)
for (i = 0; i < nr_desc; i++) {
md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);
- /*
- * Here we are more conservative in picking free memory than
- * the EFI spec allows:
- *
- * According to the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also
- * free memory and thus available to place the kernel image into,
- * but in practice there's firmware where using that memory leads
- * to crashes.
- *
- * Only EFI_CONVENTIONAL_MEMORY is guaranteed to be free.
- */
- if (md->type != EFI_CONVENTIONAL_MEMORY)
+ if (!memory_type_is_free(md))
continue;
if (efi_soft_reserve_enabled() &&
diff --git a/arch/x86/boot/compressed/mem.c b/arch/x86/boot/compressed/mem.c
index 67594fcb11d9..69038ed7a310 100644
--- a/arch/x86/boot/compressed/mem.c
+++ b/arch/x86/boot/compressed/mem.c
@@ -1,9 +1,50 @@
// SPDX-License-Identifier: GPL-2.0-only
#include "error.h"
+#include "misc.h"
void arch_accept_memory(phys_addr_t start, phys_addr_t end)
{
/* Platform-specific memory-acceptance call goes here */
error("Cannot accept memory");
}
+
+bool init_unaccepted_memory(void)
+{
+ guid_t guid = LINUX_EFI_UNACCEPTED_MEM_TABLE_GUID;
+ struct efi_unaccepted_memory *table;
+ unsigned long cfg_table_pa;
+ unsigned int cfg_table_len;
+ enum efi_type et;
+ int ret;
+
+ et = efi_get_type(boot_params);
+ if (et == EFI_TYPE_NONE)
+ return false;
+
+ ret = efi_get_conf_table(boot_params, &cfg_table_pa, &cfg_table_len);
+ if (ret) {
+ warn("EFI config table not found.");
+ return false;
+ }
+
+ table = (void *)efi_find_vendor_table(boot_params, cfg_table_pa,
+ cfg_table_len, guid);
+ if (!table)
+ return false;
+
+ if (table->version != 1)
+ error("Unknown version of unaccepted memory table\n");
+
+ /*
+ * In many cases unaccepted_table is already set by EFI stub, but it
+ * has to be initialized again to cover cases when the table is not
+ * allocated by EFI stub or EFI stub copied the kernel image with
+ * efi_relocate_kernel() before the variable is set.
+ *
+ * It must be initialized before the first usage of accept_memory().
+ */
+ unaccepted_table = table;
+
+ return true;
+}
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 014ff222bf4b..94b7abcf624b 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -455,6 +455,12 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
#endif
debug_putstr("\nDecompressing Linux... ");
+
+ if (init_unaccepted_memory()) {
+ debug_putstr("Accepting memory... ");
+ accept_memory(__pa(output), __pa(output) + needed_size);
+ }
+
__decompress(input_data, input_len, NULL, NULL, output, output_len,
NULL, error);
entry_offset = parse_elf(output);
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 2f155a0e3041..964fe903a1cd 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -247,4 +247,14 @@ static inline unsigned long efi_find_vendor_table(struct boot_params *bp,
}
#endif /* CONFIG_EFI */
+#ifdef CONFIG_UNACCEPTED_MEMORY
+bool init_unaccepted_memory(void);
+#else
+static inline bool init_unaccepted_memory(void) { return false; }
+#endif
+
+/* Defined in EFI stub */
+extern struct efi_unaccepted_memory *unaccepted_table;
+void accept_memory(phys_addr_t start, phys_addr_t end);
+
#endif /* BOOT_COMPRESSED_MISC_H */
--
2.39.3
--
Kiryl Shutsemau / Kirill A. Shutemov