2017-07-06 08:32:20

by Naoya Horiguchi

[permalink] [raw]
Subject: [PATCH] x86/boot/KASLR: exclude EFI_BOOT_SERVICES_{CODE|DATA} from KASLR's choice

Hi Baoquan, everyone,

I'm also interested in KASLR/EFI related issue (but not the same issue
with yours, so I separated the thread.)

This patch is based on Baoquan's recent patches[1], adding more code
on the new function process_efi_entry().
If it's OK, could you queue this onto your tree/series?

[1] "[PATCH v3 0/2] x86/boot/KASLR: Restrict kernel to be randomized"
https://lkml.org/lkml/2017/7/5/98

Thanks,
Naoya Horiguchi
---
From: Naoya Horiguchi <[email protected]>
Date: Thu, 6 Jul 2017 16:40:52 +0900
Subject: [PATCH] x86/boot/KASLR: exclude EFI_BOOT_SERVICES_{CODE|DATA} from
KASLR's choice

KASLR chooses kernel location from E820_TYPE_RAM regions by walking over
e820 entries now. E820_TYPE_RAM includes EFI_BOOT_SERVICES_CODE and
EFI_BOOT_SERVICES_DATA, so those regions can be the target. According to
UEFI spec, all memory regions marked as EfiBootServicesCode and
EfiBootServicesData are available for free memory after the first call
of ExitBootServices(). So such regions should be usable for kernel on
spec basis.

In x86, however, we have some workaround for broken firmware, where we
keep such regions reserved until SetVirtualAddressMap() is done.
See the following code in should_map_region():

static bool should_map_region(efi_memory_desc_t *md)
{
...
/*
* Map boot services regions as a workaround for buggy
* firmware that accesses them even when they shouldn't.
*
* See efi_{reserve,free}_boot_services().
*/
if (md->type == EFI_BOOT_SERVICES_CODE ||
md->type == EFI_BOOT_SERVICES_DATA)
return false;

This workaround suppressed a boot crash, but potential issues still
remain because no one prevents the regions from overlapping with kernel
image by KASLR.

So let's make sure that EFI_BOOT_SERVICES_{CODE|DATA} regions are never
chosen as kernel memory for the workaround to work fine.

Signed-off-by: Naoya Horiguchi <[email protected]>
---
arch/x86/boot/compressed/kaslr.c | 41 +++++++++++++++++++++++++++++++---------
1 file changed, 32 insertions(+), 9 deletions(-)

diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 94f08fd375ae..f43fed0441a6 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -563,7 +563,8 @@ static void process_mem_region(struct mem_vector *entry,
/* Marks if efi mirror regions have been found and handled. */
static bool efi_mirror_found;

-static void process_efi_entry(unsigned long minimum, unsigned long image_size)
+/* Returns true if we really enter efi memmap walk, otherwise returns false. */
+static bool process_efi_entry(unsigned long minimum, unsigned long image_size)
{
struct efi_info *e = &boot_params->efi_info;
struct mem_vector region;
@@ -577,13 +578,13 @@ static void process_efi_entry(unsigned long minimum, unsigned long image_size)
signature = (char *)&boot_params->efi_info.efi_loader_signature;
if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
strncmp(signature, EFI64_LOADER_SIGNATURE, 4))
- return;
+ return false;

#ifdef CONFIG_X86_32
/* Can't handle data above 4GB at this time */
if (e->efi_memmap_hi) {
warn("Memory map is above 4GB, EFI should be disabled.\n");
- return;
+ return false;
}
pmap = e->efi_memmap;
#else
@@ -593,13 +594,36 @@ static void process_efi_entry(unsigned long minimum, unsigned long image_size)
nr_desc = e->efi_memmap_size / e->efi_memdesc_size;
for (i = 0; i < nr_desc; i++) {
md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
- if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
- region.start = md->phys_addr;
- region.size = md->num_pages << EFI_PAGE_SHIFT;
- process_mem_region(&region, minimum, image_size);
+ if (md->attribute & EFI_MEMORY_MORE_RELIABLE)
efi_mirror_found = true;
+ }
+
+ for (i = 0; i < nr_desc; i++) {
+ md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
+
+ /*
+ * EFI_BOOT_SERVICES_{CODE|DATA} are avoided because boot
+ * services regions could be accessed after ExitBootServices()
+ * due to the workaround for buggy firmware.
+ */
+ if (!(md->type == EFI_LOADER_CODE ||
+ md->type == EFI_LOADER_DATA ||
+ md->type == EFI_CONVENTIONAL_MEMORY))
+ continue;
+
+ if (efi_mirror_found &&
+ !(md->attribute & EFI_MEMORY_MORE_RELIABLE))
+ continue;
+
+ region.start = md->phys_addr;
+ region.size = md->num_pages << EFI_PAGE_SHIFT;
+ process_mem_region(&region, minimum, image_size);
+ if (slot_area_index == MAX_SLOT_AREA) {
+ debug_putstr("Aborted EFI scan (slot_areas full)!\n");
+ break;
}
}
+ return true;
}

static void process_e820_entry(unsigned long minimum, unsigned long image_size)
@@ -637,8 +661,7 @@ static unsigned long find_random_phys_addr(unsigned long minimum,
minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);

#ifdef CONFIG_EFI
- process_efi_entry(minimum, image_size);
- if (efi_mirror_found)
+ if (process_efi_entry(minimum, image_size))
return slots_fetch_random();
#endif

--
2.7.4


2017-07-06 09:13:47

by Chao Fan

[permalink] [raw]
Subject: Re: [PATCH] x86/boot/KASLR: exclude EFI_BOOT_SERVICES_{CODE|DATA} from KASLR's choice

On Thu, Jul 06, 2017 at 08:31:07AM +0000, Naoya Horiguchi wrote:
>Hi Baoquan, everyone,
>
>I'm also interested in KASLR/EFI related issue (but not the same issue
>with yours, so I separated the thread.)
>
>This patch is based on Baoquan's recent patches[1], adding more code
>on the new function process_efi_entry().
>If it's OK, could you queue this onto your tree/series?
>
>[1] "[PATCH v3 0/2] x86/boot/KASLR: Restrict kernel to be randomized"
> https://lkml.org/lkml/2017/7/5/98
>
>Thanks,
>Naoya Horiguchi
>---
>From: Naoya Horiguchi <[email protected]>
>Date: Thu, 6 Jul 2017 16:40:52 +0900
>Subject: [PATCH] x86/boot/KASLR: exclude EFI_BOOT_SERVICES_{CODE|DATA} from
> KASLR's choice
>
>KASLR chooses kernel location from E820_TYPE_RAM regions by walking over
>e820 entries now. E820_TYPE_RAM includes EFI_BOOT_SERVICES_CODE and
>EFI_BOOT_SERVICES_DATA, so those regions can be the target. According to
>UEFI spec, all memory regions marked as EfiBootServicesCode and
>EfiBootServicesData are available for free memory after the first call
>of ExitBootServices(). So such regions should be usable for kernel on
>spec basis.
>
>In x86, however, we have some workaround for broken firmware, where we
>keep such regions reserved until SetVirtualAddressMap() is done.
>See the following code in should_map_region():
>
> static bool should_map_region(efi_memory_desc_t *md)
> {
> ...
> /*
> * Map boot services regions as a workaround for buggy
> * firmware that accesses them even when they shouldn't.
> *
> * See efi_{reserve,free}_boot_services().
> */
> if (md->type == EFI_BOOT_SERVICES_CODE ||
> md->type == EFI_BOOT_SERVICES_DATA)
> return false;
>
>This workaround suppressed a boot crash, but potential issues still
>remain because no one prevents the regions from overlapping with kernel
>image by KASLR.
>
>So let's make sure that EFI_BOOT_SERVICES_{CODE|DATA} regions are never
>chosen as kernel memory for the workaround to work fine.
>
>Signed-off-by: Naoya Horiguchi <[email protected]>
>---
> arch/x86/boot/compressed/kaslr.c | 41 +++++++++++++++++++++++++++++++---------
> 1 file changed, 32 insertions(+), 9 deletions(-)
>
>diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
>index 94f08fd375ae..f43fed0441a6 100644
>--- a/arch/x86/boot/compressed/kaslr.c
>+++ b/arch/x86/boot/compressed/kaslr.c
>@@ -563,7 +563,8 @@ static void process_mem_region(struct mem_vector *entry,
> /* Marks if efi mirror regions have been found and handled. */
> static bool efi_mirror_found;
>
>-static void process_efi_entry(unsigned long minimum, unsigned long image_size)
>+/* Returns true if we really enter efi memmap walk, otherwise returns false. */
>+static bool process_efi_entry(unsigned long minimum, unsigned long image_size)
> {
> struct efi_info *e = &boot_params->efi_info;
> struct mem_vector region;
>@@ -577,13 +578,13 @@ static void process_efi_entry(unsigned long minimum, unsigned long image_size)
> signature = (char *)&boot_params->efi_info.efi_loader_signature;
> if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
> strncmp(signature, EFI64_LOADER_SIGNATURE, 4))
>- return;
>+ return false;
>
> #ifdef CONFIG_X86_32
> /* Can't handle data above 4GB at this time */
> if (e->efi_memmap_hi) {
> warn("Memory map is above 4GB, EFI should be disabled.\n");
>- return;
>+ return false;
> }
> pmap = e->efi_memmap;
> #else
>@@ -593,13 +594,36 @@ static void process_efi_entry(unsigned long minimum, unsigned long image_size)
> nr_desc = e->efi_memmap_size / e->efi_memdesc_size;
> for (i = 0; i < nr_desc; i++) {
> md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
>- if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
>- region.start = md->phys_addr;
>- region.size = md->num_pages << EFI_PAGE_SHIFT;
>- process_mem_region(&region, minimum, image_size);
>+ if (md->attribute & EFI_MEMORY_MORE_RELIABLE)
> efi_mirror_found = true;

Hi Horiguchi-san,

If efi_mirror_found is changed to be true, we won't need to walk other
entries, so I think:
if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
efi_mirror_found = true;
break;
}
will be enough to show that mirror regions exist. And will walk
less entries. How do you think about this?

Another question: what's the benifit of putting this part of
"efi_mirror_found = true" to a independent cycle.

Thanks,
Chao Fan

>+ }
>+
>+ for (i = 0; i < nr_desc; i++) {
>+ md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
>+
>+ /*
>+ * EFI_BOOT_SERVICES_{CODE|DATA} are avoided because boot
>+ * services regions could be accessed after ExitBootServices()
>+ * due to the workaround for buggy firmware.
>+ */
>+ if (!(md->type == EFI_LOADER_CODE ||
>+ md->type == EFI_LOADER_DATA ||
>+ md->type == EFI_CONVENTIONAL_MEMORY))
>+ continue;
>+
>+ if (efi_mirror_found &&
>+ !(md->attribute & EFI_MEMORY_MORE_RELIABLE))
>+ continue;
>+
>+ region.start = md->phys_addr;
>+ region.size = md->num_pages << EFI_PAGE_SHIFT;
>+ process_mem_region(&region, minimum, image_size);
>+ if (slot_area_index == MAX_SLOT_AREA) {
>+ debug_putstr("Aborted EFI scan (slot_areas full)!\n");
>+ break;
> }
> }
>+ return true;
> }
>
> static void process_e820_entry(unsigned long minimum, unsigned long image_size)
>@@ -637,8 +661,7 @@ static unsigned long find_random_phys_addr(unsigned long minimum,
> minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
>
> #ifdef CONFIG_EFI
>- process_efi_entry(minimum, image_size);
>- if (efi_mirror_found)
>+ if (process_efi_entry(minimum, image_size))
> return slots_fetch_random();
> #endif
>
>--
>2.7.4
>
>
>


2017-07-06 09:18:20

by Baoquan He

[permalink] [raw]
Subject: Re: [PATCH] x86/boot/KASLR: exclude EFI_BOOT_SERVICES_{CODE|DATA} from KASLR's choice

Hi Naoya Horiguchi,

Thanks for making this!

On 07/06/17 at 08:31am, Naoya Horiguchi wrote:
> Hi Baoquan, everyone,
>
> I'm also interested in KASLR/EFI related issue (but not the same issue
> with yours, so I separated the thread.)
>
> This patch is based on Baoquan's recent patches[1], adding more code
> on the new function process_efi_entry().
> If it's OK, could you queue this onto your tree/series?

This is interesting. So you are suggesting that we should try to avoid
those EFI_BOOT_SERVICES_{CODE|DATA} efi regions as long as efi map
regions are available, meanwhile try to locate kernel inside mirrored
regions if existed. I do know the efi work around, so it seems reasonable
to me, I can add it when repost. Or you can post after mine has been
merged.

A little adjustment, please see the inline comment.
>
> [1] "[PATCH v3 0/2] x86/boot/KASLR: Restrict kernel to be randomized"
> https://lkml.org/lkml/2017/7/5/98
>
> Thanks,
> Naoya Horiguchi
> ---
> From: Naoya Horiguchi <[email protected]>
> Date: Thu, 6 Jul 2017 16:40:52 +0900
> Subject: [PATCH] x86/boot/KASLR: exclude EFI_BOOT_SERVICES_{CODE|DATA} from
> KASLR's choice
>
> KASLR chooses kernel location from E820_TYPE_RAM regions by walking over
> e820 entries now. E820_TYPE_RAM includes EFI_BOOT_SERVICES_CODE and
> EFI_BOOT_SERVICES_DATA, so those regions can be the target. According to
> UEFI spec, all memory regions marked as EfiBootServicesCode and
> EfiBootServicesData are available for free memory after the first call
> of ExitBootServices(). So such regions should be usable for kernel on
> spec basis.
>
> In x86, however, we have some workaround for broken firmware, where we
> keep such regions reserved until SetVirtualAddressMap() is done.
> See the following code in should_map_region():
>
> static bool should_map_region(efi_memory_desc_t *md)
> {
> ...
> /*
> * Map boot services regions as a workaround for buggy
> * firmware that accesses them even when they shouldn't.
> *
> * See efi_{reserve,free}_boot_services().
> */
> if (md->type == EFI_BOOT_SERVICES_CODE ||
> md->type == EFI_BOOT_SERVICES_DATA)
> return false;
>
> This workaround suppressed a boot crash, but potential issues still
> remain because no one prevents the regions from overlapping with kernel
> image by KASLR.
>
> So let's make sure that EFI_BOOT_SERVICES_{CODE|DATA} regions are never
> chosen as kernel memory for the workaround to work fine.
>
> Signed-off-by: Naoya Horiguchi <[email protected]>
> ---
> arch/x86/boot/compressed/kaslr.c | 41 +++++++++++++++++++++++++++++++---------
> 1 file changed, 32 insertions(+), 9 deletions(-)
>
> diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
> index 94f08fd375ae..f43fed0441a6 100644
> --- a/arch/x86/boot/compressed/kaslr.c
> +++ b/arch/x86/boot/compressed/kaslr.c
> @@ -563,7 +563,8 @@ static void process_mem_region(struct mem_vector *entry,
> /* Marks if efi mirror regions have been found and handled. */
> static bool efi_mirror_found;
>
> -static void process_efi_entry(unsigned long minimum, unsigned long image_size)
> +/* Returns true if we really enter efi memmap walk, otherwise returns false. */
> +static bool process_efi_entry(unsigned long minimum, unsigned long image_size)
> {
> struct efi_info *e = &boot_params->efi_info;
> struct mem_vector region;
> @@ -577,13 +578,13 @@ static void process_efi_entry(unsigned long minimum, unsigned long image_size)
> signature = (char *)&boot_params->efi_info.efi_loader_signature;
> if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
> strncmp(signature, EFI64_LOADER_SIGNATURE, 4))
> - return;
> + return false;
>
> #ifdef CONFIG_X86_32
> /* Can't handle data above 4GB at this time */
> if (e->efi_memmap_hi) {
> warn("Memory map is above 4GB, EFI should be disabled.\n");
> - return;
> + return false;
> }
> pmap = e->efi_memmap;
> #else
> @@ -593,13 +594,36 @@ static void process_efi_entry(unsigned long minimum, unsigned long image_size)
> nr_desc = e->efi_memmap_size / e->efi_memdesc_size;
> for (i = 0; i < nr_desc; i++) {
> md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
> - if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
> - region.start = md->phys_addr;
> - region.size = md->num_pages << EFI_PAGE_SHIFT;
> - process_mem_region(&region, minimum, image_size);
> + if (md->attribute & EFI_MEMORY_MORE_RELIABLE)
> efi_mirror_found = true;

Here, we should define a local variable of bool type to mark if mirrored
region is found.

> + }
> +
> + for (i = 0; i < nr_desc; i++) {
> + md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
> +
> + /*
> + * EFI_BOOT_SERVICES_{CODE|DATA} are avoided because boot
> + * services regions could be accessed after ExitBootServices()
> + * due to the workaround for buggy firmware.
> + */
> + if (!(md->type == EFI_LOADER_CODE ||
> + md->type == EFI_LOADER_DATA ||
> + md->type == EFI_CONVENTIONAL_MEMORY))
> + continue;
> +
> + if (efi_mirror_found &&
> + !(md->attribute & EFI_MEMORY_MORE_RELIABLE))
> + continue;
> +
> + region.start = md->phys_addr;
> + region.size = md->num_pages << EFI_PAGE_SHIFT;
> + process_mem_region(&region, minimum, image_size);

And can define a new global variable like efi_processed here.
efi_processed = true;


And yes, I missed this snippet of code to break loop if slot_area has
been full, this saves time.

> + if (slot_area_index == MAX_SLOT_AREA) {
> + debug_putstr("Aborted EFI scan (slot_areas full)!\n");
> + break;
> }
> }
> + return true;
> }
>
> static void process_e820_entry(unsigned long minimum, unsigned long image_size)
> @@ -637,8 +661,7 @@ static unsigned long find_random_phys_addr(unsigned long minimum,
> minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
>
> #ifdef CONFIG_EFI
> - process_efi_entry(minimum, image_size);
> - if (efi_mirror_found)
> + if (process_efi_entry(minimum, image_size))
> return slots_fetch_random();
> #endif
>
> --
> 2.7.4
>

2017-07-06 09:23:45

by Naoya Horiguchi

[permalink] [raw]
Subject: Re: [PATCH] x86/boot/KASLR: exclude EFI_BOOT_SERVICES_{CODE|DATA} from KASLR's choice

On Thu, Jul 06, 2017 at 05:13:32PM +0800, Chao Fan wrote:
> On Thu, Jul 06, 2017 at 08:31:07AM +0000, Naoya Horiguchi wrote:
> >Hi Baoquan, everyone,
> >
> >I'm also interested in KASLR/EFI related issue (but not the same issue
> >with yours, so I separated the thread.)
> >
> >This patch is based on Baoquan's recent patches[1], adding more code
> >on the new function process_efi_entry().
> >If it's OK, could you queue this onto your tree/series?
> >
> >[1] "[PATCH v3 0/2] x86/boot/KASLR: Restrict kernel to be randomized"
> > https://lkml.org/lkml/2017/7/5/98
> >
> >Thanks,
> >Naoya Horiguchi
> >---
> >From: Naoya Horiguchi <[email protected]>
> >Date: Thu, 6 Jul 2017 16:40:52 +0900
> >Subject: [PATCH] x86/boot/KASLR: exclude EFI_BOOT_SERVICES_{CODE|DATA} from
> > KASLR's choice
> >
> >KASLR chooses kernel location from E820_TYPE_RAM regions by walking over
> >e820 entries now. E820_TYPE_RAM includes EFI_BOOT_SERVICES_CODE and
> >EFI_BOOT_SERVICES_DATA, so those regions can be the target. According to
> >UEFI spec, all memory regions marked as EfiBootServicesCode and
> >EfiBootServicesData are available for free memory after the first call
> >of ExitBootServices(). So such regions should be usable for kernel on
> >spec basis.
> >
> >In x86, however, we have some workaround for broken firmware, where we
> >keep such regions reserved until SetVirtualAddressMap() is done.
> >See the following code in should_map_region():
> >
> > static bool should_map_region(efi_memory_desc_t *md)
> > {
> > ...
> > /*
> > * Map boot services regions as a workaround for buggy
> > * firmware that accesses them even when they shouldn't.
> > *
> > * See efi_{reserve,free}_boot_services().
> > */
> > if (md->type == EFI_BOOT_SERVICES_CODE ||
> > md->type == EFI_BOOT_SERVICES_DATA)
> > return false;
> >
> >This workaround suppressed a boot crash, but potential issues still
> >remain because no one prevents the regions from overlapping with kernel
> >image by KASLR.
> >
> >So let's make sure that EFI_BOOT_SERVICES_{CODE|DATA} regions are never
> >chosen as kernel memory for the workaround to work fine.
> >
> >Signed-off-by: Naoya Horiguchi <[email protected]>
> >---
> > arch/x86/boot/compressed/kaslr.c | 41 +++++++++++++++++++++++++++++++---------
> > 1 file changed, 32 insertions(+), 9 deletions(-)
> >
> >diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
> >index 94f08fd375ae..f43fed0441a6 100644
> >--- a/arch/x86/boot/compressed/kaslr.c
> >+++ b/arch/x86/boot/compressed/kaslr.c
> >@@ -563,7 +563,8 @@ static void process_mem_region(struct mem_vector *entry,
> > /* Marks if efi mirror regions have been found and handled. */
> > static bool efi_mirror_found;
> >
> >-static void process_efi_entry(unsigned long minimum, unsigned long image_size)
> >+/* Returns true if we really enter efi memmap walk, otherwise returns false. */
> >+static bool process_efi_entry(unsigned long minimum, unsigned long image_size)
> > {
> > struct efi_info *e = &boot_params->efi_info;
> > struct mem_vector region;
> >@@ -577,13 +578,13 @@ static void process_efi_entry(unsigned long minimum, unsigned long image_size)
> > signature = (char *)&boot_params->efi_info.efi_loader_signature;
> > if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
> > strncmp(signature, EFI64_LOADER_SIGNATURE, 4))
> >- return;
> >+ return false;
> >
> > #ifdef CONFIG_X86_32
> > /* Can't handle data above 4GB at this time */
> > if (e->efi_memmap_hi) {
> > warn("Memory map is above 4GB, EFI should be disabled.\n");
> >- return;
> >+ return false;
> > }
> > pmap = e->efi_memmap;
> > #else
> >@@ -593,13 +594,36 @@ static void process_efi_entry(unsigned long minimum, unsigned long image_size)
> > nr_desc = e->efi_memmap_size / e->efi_memdesc_size;
> > for (i = 0; i < nr_desc; i++) {
> > md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
> >- if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
> >- region.start = md->phys_addr;
> >- region.size = md->num_pages << EFI_PAGE_SHIFT;
> >- process_mem_region(&region, minimum, image_size);
> >+ if (md->attribute & EFI_MEMORY_MORE_RELIABLE)
> > efi_mirror_found = true;
>
> Hi Horiguchi-san,
>
> If efi_mirror_found is changed to be true, we won't need to walk other
> entries, so I think:
> if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
> efi_mirror_found = true;
> break;
> }
> will be enough to show that mirror regions exist. And will walk
> less entries. How do you think about this?

Thank you for the review, Chao.
And you're right, I'll add break here.

# I'll post revised one tomorrow waiting for more comments.

> Another question: what's the benifit of putting this part of
> "efi_mirror_found = true" to a independent cycle.

We can't easily cancel process_mem_region(), so if we process a few normal
regions like EFI_CONVENTIONAL_MEMORY and then find a EFI_MEMORY_MORE_RELIABLE
region, that's a bit troublesome.
So I decided to first check whether EFI_MEMORY_MORE_RELIABLE region exists or not.

Thanks,
Naoya Horiguchi

2017-07-06 09:37:13

by Naoya Horiguchi

[permalink] [raw]
Subject: Re: [PATCH] x86/boot/KASLR: exclude EFI_BOOT_SERVICES_{CODE|DATA} from KASLR's choice

On Thu, Jul 06, 2017 at 05:18:09PM +0800, Baoquan He wrote:
> Hi Naoya Horiguchi,
>
> Thanks for making this!
>
> On 07/06/17 at 08:31am, Naoya Horiguchi wrote:
> > Hi Baoquan, everyone,
> >
> > I'm also interested in KASLR/EFI related issue (but not the same issue
> > with yours, so I separated the thread.)
> >
> > This patch is based on Baoquan's recent patches[1], adding more code
> > on the new function process_efi_entry().
> > If it's OK, could you queue this onto your tree/series?
>
> This is interesting. So you are suggesting that we should try to avoid
> those EFI_BOOT_SERVICES_{CODE|DATA} efi regions as long as efi map
> regions are available, meanwhile try to locate kernel inside mirrored
> regions if existed. I do know the efi work around, so it seems reasonable
> to me, I can add it when repost. Or you can post after mine has been
> merged.

Thank you for the positive response, Baoquan.

>
> A little adjustment, please see the inline comment.
> >
> > [1] "[PATCH v3 0/2] x86/boot/KASLR: Restrict kernel to be randomized"
> > https://lkml.org/lkml/2017/7/5/98
> >
> > Thanks,
> > Naoya Horiguchi
> > ---
> > From: Naoya Horiguchi <[email protected]>
> > Date: Thu, 6 Jul 2017 16:40:52 +0900
> > Subject: [PATCH] x86/boot/KASLR: exclude EFI_BOOT_SERVICES_{CODE|DATA} from
> > KASLR's choice
> >
> > KASLR chooses kernel location from E820_TYPE_RAM regions by walking over
> > e820 entries now. E820_TYPE_RAM includes EFI_BOOT_SERVICES_CODE and
> > EFI_BOOT_SERVICES_DATA, so those regions can be the target. According to
> > UEFI spec, all memory regions marked as EfiBootServicesCode and
> > EfiBootServicesData are available for free memory after the first call
> > of ExitBootServices(). So such regions should be usable for kernel on
> > spec basis.
> >
> > In x86, however, we have some workaround for broken firmware, where we
> > keep such regions reserved until SetVirtualAddressMap() is done.
> > See the following code in should_map_region():
> >
> > static bool should_map_region(efi_memory_desc_t *md)
> > {
> > ...
> > /*
> > * Map boot services regions as a workaround for buggy
> > * firmware that accesses them even when they shouldn't.
> > *
> > * See efi_{reserve,free}_boot_services().
> > */
> > if (md->type == EFI_BOOT_SERVICES_CODE ||
> > md->type == EFI_BOOT_SERVICES_DATA)
> > return false;
> >
> > This workaround suppressed a boot crash, but potential issues still
> > remain because no one prevents the regions from overlapping with kernel
> > image by KASLR.
> >
> > So let's make sure that EFI_BOOT_SERVICES_{CODE|DATA} regions are never
> > chosen as kernel memory for the workaround to work fine.
> >
> > Signed-off-by: Naoya Horiguchi <[email protected]>
> > ---
> > arch/x86/boot/compressed/kaslr.c | 41 +++++++++++++++++++++++++++++++---------
> > 1 file changed, 32 insertions(+), 9 deletions(-)
> >
> > diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
> > index 94f08fd375ae..f43fed0441a6 100644
> > --- a/arch/x86/boot/compressed/kaslr.c
> > +++ b/arch/x86/boot/compressed/kaslr.c
> > @@ -563,7 +563,8 @@ static void process_mem_region(struct mem_vector *entry,
> > /* Marks if efi mirror regions have been found and handled. */
> > static bool efi_mirror_found;
> >
> > -static void process_efi_entry(unsigned long minimum, unsigned long image_size)
> > +/* Returns true if we really enter efi memmap walk, otherwise returns false. */
> > +static bool process_efi_entry(unsigned long minimum, unsigned long image_size)
> > {
> > struct efi_info *e = &boot_params->efi_info;
> > struct mem_vector region;
> > @@ -577,13 +578,13 @@ static void process_efi_entry(unsigned long minimum, unsigned long image_size)
> > signature = (char *)&boot_params->efi_info.efi_loader_signature;
> > if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
> > strncmp(signature, EFI64_LOADER_SIGNATURE, 4))
> > - return;
> > + return false;
> >
> > #ifdef CONFIG_X86_32
> > /* Can't handle data above 4GB at this time */
> > if (e->efi_memmap_hi) {
> > warn("Memory map is above 4GB, EFI should be disabled.\n");
> > - return;
> > + return false;
> > }
> > pmap = e->efi_memmap;
> > #else
> > @@ -593,13 +594,36 @@ static void process_efi_entry(unsigned long minimum, unsigned long image_size)
> > nr_desc = e->efi_memmap_size / e->efi_memdesc_size;
> > for (i = 0; i < nr_desc; i++) {
> > md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
> > - if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
> > - region.start = md->phys_addr;
> > - region.size = md->num_pages << EFI_PAGE_SHIFT;
> > - process_mem_region(&region, minimum, image_size);
> > + if (md->attribute & EFI_MEMORY_MORE_RELIABLE)
> > efi_mirror_found = true;
>
> Here, we should define a local variable of bool type to mark if mirrored
> region is found.

OK. efi_mirror_found need not be a global variable any longer.

> > + }
> > +
> > + for (i = 0; i < nr_desc; i++) {
> > + md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
> > +
> > + /*
> > + * EFI_BOOT_SERVICES_{CODE|DATA} are avoided because boot
> > + * services regions could be accessed after ExitBootServices()
> > + * due to the workaround for buggy firmware.
> > + */
> > + if (!(md->type == EFI_LOADER_CODE ||
> > + md->type == EFI_LOADER_DATA ||
> > + md->type == EFI_CONVENTIONAL_MEMORY))
> > + continue;
> > +
> > + if (efi_mirror_found &&
> > + !(md->attribute & EFI_MEMORY_MORE_RELIABLE))
> > + continue;
> > +
> > + region.start = md->phys_addr;
> > + region.size = md->num_pages << EFI_PAGE_SHIFT;
> > + process_mem_region(&region, minimum, image_size);
>
> And can define a new global variable like efi_processed here.
> efi_processed = true;

I intented that this is represented by the return value of process_efi_entry(),
but if you have more readable/clearer options, I'm fine with that.

>
>
> And yes, I missed this snippet of code to break loop if slot_area has
> been full, this saves time.

No problem, feel free to move this snippet to your 2/2 patch.

Thanks,
Naoya Horiguchi

2017-07-06 09:37:11

by Chao Fan

[permalink] [raw]
Subject: Re: [PATCH] x86/boot/KASLR: exclude EFI_BOOT_SERVICES_{CODE|DATA} from KASLR's choice

On Thu, Jul 06, 2017 at 09:22:38AM +0000, Naoya Horiguchi wrote:
>On Thu, Jul 06, 2017 at 05:13:32PM +0800, Chao Fan wrote:
>> On Thu, Jul 06, 2017 at 08:31:07AM +0000, Naoya Horiguchi wrote:
>> >Hi Baoquan, everyone,
>> >
>> >I'm also interested in KASLR/EFI related issue (but not the same issue
>> >with yours, so I separated the thread.)
>> >
>> >This patch is based on Baoquan's recent patches[1], adding more code
>> >on the new function process_efi_entry().
>> >If it's OK, could you queue this onto your tree/series?
>> >
>> >[1] "[PATCH v3 0/2] x86/boot/KASLR: Restrict kernel to be randomized"
>> > https://lkml.org/lkml/2017/7/5/98
>> >
>> >Thanks,
>> >Naoya Horiguchi
>> >---
>> >From: Naoya Horiguchi <[email protected]>
>> >Date: Thu, 6 Jul 2017 16:40:52 +0900
>> >Subject: [PATCH] x86/boot/KASLR: exclude EFI_BOOT_SERVICES_{CODE|DATA} from
>> > KASLR's choice
>> >
>> >KASLR chooses kernel location from E820_TYPE_RAM regions by walking over
>> >e820 entries now. E820_TYPE_RAM includes EFI_BOOT_SERVICES_CODE and
>> >EFI_BOOT_SERVICES_DATA, so those regions can be the target. According to
>> >UEFI spec, all memory regions marked as EfiBootServicesCode and
>> >EfiBootServicesData are available for free memory after the first call
>> >of ExitBootServices(). So such regions should be usable for kernel on
>> >spec basis.
>> >
>> >In x86, however, we have some workaround for broken firmware, where we
>> >keep such regions reserved until SetVirtualAddressMap() is done.
>> >See the following code in should_map_region():
>> >
>> > static bool should_map_region(efi_memory_desc_t *md)
>> > {
>> > ...
>> > /*
>> > * Map boot services regions as a workaround for buggy
>> > * firmware that accesses them even when they shouldn't.
>> > *
>> > * See efi_{reserve,free}_boot_services().
>> > */
>> > if (md->type == EFI_BOOT_SERVICES_CODE ||
>> > md->type == EFI_BOOT_SERVICES_DATA)
>> > return false;
>> >
>> >This workaround suppressed a boot crash, but potential issues still
>> >remain because no one prevents the regions from overlapping with kernel
>> >image by KASLR.
>> >
>> >So let's make sure that EFI_BOOT_SERVICES_{CODE|DATA} regions are never
>> >chosen as kernel memory for the workaround to work fine.
>> >
>> >Signed-off-by: Naoya Horiguchi <[email protected]>
>> >---
>> > arch/x86/boot/compressed/kaslr.c | 41 +++++++++++++++++++++++++++++++---------
>> > 1 file changed, 32 insertions(+), 9 deletions(-)
>> >
>> >diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
>> >index 94f08fd375ae..f43fed0441a6 100644
>> >--- a/arch/x86/boot/compressed/kaslr.c
>> >+++ b/arch/x86/boot/compressed/kaslr.c
>> >@@ -563,7 +563,8 @@ static void process_mem_region(struct mem_vector *entry,
>> > /* Marks if efi mirror regions have been found and handled. */
>> > static bool efi_mirror_found;
>> >
>> >-static void process_efi_entry(unsigned long minimum, unsigned long image_size)
>> >+/* Returns true if we really enter efi memmap walk, otherwise returns false. */
>> >+static bool process_efi_entry(unsigned long minimum, unsigned long image_size)
>> > {
>> > struct efi_info *e = &boot_params->efi_info;
>> > struct mem_vector region;
>> >@@ -577,13 +578,13 @@ static void process_efi_entry(unsigned long minimum, unsigned long image_size)
>> > signature = (char *)&boot_params->efi_info.efi_loader_signature;
>> > if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
>> > strncmp(signature, EFI64_LOADER_SIGNATURE, 4))
>> >- return;
>> >+ return false;
>> >
>> > #ifdef CONFIG_X86_32
>> > /* Can't handle data above 4GB at this time */
>> > if (e->efi_memmap_hi) {
>> > warn("Memory map is above 4GB, EFI should be disabled.\n");
>> >- return;
>> >+ return false;
>> > }
>> > pmap = e->efi_memmap;
>> > #else
>> >@@ -593,13 +594,36 @@ static void process_efi_entry(unsigned long minimum, unsigned long image_size)
>> > nr_desc = e->efi_memmap_size / e->efi_memdesc_size;
>> > for (i = 0; i < nr_desc; i++) {
>> > md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
>> >- if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
>> >- region.start = md->phys_addr;
>> >- region.size = md->num_pages << EFI_PAGE_SHIFT;
>> >- process_mem_region(&region, minimum, image_size);
>> >+ if (md->attribute & EFI_MEMORY_MORE_RELIABLE)
>> > efi_mirror_found = true;
>>
>> Hi Horiguchi-san,
>>
>> If efi_mirror_found is changed to be true, we won't need to walk other
>> entries, so I think:
>> if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
>> efi_mirror_found = true;
>> break;
>> }
>> will be enough to show that mirror regions exist. And will walk
>> less entries. How do you think about this?
>
>Thank you for the review, Chao.
>And you're right, I'll add break here.
>
># I'll post revised one tomorrow waiting for more comments.
>
>> Another question: what's the benifit of putting this part of
>> "efi_mirror_found = true" to a independent cycle.
>
>We can't easily cancel process_mem_region(), so if we process a few normal
>regions like EFI_CONVENTIONAL_MEMORY and then find a EFI_MEMORY_MORE_RELIABLE
>region, that's a bit troublesome.
>So I decided to first check whether EFI_MEMORY_MORE_RELIABLE region exists or not.

OK, I got it. Thanks for your explanation.

Thanks,
Chao Fan
>
>Thanks,
>Naoya Horiguchi
>
>


2017-07-06 10:04:57

by Chao Fan

[permalink] [raw]
Subject: Re: [PATCH] x86/boot/KASLR: exclude EFI_BOOT_SERVICES_{CODE|DATA} from KASLR's choice

On Thu, Jul 06, 2017 at 08:31:07AM +0000, Naoya Horiguchi wrote:
>Hi Baoquan, everyone,
>
>I'm also interested in KASLR/EFI related issue (but not the same issue
>with yours, so I separated the thread.)
>
>This patch is based on Baoquan's recent patches[1], adding more code
>on the new function process_efi_entry().
>If it's OK, could you queue this onto your tree/series?
>
>[1] "[PATCH v3 0/2] x86/boot/KASLR: Restrict kernel to be randomized"
> https://lkml.org/lkml/2017/7/5/98
>
>Thanks,
>Naoya Horiguchi
>---
>From: Naoya Horiguchi <[email protected]>
>Date: Thu, 6 Jul 2017 16:40:52 +0900
>Subject: [PATCH] x86/boot/KASLR: exclude EFI_BOOT_SERVICES_{CODE|DATA} from
> KASLR's choice
>
>KASLR chooses kernel location from E820_TYPE_RAM regions by walking over
>e820 entries now. E820_TYPE_RAM includes EFI_BOOT_SERVICES_CODE and
>EFI_BOOT_SERVICES_DATA, so those regions can be the target. According to
>UEFI spec, all memory regions marked as EfiBootServicesCode and
>EfiBootServicesData are available for free memory after the first call
>of ExitBootServices(). So such regions should be usable for kernel on
>spec basis.
>
>In x86, however, we have some workaround for broken firmware, where we
>keep such regions reserved until SetVirtualAddressMap() is done.
>See the following code in should_map_region():
>
> static bool should_map_region(efi_memory_desc_t *md)
> {
> ...
> /*
> * Map boot services regions as a workaround for buggy
> * firmware that accesses them even when they shouldn't.
> *
> * See efi_{reserve,free}_boot_services().
> */
> if (md->type == EFI_BOOT_SERVICES_CODE ||
> md->type == EFI_BOOT_SERVICES_DATA)
> return false;
>
>This workaround suppressed a boot crash, but potential issues still
>remain because no one prevents the regions from overlapping with kernel
>image by KASLR.
>
>So let's make sure that EFI_BOOT_SERVICES_{CODE|DATA} regions are never
>chosen as kernel memory for the workaround to work fine.
>
>Signed-off-by: Naoya Horiguchi <[email protected]>
>---
> arch/x86/boot/compressed/kaslr.c | 41 +++++++++++++++++++++++++++++++---------
> 1 file changed, 32 insertions(+), 9 deletions(-)
>
>diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
>index 94f08fd375ae..f43fed0441a6 100644
>--- a/arch/x86/boot/compressed/kaslr.c
>+++ b/arch/x86/boot/compressed/kaslr.c
>@@ -563,7 +563,8 @@ static void process_mem_region(struct mem_vector *entry,
> /* Marks if efi mirror regions have been found and handled. */
> static bool efi_mirror_found;
>
>-static void process_efi_entry(unsigned long minimum, unsigned long image_size)
>+/* Returns true if we really enter efi memmap walk, otherwise returns false. */
>+static bool process_efi_entry(unsigned long minimum, unsigned long image_size)
> {
> struct efi_info *e = &boot_params->efi_info;
> struct mem_vector region;
>@@ -577,13 +578,13 @@ static void process_efi_entry(unsigned long minimum, unsigned long image_size)
> signature = (char *)&boot_params->efi_info.efi_loader_signature;
> if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
> strncmp(signature, EFI64_LOADER_SIGNATURE, 4))
>- return;
>+ return false;
>
> #ifdef CONFIG_X86_32
> /* Can't handle data above 4GB at this time */
> if (e->efi_memmap_hi) {
> warn("Memory map is above 4GB, EFI should be disabled.\n");
>- return;
>+ return false;
> }
> pmap = e->efi_memmap;
> #else
>@@ -593,13 +594,36 @@ static void process_efi_entry(unsigned long minimum, unsigned long image_size)
> nr_desc = e->efi_memmap_size / e->efi_memdesc_size;
> for (i = 0; i < nr_desc; i++) {
> md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
>- if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
>- region.start = md->phys_addr;
>- region.size = md->num_pages << EFI_PAGE_SHIFT;
>- process_mem_region(&region, minimum, image_size);
>+ if (md->attribute & EFI_MEMORY_MORE_RELIABLE)
> efi_mirror_found = true;
>+ }

Hi Horiguchi-san,

Sorry for one more suggestion,
How about add:
if (!efi_mirror_found)
return false;
at this place, between the two cycles.

Because if there are no mirror regions found, I think we can return
directly and go to walk the e820 entries.
I don't know whether my understanding is right.

Thanks,
Chao Fan

>+
>+ for (i = 0; i < nr_desc; i++) {
>+ md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
>+
>+ /*
>+ * EFI_BOOT_SERVICES_{CODE|DATA} are avoided because boot
>+ * services regions could be accessed after ExitBootServices()
>+ * due to the workaround for buggy firmware.
>+ */
>+ if (!(md->type == EFI_LOADER_CODE ||
>+ md->type == EFI_LOADER_DATA ||
>+ md->type == EFI_CONVENTIONAL_MEMORY))
>+ continue;
>+
>+ if (efi_mirror_found &&
>+ !(md->attribute & EFI_MEMORY_MORE_RELIABLE))
>+ continue;
>+
>+ region.start = md->phys_addr;
>+ region.size = md->num_pages << EFI_PAGE_SHIFT;
>+ process_mem_region(&region, minimum, image_size);
>+ if (slot_area_index == MAX_SLOT_AREA) {
>+ debug_putstr("Aborted EFI scan (slot_areas full)!\n");
>+ break;
> }
> }
>+ return true;
> }
>
> static void process_e820_entry(unsigned long minimum, unsigned long image_size)
>@@ -637,8 +661,7 @@ static unsigned long find_random_phys_addr(unsigned long minimum,
> minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
>
> #ifdef CONFIG_EFI
>- process_efi_entry(minimum, image_size);
>- if (efi_mirror_found)
>+ if (process_efi_entry(minimum, image_size))
> return slots_fetch_random();
> #endif
>
>--
>2.7.4
>
>
>


2017-07-06 10:21:02

by Chao Fan

[permalink] [raw]
Subject: Re: [PATCH] x86/boot/KASLR: exclude EFI_BOOT_SERVICES_{CODE|DATA} from KASLR's choice

On Thu, Jul 06, 2017 at 06:04:46PM +0800, Chao Fan wrote:
>On Thu, Jul 06, 2017 at 08:31:07AM +0000, Naoya Horiguchi wrote:
>>Hi Baoquan, everyone,
>>
>>I'm also interested in KASLR/EFI related issue (but not the same issue
>>with yours, so I separated the thread.)
>>
>>This patch is based on Baoquan's recent patches[1], adding more code
>>on the new function process_efi_entry().
>>If it's OK, could you queue this onto your tree/series?
>>
>>[1] "[PATCH v3 0/2] x86/boot/KASLR: Restrict kernel to be randomized"
>> https://lkml.org/lkml/2017/7/5/98
>>
>>Thanks,
>>Naoya Horiguchi
>>---
>>From: Naoya Horiguchi <[email protected]>
>>Date: Thu, 6 Jul 2017 16:40:52 +0900
>>Subject: [PATCH] x86/boot/KASLR: exclude EFI_BOOT_SERVICES_{CODE|DATA} from
>> KASLR's choice
>>
>>KASLR chooses kernel location from E820_TYPE_RAM regions by walking over
>>e820 entries now. E820_TYPE_RAM includes EFI_BOOT_SERVICES_CODE and
>>EFI_BOOT_SERVICES_DATA, so those regions can be the target. According to
>>UEFI spec, all memory regions marked as EfiBootServicesCode and
>>EfiBootServicesData are available for free memory after the first call
>>of ExitBootServices(). So such regions should be usable for kernel on
>>spec basis.
>>
>>In x86, however, we have some workaround for broken firmware, where we
>>keep such regions reserved until SetVirtualAddressMap() is done.
>>See the following code in should_map_region():
>>
>> static bool should_map_region(efi_memory_desc_t *md)
>> {
>> ...
>> /*
>> * Map boot services regions as a workaround for buggy
>> * firmware that accesses them even when they shouldn't.
>> *
>> * See efi_{reserve,free}_boot_services().
>> */
>> if (md->type == EFI_BOOT_SERVICES_CODE ||
>> md->type == EFI_BOOT_SERVICES_DATA)
>> return false;
>>
>>This workaround suppressed a boot crash, but potential issues still
>>remain because no one prevents the regions from overlapping with kernel
>>image by KASLR.
>>
>>So let's make sure that EFI_BOOT_SERVICES_{CODE|DATA} regions are never
>>chosen as kernel memory for the workaround to work fine.
>>
>>Signed-off-by: Naoya Horiguchi <[email protected]>
>>---
>> arch/x86/boot/compressed/kaslr.c | 41 +++++++++++++++++++++++++++++++---------
>> 1 file changed, 32 insertions(+), 9 deletions(-)
>>
>>diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
>>index 94f08fd375ae..f43fed0441a6 100644
>>--- a/arch/x86/boot/compressed/kaslr.c
>>+++ b/arch/x86/boot/compressed/kaslr.c
>>@@ -563,7 +563,8 @@ static void process_mem_region(struct mem_vector *entry,
>> /* Marks if efi mirror regions have been found and handled. */
>> static bool efi_mirror_found;
>>
>>-static void process_efi_entry(unsigned long minimum, unsigned long image_size)
>>+/* Returns true if we really enter efi memmap walk, otherwise returns false. */
>>+static bool process_efi_entry(unsigned long minimum, unsigned long image_size)
>> {
>> struct efi_info *e = &boot_params->efi_info;
>> struct mem_vector region;
>>@@ -577,13 +578,13 @@ static void process_efi_entry(unsigned long minimum, unsigned long image_size)
>> signature = (char *)&boot_params->efi_info.efi_loader_signature;
>> if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
>> strncmp(signature, EFI64_LOADER_SIGNATURE, 4))
>>- return;
>>+ return false;
>>
>> #ifdef CONFIG_X86_32
>> /* Can't handle data above 4GB at this time */
>> if (e->efi_memmap_hi) {
>> warn("Memory map is above 4GB, EFI should be disabled.\n");
>>- return;
>>+ return false;
>> }
>> pmap = e->efi_memmap;
>> #else
>>@@ -593,13 +594,36 @@ static void process_efi_entry(unsigned long minimum, unsigned long image_size)
>> nr_desc = e->efi_memmap_size / e->efi_memdesc_size;
>> for (i = 0; i < nr_desc; i++) {
>> md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
>>- if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
>>- region.start = md->phys_addr;
>>- region.size = md->num_pages << EFI_PAGE_SHIFT;
>>- process_mem_region(&region, minimum, image_size);
>>+ if (md->attribute & EFI_MEMORY_MORE_RELIABLE)
>> efi_mirror_found = true;
>>+ }
>
>Hi Horiguchi-san,
>
>Sorry for one more suggestion,
>How about add:
> if (!efi_mirror_found)
> return false;
>at this place, between the two cycles.
>
>Because if there are no mirror regions found, I think we can return
>directly and go to walk the e820 entries.
>I don't know whether my understanding is right.

Sorry for disturbing, it's my misunderstanding.
My suggestion is useless.

Since efi entries have been walked, we can use them directly,
no need to walk e820 entries again.

The logic of your codes is good.
Sorry again.

Thanks,
Chao Fan

>
>Thanks,
>Chao Fan
>
>>+
>>+ for (i = 0; i < nr_desc; i++) {
>>+ md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
>>+
>>+ /*
>>+ * EFI_BOOT_SERVICES_{CODE|DATA} are avoided because boot
>>+ * services regions could be accessed after ExitBootServices()
>>+ * due to the workaround for buggy firmware.
>>+ */
>>+ if (!(md->type == EFI_LOADER_CODE ||
>>+ md->type == EFI_LOADER_DATA ||
>>+ md->type == EFI_CONVENTIONAL_MEMORY))
>>+ continue;
>>+
>>+ if (efi_mirror_found &&
>>+ !(md->attribute & EFI_MEMORY_MORE_RELIABLE))
>>+ continue;
>>+
>>+ region.start = md->phys_addr;
>>+ region.size = md->num_pages << EFI_PAGE_SHIFT;
>>+ process_mem_region(&region, minimum, image_size);
>>+ if (slot_area_index == MAX_SLOT_AREA) {
>>+ debug_putstr("Aborted EFI scan (slot_areas full)!\n");
>>+ break;
>> }
>> }
>>+ return true;
>> }
>>
>> static void process_e820_entry(unsigned long minimum, unsigned long image_size)
>>@@ -637,8 +661,7 @@ static unsigned long find_random_phys_addr(unsigned long minimum,
>> minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
>>
>> #ifdef CONFIG_EFI
>>- process_efi_entry(minimum, image_size);
>>- if (efi_mirror_found)
>>+ if (process_efi_entry(minimum, image_size))
>> return slots_fetch_random();
>> #endif
>>
>>--
>>2.7.4
>>
>>
>>


2017-07-06 14:57:32

by Matt Fleming

[permalink] [raw]
Subject: Re: [PATCH] x86/boot/KASLR: exclude EFI_BOOT_SERVICES_{CODE|DATA} from KASLR's choice

On Thu, 06 Jul, at 08:31:07AM, Naoya Horiguchi wrote:
>
> KASLR chooses kernel location from E820_TYPE_RAM regions by walking over
> e820 entries now. E820_TYPE_RAM includes EFI_BOOT_SERVICES_CODE and
> EFI_BOOT_SERVICES_DATA, so those regions can be the target. According to
> UEFI spec, all memory regions marked as EfiBootServicesCode and
> EfiBootServicesData are available for free memory after the first call
> of ExitBootServices(). So such regions should be usable for kernel on
> spec basis.
>
> In x86, however, we have some workaround for broken firmware, where we
> keep such regions reserved until SetVirtualAddressMap() is done.
> See the following code in should_map_region():
>
> static bool should_map_region(efi_memory_desc_t *md)
> {
> ...
> /*
> * Map boot services regions as a workaround for buggy
> * firmware that accesses them even when they shouldn't.
> *
> * See efi_{reserve,free}_boot_services().
> */
> if (md->type == EFI_BOOT_SERVICES_CODE ||
> md->type == EFI_BOOT_SERVICES_DATA)
> return false;
>
> This workaround suppressed a boot crash, but potential issues still
> remain because no one prevents the regions from overlapping with kernel
> image by KASLR.
>
> So let's make sure that EFI_BOOT_SERVICES_{CODE|DATA} regions are never
> chosen as kernel memory for the workaround to work fine.
>
> Signed-off-by: Naoya Horiguchi <[email protected]>
> ---
> arch/x86/boot/compressed/kaslr.c | 41 +++++++++++++++++++++++++++++++---------
> 1 file changed, 32 insertions(+), 9 deletions(-)
>
> diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
> index 94f08fd375ae..f43fed0441a6 100644
> --- a/arch/x86/boot/compressed/kaslr.c
> +++ b/arch/x86/boot/compressed/kaslr.c
> @@ -563,7 +563,8 @@ static void process_mem_region(struct mem_vector *entry,
> /* Marks if efi mirror regions have been found and handled. */
> static bool efi_mirror_found;
>
> -static void process_efi_entry(unsigned long minimum, unsigned long image_size)
> +/* Returns true if we really enter efi memmap walk, otherwise returns false. */
> +static bool process_efi_entry(unsigned long minimum, unsigned long image_size)
> {
> struct efi_info *e = &boot_params->efi_info;
> struct mem_vector region;
> @@ -577,13 +578,13 @@ static void process_efi_entry(unsigned long minimum, unsigned long image_size)
> signature = (char *)&boot_params->efi_info.efi_loader_signature;
> if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
> strncmp(signature, EFI64_LOADER_SIGNATURE, 4))
> - return;
> + return false;
>
> #ifdef CONFIG_X86_32
> /* Can't handle data above 4GB at this time */
> if (e->efi_memmap_hi) {
> warn("Memory map is above 4GB, EFI should be disabled.\n");
> - return;
> + return false;
> }
> pmap = e->efi_memmap;
> #else
> @@ -593,13 +594,36 @@ static void process_efi_entry(unsigned long minimum, unsigned long image_size)
> nr_desc = e->efi_memmap_size / e->efi_memdesc_size;
> for (i = 0; i < nr_desc; i++) {
> md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
> - if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
> - region.start = md->phys_addr;
> - region.size = md->num_pages << EFI_PAGE_SHIFT;
> - process_mem_region(&region, minimum, image_size);
> + if (md->attribute & EFI_MEMORY_MORE_RELIABLE)
> efi_mirror_found = true;
> + }
> +
> + for (i = 0; i < nr_desc; i++) {
> + md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
> +
> + /*
> + * EFI_BOOT_SERVICES_{CODE|DATA} are avoided because boot
> + * services regions could be accessed after ExitBootServices()
> + * due to the workaround for buggy firmware.
> + */
> + if (!(md->type == EFI_LOADER_CODE ||
> + md->type == EFI_LOADER_DATA ||
> + md->type == EFI_CONVENTIONAL_MEMORY))
> + continue;

Wouldn't it make more sense to *only* use EFI_CONVENTIONAL_MEMORY?

You can't re-use EFI_LOADER_* regions because the kaslr code is run so
early in boot that you've no idea if data the kernel will need is in
those EFI_LOADER_* regions.

For example, we pass struct setup_data objects inside of
EFI_LOADER_DATA regions.

2017-07-07 03:08:04

by Baoquan He

[permalink] [raw]
Subject: Re: [PATCH] x86/boot/KASLR: exclude EFI_BOOT_SERVICES_{CODE|DATA} from KASLR's choice

On 07/06/17 at 03:57pm, Matt Fleming wrote:
> On Thu, 06 Jul, at 08:31:07AM, Naoya Horiguchi wrote:
> > + for (i = 0; i < nr_desc; i++) {
> > + md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
> > +
> > + /*
> > + * EFI_BOOT_SERVICES_{CODE|DATA} are avoided because boot
> > + * services regions could be accessed after ExitBootServices()
> > + * due to the workaround for buggy firmware.
> > + */
> > + if (!(md->type == EFI_LOADER_CODE ||
> > + md->type == EFI_LOADER_DATA ||
> > + md->type == EFI_CONVENTIONAL_MEMORY))
> > + continue;
>
> Wouldn't it make more sense to *only* use EFI_CONVENTIONAL_MEMORY?
>
> You can't re-use EFI_LOADER_* regions because the kaslr code is run so
> early in boot that you've no idea if data the kernel will need is in
> those EFI_LOADER_* regions.
>
> For example, we pass struct setup_data objects inside of
> EFI_LOADER_DATA regions.

It doesn't matter because we have tried to avoid those memory setup_data
resides in in mem_avoid_overlap(). Here discarding EFI_LOADER_* could
discard the whole regions while setup_data could occupy small part of
them.

2017-07-07 06:13:20

by Naoya Horiguchi

[permalink] [raw]
Subject: Re: [PATCH] x86/boot/KASLR: exclude EFI_BOOT_SERVICES_{CODE|DATA} from KASLR's choice

On Fri, Jul 07, 2017 at 11:07:59AM +0800, Baoquan He wrote:
> On 07/06/17 at 03:57pm, Matt Fleming wrote:
> > On Thu, 06 Jul, at 08:31:07AM, Naoya Horiguchi wrote:
> > > + for (i = 0; i < nr_desc; i++) {
> > > + md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
> > > +
> > > + /*
> > > + * EFI_BOOT_SERVICES_{CODE|DATA} are avoided because boot
> > > + * services regions could be accessed after ExitBootServices()
> > > + * due to the workaround for buggy firmware.
> > > + */
> > > + if (!(md->type == EFI_LOADER_CODE ||
> > > + md->type == EFI_LOADER_DATA ||
> > > + md->type == EFI_CONVENTIONAL_MEMORY))
> > > + continue;
> >
> > Wouldn't it make more sense to *only* use EFI_CONVENTIONAL_MEMORY?
> >
> > You can't re-use EFI_LOADER_* regions because the kaslr code is run so
> > early in boot that you've no idea if data the kernel will need is in
> > those EFI_LOADER_* regions.
> >
> > For example, we pass struct setup_data objects inside of
> > EFI_LOADER_DATA regions.
>
> It doesn't matter because we have tried to avoid those memory setup_data
> resides in in mem_avoid_overlap(). Here discarding EFI_LOADER_* could
> discard the whole regions while setup_data could occupy small part of
> them.

Hi Matt, Baoquan,

I added these three checks to accept any regions corresponding to
E820_TYPE_RAM except EFI_BOOT_SERVICES_*, just thinking of that it's minimum
surprising. Baoquan gave a good justification on that, so I'll leave it
as-is in next version.

Thanks,
Naoya Horiguchi

2017-07-07 07:22:57

by Naoya Horiguchi

[permalink] [raw]
Subject: [PATCH v2 2/2] x86/efi: clean up dead code around efi_reserve_boot_services()

EFI_BOOT_SERVICES_{CODE|DATA} regions never overlap the kernel now,
so we can clean up the check in efi_reserve_boot_services().

Signed-off-by: Naoya Horiguchi <[email protected]>
---
arch/x86/platform/efi/quirks.c | 23 +----------------------
1 file changed, 1 insertion(+), 22 deletions(-)

diff --git next-20170705/arch/x86/platform/efi/quirks.c next-20170705_patched/arch/x86/platform/efi/quirks.c
index 8a99a2e..191f6f7 100644
--- next-20170705/arch/x86/platform/efi/quirks.c
+++ next-20170705_patched/arch/x86/platform/efi/quirks.c
@@ -292,27 +292,6 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size)
efi_memmap_install(new_phys, num_entries);
}

-/*
- * Helper function for efi_reserve_boot_services() to figure out if we
- * can free regions in efi_free_boot_services().
- *
- * Use this function to ensure we do not free regions owned by somebody
- * else. We must only reserve (and then free) regions:
- *
- * - Not within any part of the kernel
- * - Not the BIOS reserved area (E820_TYPE_RESERVED, E820_TYPE_NVS, etc)
- */
-static bool can_free_region(u64 start, u64 size)
-{
- if (start + size > __pa_symbol(_text) && start <= __pa_symbol(_end))
- return false;
-
- if (!e820__mapped_all(start, start+size, E820_TYPE_RAM))
- return false;
-
- return true;
-}
-
void __init efi_reserve_boot_services(void)
{
efi_memory_desc_t *md;
@@ -350,7 +329,7 @@ void __init efi_reserve_boot_services(void)
* one else cares about it. We own it and can
* free it later.
*/
- if (can_free_region(start, size))
+ if (e820__mapped_all(start, start+size, E820_TYPE_RAM))
continue;
}

--
2.7.0

2017-07-07 07:22:53

by Naoya Horiguchi

[permalink] [raw]
Subject: [PATCH v2 1/2] x86/boot/KASLR: exclude EFI_BOOT_SERVICES_{CODE|DATA} from KASLR's choice

KASLR chooses kernel location from E820_TYPE_RAM regions by walking over
e820 entries now. E820_TYPE_RAM includes EFI_BOOT_SERVICES_CODE and
EFI_BOOT_SERVICES_DATA, so those regions can be the target. According to
UEFI spec, all memory regions marked as EfiBootServicesCode and
EfiBootServicesData are available for free memory after the first call
of ExitBootServices(). So such regions should be usable for kernel on
spec basis.

In x86, however, we have some workaround for broken firmware, where we
keep such regions reserved until SetVirtualAddressMap() is done.
See the following code in should_map_region():

static bool should_map_region(efi_memory_desc_t *md)
{
...
/*
* Map boot services regions as a workaround for buggy
* firmware that accesses them even when they shouldn't.
*
* See efi_{reserve,free}_boot_services().
*/
if (md->type == EFI_BOOT_SERVICES_CODE ||
md->type == EFI_BOOT_SERVICES_DATA)
return false;

This workaround suppressed a boot crash, but potential issues still
remain because no one prevents the regions from overlapping with kernel
image by KASLR.

So let's make sure that EFI_BOOT_SERVICES_{CODE|DATA} regions are never
chosen as kernel memory for the workaround to work fine.

Signed-off-by: Naoya Horiguchi <[email protected]>
---
v1 -> v2:
- switch efi_mirror_found to local variable
- insert break when EFI_MEMORY_MORE_RELIABLE found
---
arch/x86/boot/compressed/kaslr.c | 46 +++++++++++++++++++++++++++++-----------
1 file changed, 34 insertions(+), 12 deletions(-)

diff --git next-20170705/arch/x86/boot/compressed/kaslr.c next-20170705_patched/arch/x86/boot/compressed/kaslr.c
index 94f08fd..29f5c4e 100644
--- next-20170705/arch/x86/boot/compressed/kaslr.c
+++ next-20170705_patched/arch/x86/boot/compressed/kaslr.c
@@ -560,10 +560,8 @@ static void process_mem_region(struct mem_vector *entry,
}
}

-/* Marks if efi mirror regions have been found and handled. */
-static bool efi_mirror_found;
-
-static void process_efi_entry(unsigned long minimum, unsigned long image_size)
+/* Returns true if we really enter efi memmap walk, otherwise returns false. */
+static bool process_efi_entry(unsigned long minimum, unsigned long image_size)
{
struct efi_info *e = &boot_params->efi_info;
struct mem_vector region;
@@ -572,18 +570,18 @@ static void process_efi_entry(unsigned long minimum, unsigned long image_size)
char *signature;
u32 nr_desc;
int i;
-
+ bool efi_mirror_found;

signature = (char *)&boot_params->efi_info.efi_loader_signature;
if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
strncmp(signature, EFI64_LOADER_SIGNATURE, 4))
- return;
+ return false;

#ifdef CONFIG_X86_32
/* Can't handle data above 4GB at this time */
if (e->efi_memmap_hi) {
warn("Memory map is above 4GB, EFI should be disabled.\n");
- return;
+ return false;
}
pmap = e->efi_memmap;
#else
@@ -594,12 +592,37 @@ static void process_efi_entry(unsigned long minimum, unsigned long image_size)
for (i = 0; i < nr_desc; i++) {
md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
- region.start = md->phys_addr;
- region.size = md->num_pages << EFI_PAGE_SHIFT;
- process_mem_region(&region, minimum, image_size);
efi_mirror_found = true;
+ break;
}
}
+
+ for (i = 0; i < nr_desc; i++) {
+ md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
+
+ /*
+ * EFI_BOOT_SERVICES_{CODE|DATA} are avoided because boot
+ * services regions could be accessed after ExitBootServices()
+ * due to the workaround for buggy firmware.
+ */
+ if (!(md->type == EFI_LOADER_CODE ||
+ md->type == EFI_LOADER_DATA ||
+ md->type == EFI_CONVENTIONAL_MEMORY))
+ continue;
+
+ if (efi_mirror_found &&
+ !(md->attribute & EFI_MEMORY_MORE_RELIABLE))
+ continue;
+
+ region.start = md->phys_addr;
+ region.size = md->num_pages << EFI_PAGE_SHIFT;
+ process_mem_region(&region, minimum, image_size);
+ if (slot_area_index == MAX_SLOT_AREA) {
+ debug_putstr("Aborted EFI scan (slot_areas full)!\n");
+ break;
+ }
+ }
+ return true;
}

static void process_e820_entry(unsigned long minimum, unsigned long image_size)
@@ -637,8 +660,7 @@ static unsigned long find_random_phys_addr(unsigned long minimum,
minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);

#ifdef CONFIG_EFI
- process_efi_entry(minimum, image_size);
- if (efi_mirror_found)
+ if (process_efi_entry(minimum, image_size))
return slots_fetch_random();
#endif

--
2.7.0

2017-07-07 10:57:03

by Matt Fleming

[permalink] [raw]
Subject: Re: [PATCH] x86/boot/KASLR: exclude EFI_BOOT_SERVICES_{CODE|DATA} from KASLR's choice

On Fri, 07 Jul, at 11:07:59AM, Baoquan He wrote:
> On 07/06/17 at 03:57pm, Matt Fleming wrote:
> > On Thu, 06 Jul, at 08:31:07AM, Naoya Horiguchi wrote:
> > > + for (i = 0; i < nr_desc; i++) {
> > > + md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
> > > +
> > > + /*
> > > + * EFI_BOOT_SERVICES_{CODE|DATA} are avoided because boot
> > > + * services regions could be accessed after ExitBootServices()
> > > + * due to the workaround for buggy firmware.
> > > + */
> > > + if (!(md->type == EFI_LOADER_CODE ||
> > > + md->type == EFI_LOADER_DATA ||
> > > + md->type == EFI_CONVENTIONAL_MEMORY))
> > > + continue;
> >
> > Wouldn't it make more sense to *only* use EFI_CONVENTIONAL_MEMORY?
> >
> > You can't re-use EFI_LOADER_* regions because the kaslr code is run so
> > early in boot that you've no idea if data the kernel will need is in
> > those EFI_LOADER_* regions.
> >
> > For example, we pass struct setup_data objects inside of
> > EFI_LOADER_DATA regions.
>
> It doesn't matter because we have tried to avoid those memory setup_data
> resides in in mem_avoid_overlap(). Here discarding EFI_LOADER_* could
> discard the whole regions while setup_data could occupy small part of
> them.

What about the GDT that we allocate in the x86 EFI boot stub as
EFI_LOADER_DATA? Are there functions to avoid that too?

What about any future uses we add? Who's going to remember to patch
the kaslr code which now duplicates some of the EFI memory map logic?

All of these problems can avoided if you just stick with
EFI_CONVENTIONAL_MEMORY.

Honestly, how much memory do we expect to waste if we ignore
EFI_LOADER_* regions?

Also, the fact that you're referencing EFI-specific boot quirks in the
kaslr code should be a massive red flag that you're playing with the
innards of the EFI subsystem.

2017-07-07 10:58:18

by Matt Fleming

[permalink] [raw]
Subject: Re: [PATCH] x86/boot/KASLR: exclude EFI_BOOT_SERVICES_{CODE|DATA} from KASLR's choice

On Fri, 07 Jul, at 06:11:24AM, Naoya Horiguchi wrote:
> On Fri, Jul 07, 2017 at 11:07:59AM +0800, Baoquan He wrote:
> > On 07/06/17 at 03:57pm, Matt Fleming wrote:
> > > On Thu, 06 Jul, at 08:31:07AM, Naoya Horiguchi wrote:
> > > > + for (i = 0; i < nr_desc; i++) {
> > > > + md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
> > > > +
> > > > + /*
> > > > + * EFI_BOOT_SERVICES_{CODE|DATA} are avoided because boot
> > > > + * services regions could be accessed after ExitBootServices()
> > > > + * due to the workaround for buggy firmware.
> > > > + */
> > > > + if (!(md->type == EFI_LOADER_CODE ||
> > > > + md->type == EFI_LOADER_DATA ||
> > > > + md->type == EFI_CONVENTIONAL_MEMORY))
> > > > + continue;
> > >
> > > Wouldn't it make more sense to *only* use EFI_CONVENTIONAL_MEMORY?
> > >
> > > You can't re-use EFI_LOADER_* regions because the kaslr code is run so
> > > early in boot that you've no idea if data the kernel will need is in
> > > those EFI_LOADER_* regions.
> > >
> > > For example, we pass struct setup_data objects inside of
> > > EFI_LOADER_DATA regions.
> >
> > It doesn't matter because we have tried to avoid those memory setup_data
> > resides in in mem_avoid_overlap(). Here discarding EFI_LOADER_* could
> > discard the whole regions while setup_data could occupy small part of
> > them.
>
> Hi Matt, Baoquan,
>
> I added these three checks to accept any regions corresponding to
> E820_TYPE_RAM except EFI_BOOT_SERVICES_*, just thinking of that it's minimum
> surprising. Baoquan gave a good justification on that, so I'll leave it
> as-is in next version.

I disagree. The least surprising option would be to use the region
type that everyone (boot loader, kernel, firmware) agrees is free:
EFI_CONVENTIONAL_MEMORY.

2017-07-09 10:44:22

by Baoquan He

[permalink] [raw]
Subject: Re: [PATCH] x86/boot/KASLR: exclude EFI_BOOT_SERVICES_{CODE|DATA} from KASLR's choice

On 07/07/17 at 11:56am, Matt Fleming wrote:
> On Fri, 07 Jul, at 11:07:59AM, Baoquan He wrote:
> > On 07/06/17 at 03:57pm, Matt Fleming wrote:
> > > On Thu, 06 Jul, at 08:31:07AM, Naoya Horiguchi wrote:
> > > > + for (i = 0; i < nr_desc; i++) {
> > > > + md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
> > > > +
> > > > + /*
> > > > + * EFI_BOOT_SERVICES_{CODE|DATA} are avoided because boot
> > > > + * services regions could be accessed after ExitBootServices()
> > > > + * due to the workaround for buggy firmware.
> > > > + */
> > > > + if (!(md->type == EFI_LOADER_CODE ||
> > > > + md->type == EFI_LOADER_DATA ||
> > > > + md->type == EFI_CONVENTIONAL_MEMORY))
> > > > + continue;
> > >
> > > Wouldn't it make more sense to *only* use EFI_CONVENTIONAL_MEMORY?
> > >
> > > You can't re-use EFI_LOADER_* regions because the kaslr code is run so
> > > early in boot that you've no idea if data the kernel will need is in
> > > those EFI_LOADER_* regions.
> > >
> > > For example, we pass struct setup_data objects inside of
> > > EFI_LOADER_DATA regions.
> >
> > It doesn't matter because we have tried to avoid those memory setup_data
> > resides in in mem_avoid_overlap(). Here discarding EFI_LOADER_* could
> > discard the whole regions while setup_data could occupy small part of
> > them.
>
> What about the GDT that we allocate in the x86 EFI boot stub as
> EFI_LOADER_DATA? Are there functions to avoid that too?

This is a very good question. For the current e820 processing, we don't
avoid GDT allocated in x86 EFI STUB because we have no information about
GDT in EFI STUB. You can see setup_e820() in boot/compressed/eboot.c
grabs EFI_LOADER_* regions as E820_TYPE_RAM. Now the GDT which is built
in EFI STUB code will live till kernel is ready to build its onw gdt(in
kernel/head_64.S). After that it become useless and can be reclaimed for
reusing. I believe Naoya must have read boot/compressed/eboot.c and
found setup_e820() code, then added EFI_LOADER_* for kaslr usage.

Previously I thought e820 should take the precedence to be processed
on uefi system because continuous efi memory regions will be merged into
e820 regions. Using e820 can avoid those small efi regions or the left
part of efi regions being discarded directly when they are smaller than
kernel image size. Now considering this GDT in efi stub issue, we have to
try efi regions first on uefi system. Otherwise it may cause problem that
kernel could be decompressed onto the GDT tables of efi stub.

In fact, I am wondering if we can reuse the gdt table which is built
before entering into long mode in boot/compressed/head_64.S, but not
allocate memory for GDT in efi stub. The thing is 32bit system doesn't
have this gdt table in boot compressing stage since it has one before
entering into protection mode. Just personal thought.
>
> What about any future uses we add? Who's going to remember to patch
> the kaslr code which now duplicates some of the EFI memory map logic?
>
> All of these problems can avoided if you just stick with
> EFI_CONVENTIONAL_MEMORY.

Below is the dmesg with 'efi=debug' adding on my ovmf uefi kvm guest.
Since uefi could do a lot of thing when loading OS, E.g loading into any
kind of storage driver application, firmware usually reserve a chunk of
memory of hunderes of Mega Bytes. Like this one:

******
[ +0.000000] efi: mem12: [Loader Data | ||WB|WT|WC|UC] range=[0x000000005c8eb000-0x000000007bfbdfff] (502MB)
******

We can't say it's a big deal, but 500MB is also not so trival that we
can easily ignore it without any consideration. Just uefi spec doesn't
define the limitation of Loader memory and Conventional memory and if
Conventional memory has to be present. With my understanding, there won't
be any problem if only Loader memory exists.

Further more, kaslr is not a precise searching job, no specific address
has to be positioned. Even it's OK that the physical address
randomization failed to find a new address randomly. So it's fine to me
that we don't take EFI_LOADER_* memory into consideration for kaslr. BUT
we need make this clear that why not, and if we can do anyting to make
it better.

[ +0.000000] efi: SMBIOS=0x7fed5000 ACPI=0x7ff03000 ACPI 2.0=0x7ff03014 MEMATTR=0x7ea50218
[ +0.000000] efi: mem00: [Boot Code | | | | | | | | |WB|WT|WC|UC] range=[0x0000000000000000-0x0000000000000fff] (0MB)
[ +0.000000] efi: mem01: [Loader Data | | | | | | | | |WB|WT|WC|UC] range=[0x0000000000001000-0x0000000000001fff] (0MB)
[ +0.000000] efi: mem02: [Conventional Memory| | | | | | | | |WB|WT|WC|UC] range=[0x0000000000002000-0x000000000009ffff] (0MB)
[ +0.000000] efi: mem03: [Conventional Memory| | | | | | | | |WB|WT|WC|UC] range=[0x0000000000100000-0x0000000000805fff] (7MB)
[ +0.000000] efi: mem04: [Boot Data | | | | | | | | |WB|WT|WC|UC] range=[0x0000000000806000-0x0000000000806fff] (0MB)
[ +0.000000] efi: mem05: [Conventional Memory| | | | | | | | |WB|WT|WC|UC] range=[0x0000000000807000-0x000000000081ffff] (0MB)
[ +0.000000] efi: mem06: [Boot Data | | | | | | | | |WB|WT|WC|UC] range=[0x0000000000820000-0x00000000012fffff] (10MB)
[ +0.000000] efi: mem07: [Conventional Memory| | | | | | | | |WB|WT|WC|UC] range=[0x0000000001300000-0x0000000001ffffff] (13MB)
[ +0.000000] efi: mem08: [Loader Data | | | | | | | | |WB|WT|WC|UC] range=[0x0000000002000000-0x0000000003614fff] (22MB)
[ +0.000000] efi: mem09: [Conventional Memory| | | | | | | | |WB|WT|WC|UC] range=[0x0000000003615000-0x000000003d6b3fff] (928MB)
[ +0.000000] efi: mem10: [Loader Data | | | | | | | | |WB|WT|WC|UC] range=[0x000000003d6b4000-0x000000003fffffff] (41MB)
[ +0.000000] efi: mem11: [Conventional Memory| | | | | | | | |WB|WT|WC|UC] range=[0x0000000040000000-0x000000005c8eafff] (456MB)
[ +0.000000] efi: mem12: [Loader Data | | | | | | | | |WB|WT|WC|UC] range=[0x000000005c8eb000-0x000000007bfbdfff] (502MB)
[ +0.000000] efi: mem13: [Boot Data | | | | | | | | |WB|WT|WC|UC] range=[0x000000007bfbe000-0x000000007bfddfff] (0MB)
[ +0.000000] efi: mem14: [Conventional Memory| | | | | | | | |WB|WT|WC|UC] range=[0x000000007bfde000-0x000000007e6effff] (39MB)
[ +0.000000] efi: mem15: [Loader Data | | | | | | | | |WB|WT|WC|UC] range=[0x000000007e6f0000-0x000000007e7e3fff] (0MB)
[ +0.000000] efi: mem16: [Loader Code | | | | | | | | |WB|WT|WC|UC] range=[0x000000007e7e4000-0x000000007e90afff] (1MB)
[ +0.000000] efi: mem17: [Conventional Memory| | | | | | | | |WB|WT|WC|UC] range=[0x000000007e90b000-0x000000007e914fff] (0MB)
[ +0.000000] efi: mem18: [Loader Data | | | | | | | | |WB|WT|WC|UC] range=[0x000000007e915000-0x000000007ea46fff] (1MB)
[ +0.000000] efi: mem19: [Boot Data | | | | | | | | |WB|WT|WC|UC] range=[0x000000007ea47000-0x000000007eb8dfff] (1MB)
[ +0.000000] efi: mem20: [Boot Code | | | | | | | | |WB|WT|WC|UC] range=[0x000000007eb8e000-0x000000007edd0fff] (2MB)
[ +0.000000] efi: mem21: [Runtime Data |RUN| | | | | | | |WB|WT|WC|UC] range=[0x000000007edd1000-0x000000007edd5fff] (0MB)
[ +0.000000] efi: mem22: [Runtime Code |RUN| | | | | | | |WB|WT|WC|UC] range=[0x000000007edd6000-0x000000007edddfff] (0MB)
[ +0.000000] efi: mem23: [Runtime Data |RUN| | | | | | | |WB|WT|WC|UC] range=[0x000000007edde000-0x000000007ede2fff] (0MB)
[ +0.000000] efi: mem24: [Runtime Code |RUN| | | | | | | |WB|WT|WC|UC] range=[0x000000007ede3000-0x000000007ede8fff] (0MB)
[ +0.000000] efi: mem25: [Runtime Data |RUN| | | | | | | |WB|WT|WC|UC] range=[0x000000007ede9000-0x000000007ee12fff] (0MB)
[ +0.000000] efi: mem26: [Runtime Code |RUN| | | | | | | |WB|WT|WC|UC] range=[0x000000007ee13000-0x000000007ee23fff] (0MB)
[ +0.000000] efi: mem27: [Boot Data | | | | | | | | |WB|WT|WC|UC] range=[0x000000007ee24000-0x000000007f46dfff] (6MB)
[ +0.000000] efi: mem28: [Conventional Memory| | | | | | | | |WB|WT|WC|UC] range=[0x000000007f46e000-0x000000007f477fff] (0MB)
[ +0.000000] efi: mem29: [Boot Data | | | | | | | | |WB|WT|WC|UC] range=[0x000000007f478000-0x000000007fd23fff] (8MB)
[ +0.000000] efi: mem30: [Conventional Memory| | | | | | | | |WB|WT|WC|UC] range=[0x000000007fd24000-0x000000007fd24fff] (0MB)
[ +0.000000] efi: mem31: [Boot Code | | | | | | | | |WB|WT|WC|UC] range=[0x000000007fd25000-0x000000007fea3fff] (1MB)
[ +0.000000] efi: mem32: [Runtime Code |RUN| | | | | | | |WB|WT|WC|UC] range=[0x000000007fea4000-0x000000007fed3fff] (0MB)
[ +0.000000] efi: mem33: [Runtime Data |RUN| | | | | | | |WB|WT|WC|UC] range=[0x000000007fed4000-0x000000007fef7fff] (0MB)
[ +0.000000] efi: mem34: [Reserved | | | | | | | | |WB|WT|WC|UC] range=[0x000000007fef8000-0x000000007fefbfff] (0MB)
[ +0.000000] efi: mem35: [ACPI Reclaim Memory| | | | | | | | |WB|WT|WC|UC] range=[0x000000007fefc000-0x000000007ff03fff] (0MB)
[ +0.000000] efi: mem36: [ACPI Memory NVS | | | | | | | | |WB|WT|WC|UC] range=[0x000000007ff04000-0x000000007ff07fff] (0MB)
[ +0.000000] efi: mem37: [Boot Data | | | | | | | | |WB|WT|WC|UC] range=[0x000000007ff08000-0x000000007ff6afff] (0MB)
[ +0.000000] efi: mem38: [Boot Code | | | | | | | | |WB|WT|WC|UC] range=[0x000000007ff6b000-0x000000007ff95fff] (0MB)
[ +0.000000] efi: mem39: [Boot Data | | | | | | | | |WB|WT|WC|UC] range=[0x000000007ff96000-0x000000007ffa6fff] (0MB)
[ +0.000000] efi: mem40: [Boot Code | | | | | | | | |WB|WT|WC|UC] range=[0x000000007ffa7000-0x000000007ffcffff] (0MB)
[ +0.000000] efi: mem41: [Runtime Data |RUN| | | | | | | |WB|WT|WC|UC] range=[0x000000007ffd0000-0x000000007ffeffff] (0MB)
[ +0.000000] efi: mem42: [Conventional Memory| | | | | | | | |WB|WT|WC|UC] range=[0x000000007fff0000-0x000000007fffffff] (0MB)
[ +0.000000] efi: mem43: [Runtime Data |RUN| | | | | | || | | |UC] range=[0x00000000ffe00000-0x00000000ffffffff] (2MB)

>
> Honestly, how much memory do we expect to waste if we ignore
> EFI_LOADER_* regions?
>
> Also, the fact that you're referencing EFI-specific boot quirks in the
> kaslr code should be a massive red flag that you're playing with the
> innards of the EFI subsystem.

Questions has been answered in above words.

2017-07-09 14:27:57

by Baoquan He

[permalink] [raw]
Subject: Re: [PATCH] x86/boot/KASLR: exclude EFI_BOOT_SERVICES_{CODE|DATA} from KASLR's choice

On 07/09/17 at 06:44pm, Baoquan He wrote:
> On 07/07/17 at 11:56am, Matt Fleming wrote:
> > On Fri, 07 Jul, at 11:07:59AM, Baoquan He wrote:
> > > On 07/06/17 at 03:57pm, Matt Fleming wrote:
> > > > On Thu, 06 Jul, at 08:31:07AM, Naoya Horiguchi wrote:
> > > > > + for (i = 0; i < nr_desc; i++) {
> > > > > + md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
> > > > > +
> > > > > + /*
> > > > > + * EFI_BOOT_SERVICES_{CODE|DATA} are avoided because boot
> > > > > + * services regions could be accessed after ExitBootServices()
> > > > > + * due to the workaround for buggy firmware.
> > > > > + */
> > > > > + if (!(md->type == EFI_LOADER_CODE ||
> > > > > + md->type == EFI_LOADER_DATA ||
> > > > > + md->type == EFI_CONVENTIONAL_MEMORY))
> > > > > + continue;
> > > >
> > > > Wouldn't it make more sense to *only* use EFI_CONVENTIONAL_MEMORY?
> > > >
> > > > You can't re-use EFI_LOADER_* regions because the kaslr code is run so
> > > > early in boot that you've no idea if data the kernel will need is in
> > > > those EFI_LOADER_* regions.
> > > >
> > > > For example, we pass struct setup_data objects inside of
> > > > EFI_LOADER_DATA regions.
> > >
> > > It doesn't matter because we have tried to avoid those memory setup_data
> > > resides in in mem_avoid_overlap(). Here discarding EFI_LOADER_* could
> > > discard the whole regions while setup_data could occupy small part of
> > > them.
> >
> > What about the GDT that we allocate in the x86 EFI boot stub as
> > EFI_LOADER_DATA? Are there functions to avoid that too?
>
> This is a very good question. For the current e820 processing, we don't
> avoid GDT allocated in x86 EFI STUB because we have no information about
> GDT in EFI STUB. You can see setup_e820() in boot/compressed/eboot.c

I was wrong here, we can know gdt information by sgdt. Surely GDT is
just an example, as Matt mentioned, there could be other stuffs we need
avoid, or future change of uefi.

> grabs EFI_LOADER_* regions as E820_TYPE_RAM. Now the GDT which is built
> in EFI STUB code will live till kernel is ready to build its onw gdt(in
> kernel/head_64.S). After that it become useless and can be reclaimed for
> reusing. I believe Naoya must have read boot/compressed/eboot.c and
> found setup_e820() code, then added EFI_LOADER_* for kaslr usage.
>
> Previously I thought e820 should take the precedence to be processed
> on uefi system because continuous efi memory regions will be merged into
> e820 regions. Using e820 can avoid those small efi regions or the left
> part of efi regions being discarded directly when they are smaller than
> kernel image size. Now considering this GDT in efi stub issue, we have to
> try efi regions first on uefi system. Otherwise it may cause problem that
> kernel could be decompressed onto the GDT tables of efi stub.
>
> In fact, I am wondering if we can reuse the gdt table which is built
> before entering into long mode in boot/compressed/head_64.S, but not
> allocate memory for GDT in efi stub. The thing is 32bit system doesn't
> have this gdt table in boot compressing stage since it has one before
> entering into protection mode. Just personal thought.
> >
> > What about any future uses we add? Who's going to remember to patch
> > the kaslr code which now duplicates some of the EFI memory map logic?
> >
> > All of these problems can avoided if you just stick with
> > EFI_CONVENTIONAL_MEMORY.
>
> Below is the dmesg with 'efi=debug' adding on my ovmf uefi kvm guest.
> Since uefi could do a lot of thing when loading OS, E.g loading into any
> kind of storage driver application, firmware usually reserve a chunk of
> memory of hunderes of Mega Bytes. Like this one:
>
> ******
> [ +0.000000] efi: mem12: [Loader Data | ||WB|WT|WC|UC] range=[0x000000005c8eb000-0x000000007bfbdfff] (502MB)
> ******
>
> We can't say it's a big deal, but 500MB is also not so trival that we
> can easily ignore it without any consideration. Just uefi spec doesn't
> define the limitation of Loader memory and Conventional memory and if
> Conventional memory has to be present. With my understanding, there won't
> be any problem if only Loader memory exists.
>
> Further more, kaslr is not a precise searching job, no specific address
> has to be positioned. Even it's OK that the physical address
> randomization failed to find a new address randomly. So it's fine to me
> that we don't take EFI_LOADER_* memory into consideration for kaslr. BUT
> we need make this clear that why not, and if we can do anyting to make
> it better.
>
> [ +0.000000] efi: SMBIOS=0x7fed5000 ACPI=0x7ff03000 ACPI 2.0=0x7ff03014 MEMATTR=0x7ea50218
> [ +0.000000] efi: mem00: [Boot Code | | | | | | | | |WB|WT|WC|UC] range=[0x0000000000000000-0x0000000000000fff] (0MB)
> [ +0.000000] efi: mem01: [Loader Data | | | | | | | | |WB|WT|WC|UC] range=[0x0000000000001000-0x0000000000001fff] (0MB)
> [ +0.000000] efi: mem02: [Conventional Memory| | | | | | | | |WB|WT|WC|UC] range=[0x0000000000002000-0x000000000009ffff] (0MB)
> [ +0.000000] efi: mem03: [Conventional Memory| | | | | | | | |WB|WT|WC|UC] range=[0x0000000000100000-0x0000000000805fff] (7MB)
> [ +0.000000] efi: mem04: [Boot Data | | | | | | | | |WB|WT|WC|UC] range=[0x0000000000806000-0x0000000000806fff] (0MB)
> [ +0.000000] efi: mem05: [Conventional Memory| | | | | | | | |WB|WT|WC|UC] range=[0x0000000000807000-0x000000000081ffff] (0MB)
> [ +0.000000] efi: mem06: [Boot Data | | | | | | | | |WB|WT|WC|UC] range=[0x0000000000820000-0x00000000012fffff] (10MB)
> [ +0.000000] efi: mem07: [Conventional Memory| | | | | | | | |WB|WT|WC|UC] range=[0x0000000001300000-0x0000000001ffffff] (13MB)
> [ +0.000000] efi: mem08: [Loader Data | | | | | | | | |WB|WT|WC|UC] range=[0x0000000002000000-0x0000000003614fff] (22MB)
> [ +0.000000] efi: mem09: [Conventional Memory| | | | | | | | |WB|WT|WC|UC] range=[0x0000000003615000-0x000000003d6b3fff] (928MB)
> [ +0.000000] efi: mem10: [Loader Data | | | | | | | | |WB|WT|WC|UC] range=[0x000000003d6b4000-0x000000003fffffff] (41MB)
> [ +0.000000] efi: mem11: [Conventional Memory| | | | | | | | |WB|WT|WC|UC] range=[0x0000000040000000-0x000000005c8eafff] (456MB)
> [ +0.000000] efi: mem12: [Loader Data | | | | | | | | |WB|WT|WC|UC] range=[0x000000005c8eb000-0x000000007bfbdfff] (502MB)
> [ +0.000000] efi: mem13: [Boot Data | | | | | | | | |WB|WT|WC|UC] range=[0x000000007bfbe000-0x000000007bfddfff] (0MB)
> [ +0.000000] efi: mem14: [Conventional Memory| | | | | | | | |WB|WT|WC|UC] range=[0x000000007bfde000-0x000000007e6effff] (39MB)
> [ +0.000000] efi: mem15: [Loader Data | | | | | | | | |WB|WT|WC|UC] range=[0x000000007e6f0000-0x000000007e7e3fff] (0MB)
> [ +0.000000] efi: mem16: [Loader Code | | | | | | | | |WB|WT|WC|UC] range=[0x000000007e7e4000-0x000000007e90afff] (1MB)
> [ +0.000000] efi: mem17: [Conventional Memory| | | | | | | | |WB|WT|WC|UC] range=[0x000000007e90b000-0x000000007e914fff] (0MB)
> [ +0.000000] efi: mem18: [Loader Data | | | | | | | | |WB|WT|WC|UC] range=[0x000000007e915000-0x000000007ea46fff] (1MB)
> [ +0.000000] efi: mem19: [Boot Data | | | | | | | | |WB|WT|WC|UC] range=[0x000000007ea47000-0x000000007eb8dfff] (1MB)
> [ +0.000000] efi: mem20: [Boot Code | | | | | | | | |WB|WT|WC|UC] range=[0x000000007eb8e000-0x000000007edd0fff] (2MB)
> [ +0.000000] efi: mem21: [Runtime Data |RUN| | | | | | | |WB|WT|WC|UC] range=[0x000000007edd1000-0x000000007edd5fff] (0MB)
> [ +0.000000] efi: mem22: [Runtime Code |RUN| | | | | | | |WB|WT|WC|UC] range=[0x000000007edd6000-0x000000007edddfff] (0MB)
> [ +0.000000] efi: mem23: [Runtime Data |RUN| | | | | | | |WB|WT|WC|UC] range=[0x000000007edde000-0x000000007ede2fff] (0MB)
> [ +0.000000] efi: mem24: [Runtime Code |RUN| | | | | | | |WB|WT|WC|UC] range=[0x000000007ede3000-0x000000007ede8fff] (0MB)
> [ +0.000000] efi: mem25: [Runtime Data |RUN| | | | | | | |WB|WT|WC|UC] range=[0x000000007ede9000-0x000000007ee12fff] (0MB)
> [ +0.000000] efi: mem26: [Runtime Code |RUN| | | | | | | |WB|WT|WC|UC] range=[0x000000007ee13000-0x000000007ee23fff] (0MB)
> [ +0.000000] efi: mem27: [Boot Data | | | | | | | | |WB|WT|WC|UC] range=[0x000000007ee24000-0x000000007f46dfff] (6MB)
> [ +0.000000] efi: mem28: [Conventional Memory| | | | | | | | |WB|WT|WC|UC] range=[0x000000007f46e000-0x000000007f477fff] (0MB)
> [ +0.000000] efi: mem29: [Boot Data | | | | | | | | |WB|WT|WC|UC] range=[0x000000007f478000-0x000000007fd23fff] (8MB)
> [ +0.000000] efi: mem30: [Conventional Memory| | | | | | | | |WB|WT|WC|UC] range=[0x000000007fd24000-0x000000007fd24fff] (0MB)
> [ +0.000000] efi: mem31: [Boot Code | | | | | | | | |WB|WT|WC|UC] range=[0x000000007fd25000-0x000000007fea3fff] (1MB)
> [ +0.000000] efi: mem32: [Runtime Code |RUN| | | | | | | |WB|WT|WC|UC] range=[0x000000007fea4000-0x000000007fed3fff] (0MB)
> [ +0.000000] efi: mem33: [Runtime Data |RUN| | | | | | | |WB|WT|WC|UC] range=[0x000000007fed4000-0x000000007fef7fff] (0MB)
> [ +0.000000] efi: mem34: [Reserved | | | | | | | | |WB|WT|WC|UC] range=[0x000000007fef8000-0x000000007fefbfff] (0MB)
> [ +0.000000] efi: mem35: [ACPI Reclaim Memory| | | | | | | | |WB|WT|WC|UC] range=[0x000000007fefc000-0x000000007ff03fff] (0MB)
> [ +0.000000] efi: mem36: [ACPI Memory NVS | | | | | | | | |WB|WT|WC|UC] range=[0x000000007ff04000-0x000000007ff07fff] (0MB)
> [ +0.000000] efi: mem37: [Boot Data | | | | | | | | |WB|WT|WC|UC] range=[0x000000007ff08000-0x000000007ff6afff] (0MB)
> [ +0.000000] efi: mem38: [Boot Code | | | | | | | | |WB|WT|WC|UC] range=[0x000000007ff6b000-0x000000007ff95fff] (0MB)
> [ +0.000000] efi: mem39: [Boot Data | | | | | | | | |WB|WT|WC|UC] range=[0x000000007ff96000-0x000000007ffa6fff] (0MB)
> [ +0.000000] efi: mem40: [Boot Code | | | | | | | | |WB|WT|WC|UC] range=[0x000000007ffa7000-0x000000007ffcffff] (0MB)
> [ +0.000000] efi: mem41: [Runtime Data |RUN| | | | | | | |WB|WT|WC|UC] range=[0x000000007ffd0000-0x000000007ffeffff] (0MB)
> [ +0.000000] efi: mem42: [Conventional Memory| | | | | | | | |WB|WT|WC|UC] range=[0x000000007fff0000-0x000000007fffffff] (0MB)
> [ +0.000000] efi: mem43: [Runtime Data |RUN| | | | | | || | | |UC] range=[0x00000000ffe00000-0x00000000ffffffff] (2MB)
>
> >
> > Honestly, how much memory do we expect to waste if we ignore
> > EFI_LOADER_* regions?
> >
> > Also, the fact that you're referencing EFI-specific boot quirks in the
> > kaslr code should be a massive red flag that you're playing with the
> > innards of the EFI subsystem.
>
> Questions has been answered in above words.

2017-07-10 05:48:19

by Naoya Horiguchi

[permalink] [raw]
Subject: Re: [PATCH] x86/boot/KASLR: exclude EFI_BOOT_SERVICES_{CODE|DATA} from KASLR's choice

On Fri, Jul 07, 2017 at 11:58:14AM +0100, Matt Fleming wrote:
> On Fri, 07 Jul, at 06:11:24AM, Naoya Horiguchi wrote:
> > On Fri, Jul 07, 2017 at 11:07:59AM +0800, Baoquan He wrote:
> > > On 07/06/17 at 03:57pm, Matt Fleming wrote:
> > > > On Thu, 06 Jul, at 08:31:07AM, Naoya Horiguchi wrote:
> > > > > + for (i = 0; i < nr_desc; i++) {
> > > > > + md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
> > > > > +
> > > > > + /*
> > > > > + * EFI_BOOT_SERVICES_{CODE|DATA} are avoided because boot
> > > > > + * services regions could be accessed after ExitBootServices()
> > > > > + * due to the workaround for buggy firmware.
> > > > > + */
> > > > > + if (!(md->type == EFI_LOADER_CODE ||
> > > > > + md->type == EFI_LOADER_DATA ||
> > > > > + md->type == EFI_CONVENTIONAL_MEMORY))
> > > > > + continue;
> > > >
> > > > Wouldn't it make more sense to *only* use EFI_CONVENTIONAL_MEMORY?
> > > >
> > > > You can't re-use EFI_LOADER_* regions because the kaslr code is run so
> > > > early in boot that you've no idea if data the kernel will need is in
> > > > those EFI_LOADER_* regions.
> > > >
> > > > For example, we pass struct setup_data objects inside of
> > > > EFI_LOADER_DATA regions.
> > >
> > > It doesn't matter because we have tried to avoid those memory setup_data
> > > resides in in mem_avoid_overlap(). Here discarding EFI_LOADER_* could
> > > discard the whole regions while setup_data could occupy small part of
> > > them.
> >
> > Hi Matt, Baoquan,
> >
> > I added these three checks to accept any regions corresponding to
> > E820_TYPE_RAM except EFI_BOOT_SERVICES_*, just thinking of that it's minimum
> > surprising. Baoquan gave a good justification on that, so I'll leave it
> > as-is in next version.
>
> I disagree. The least surprising option would be to use the region
> type that everyone (boot loader, kernel, firmware) agrees is free:
> EFI_CONVENTIONAL_MEMORY.

OK, I will do it.

2017-07-10 05:51:45

by Naoya Horiguchi

[permalink] [raw]
Subject: [PATCH v3 1/2] x86/boot/KASLR: exclude EFI_BOOT_SERVICES_{CODE|DATA} from KASLR's choice

KASLR chooses kernel location from E820_TYPE_RAM regions by walking over
e820 entries now. E820_TYPE_RAM includes EFI_BOOT_SERVICES_CODE and
EFI_BOOT_SERVICES_DATA, so those regions can be the target. According to
UEFI spec, all memory regions marked as EfiBootServicesCode and
EfiBootServicesData are available for free memory after the first call
of ExitBootServices(). So such regions should be usable for kernel on
spec basis.

In x86, however, we have some workaround for broken firmware, where we
keep such regions reserved until SetVirtualAddressMap() is done.
See the following code in should_map_region():

static bool should_map_region(efi_memory_desc_t *md)
{
...
/*
* Map boot services regions as a workaround for buggy
* firmware that accesses them even when they shouldn't.
*
* See efi_{reserve,free}_boot_services().
*/
if (md->type == EFI_BOOT_SERVICES_CODE ||
md->type == EFI_BOOT_SERVICES_DATA)
return false;

This workaround suppressed a boot crash, but potential issues still
remain because no one prevents the regions from overlapping with kernel
image by KASLR.

So let's make sure that EFI_BOOT_SERVICES_{CODE|DATA} regions are never
chosen as kernel memory for the workaround to work fine.

Signed-off-by: Naoya Horiguchi <[email protected]>
---
v2 -> v3:
- skip EFI_LOADER_CODE and EFI_LOADER_DATA in region scan

v1 -> v2:
- switch efi_mirror_found to local variable
- insert break when EFI_MEMORY_MORE_RELIABLE found
---
arch/x86/boot/compressed/kaslr.c | 44 +++++++++++++++++++++++++++++-----------
1 file changed, 32 insertions(+), 12 deletions(-)

diff --git next-20170705/arch/x86/boot/compressed/kaslr.c next-20170705_patched/arch/x86/boot/compressed/kaslr.c
index 94f08fd..44778e9 100644
--- next-20170705/arch/x86/boot/compressed/kaslr.c
+++ next-20170705_patched/arch/x86/boot/compressed/kaslr.c
@@ -560,10 +560,8 @@ static void process_mem_region(struct mem_vector *entry,
}
}

-/* Marks if efi mirror regions have been found and handled. */
-static bool efi_mirror_found;
-
-static void process_efi_entry(unsigned long minimum, unsigned long image_size)
+/* Returns true if we really enter efi memmap walk, otherwise returns false. */
+static bool process_efi_entry(unsigned long minimum, unsigned long image_size)
{
struct efi_info *e = &boot_params->efi_info;
struct mem_vector region;
@@ -572,18 +570,18 @@ static void process_efi_entry(unsigned long minimum, unsigned long image_size)
char *signature;
u32 nr_desc;
int i;
-
+ bool efi_mirror_found;

signature = (char *)&boot_params->efi_info.efi_loader_signature;
if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
strncmp(signature, EFI64_LOADER_SIGNATURE, 4))
- return;
+ return false;

#ifdef CONFIG_X86_32
/* Can't handle data above 4GB at this time */
if (e->efi_memmap_hi) {
warn("Memory map is above 4GB, EFI should be disabled.\n");
- return;
+ return false;
}
pmap = e->efi_memmap;
#else
@@ -594,12 +592,35 @@ static void process_efi_entry(unsigned long minimum, unsigned long image_size)
for (i = 0; i < nr_desc; i++) {
md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
- region.start = md->phys_addr;
- region.size = md->num_pages << EFI_PAGE_SHIFT;
- process_mem_region(&region, minimum, image_size);
efi_mirror_found = true;
+ break;
}
}
+
+ for (i = 0; i < nr_desc; i++) {
+ md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
+
+ /*
+ * According to spec, EFI_BOOT_SERVICES_{CODE|DATA} are also
+ * available for kernel image, but we don't include them for
+ * the workaround for buggy firmware.
+ */
+ if (md->type != EFI_CONVENTIONAL_MEMORY)
+ continue;
+
+ if (efi_mirror_found &&
+ !(md->attribute & EFI_MEMORY_MORE_RELIABLE))
+ continue;
+
+ region.start = md->phys_addr;
+ region.size = md->num_pages << EFI_PAGE_SHIFT;
+ process_mem_region(&region, minimum, image_size);
+ if (slot_area_index == MAX_SLOT_AREA) {
+ debug_putstr("Aborted EFI scan (slot_areas full)!\n");
+ break;
+ }
+ }
+ return true;
}

static void process_e820_entry(unsigned long minimum, unsigned long image_size)
@@ -637,8 +658,7 @@ static unsigned long find_random_phys_addr(unsigned long minimum,
minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);

#ifdef CONFIG_EFI
- process_efi_entry(minimum, image_size);
- if (efi_mirror_found)
+ if (process_efi_entry(minimum, image_size))
return slots_fetch_random();
#endif

--
2.7.0

2017-07-10 05:51:54

by Naoya Horiguchi

[permalink] [raw]
Subject: [PATCH v3 2/2] x86/efi: clean up dead code around efi_reserve_boot_services()

EFI_BOOT_SERVICES_{CODE|DATA} regions never overlap the kernel now,
so we can clean up the check in efi_reserve_boot_services().

Signed-off-by: Naoya Horiguchi <[email protected]>
---
arch/x86/platform/efi/quirks.c | 23 +----------------------
1 file changed, 1 insertion(+), 22 deletions(-)

diff --git next-20170705/arch/x86/platform/efi/quirks.c next-20170705_patched/arch/x86/platform/efi/quirks.c
index 8a99a2e..191f6f7 100644
--- next-20170705/arch/x86/platform/efi/quirks.c
+++ next-20170705_patched/arch/x86/platform/efi/quirks.c
@@ -292,27 +292,6 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size)
efi_memmap_install(new_phys, num_entries);
}

-/*
- * Helper function for efi_reserve_boot_services() to figure out if we
- * can free regions in efi_free_boot_services().
- *
- * Use this function to ensure we do not free regions owned by somebody
- * else. We must only reserve (and then free) regions:
- *
- * - Not within any part of the kernel
- * - Not the BIOS reserved area (E820_TYPE_RESERVED, E820_TYPE_NVS, etc)
- */
-static bool can_free_region(u64 start, u64 size)
-{
- if (start + size > __pa_symbol(_text) && start <= __pa_symbol(_end))
- return false;
-
- if (!e820__mapped_all(start, start+size, E820_TYPE_RAM))
- return false;
-
- return true;
-}
-
void __init efi_reserve_boot_services(void)
{
efi_memory_desc_t *md;
@@ -350,7 +329,7 @@ void __init efi_reserve_boot_services(void)
* one else cares about it. We own it and can
* free it later.
*/
- if (can_free_region(start, size))
+ if (e820__mapped_all(start, start+size, E820_TYPE_RAM))
continue;
}

--
2.7.0

2017-07-24 13:17:18

by Matt Fleming

[permalink] [raw]
Subject: Re: [PATCH v3 1/2] x86/boot/KASLR: exclude EFI_BOOT_SERVICES_{CODE|DATA} from KASLR's choice

On Mon, 10 Jul, at 02:51:35PM, Naoya Horiguchi wrote:
> KASLR chooses kernel location from E820_TYPE_RAM regions by walking over
> e820 entries now. E820_TYPE_RAM includes EFI_BOOT_SERVICES_CODE and
> EFI_BOOT_SERVICES_DATA, so those regions can be the target. According to
> UEFI spec, all memory regions marked as EfiBootServicesCode and
> EfiBootServicesData are available for free memory after the first call
> of ExitBootServices(). So such regions should be usable for kernel on
> spec basis.
>
> In x86, however, we have some workaround for broken firmware, where we
> keep such regions reserved until SetVirtualAddressMap() is done.
> See the following code in should_map_region():
>
> static bool should_map_region(efi_memory_desc_t *md)
> {
> ...
> /*
> * Map boot services regions as a workaround for buggy
> * firmware that accesses them even when they shouldn't.
> *
> * See efi_{reserve,free}_boot_services().
> */
> if (md->type == EFI_BOOT_SERVICES_CODE ||
> md->type == EFI_BOOT_SERVICES_DATA)
> return false;
>
> This workaround suppressed a boot crash, but potential issues still
> remain because no one prevents the regions from overlapping with kernel
> image by KASLR.
>
> So let's make sure that EFI_BOOT_SERVICES_{CODE|DATA} regions are never
> chosen as kernel memory for the workaround to work fine.
>
> Signed-off-by: Naoya Horiguchi <[email protected]>
> ---
> v2 -> v3:
> - skip EFI_LOADER_CODE and EFI_LOADER_DATA in region scan
>
> v1 -> v2:
> - switch efi_mirror_found to local variable
> - insert break when EFI_MEMORY_MORE_RELIABLE found
> ---
> arch/x86/boot/compressed/kaslr.c | 44 +++++++++++++++++++++++++++++-----------
> 1 file changed, 32 insertions(+), 12 deletions(-)

I think the patch content is fine, but please update the comments and
the changelog to say why EFI_CONVENTIONAL_MEMORY was chosen, i.e. that
it's the only memory type that we know to be free.

2017-07-24 13:20:54

by Matt Fleming

[permalink] [raw]
Subject: Re: [PATCH v3 2/2] x86/efi: clean up dead code around efi_reserve_boot_services()

On Mon, 10 Jul, at 02:51:36PM, Naoya Horiguchi wrote:
> EFI_BOOT_SERVICES_{CODE|DATA} regions never overlap the kernel now,
> so we can clean up the check in efi_reserve_boot_services().
>
> Signed-off-by: Naoya Horiguchi <[email protected]>
> ---
> arch/x86/platform/efi/quirks.c | 23 +----------------------
> 1 file changed, 1 insertion(+), 22 deletions(-)

Is this true for kernels not using KASLR?

2017-07-25 06:22:31

by Naoya Horiguchi

[permalink] [raw]
Subject: Re: [PATCH v3 1/2] x86/boot/KASLR: exclude EFI_BOOT_SERVICES_{CODE|DATA} from KASLR's choice

On Mon, Jul 24, 2017 at 02:17:07PM +0100, Matt Fleming wrote:
> On Mon, 10 Jul, at 02:51:35PM, Naoya Horiguchi wrote:
> > KASLR chooses kernel location from E820_TYPE_RAM regions by walking over
> > e820 entries now. E820_TYPE_RAM includes EFI_BOOT_SERVICES_CODE and
> > EFI_BOOT_SERVICES_DATA, so those regions can be the target. According to
> > UEFI spec, all memory regions marked as EfiBootServicesCode and
> > EfiBootServicesData are available for free memory after the first call
> > of ExitBootServices(). So such regions should be usable for kernel on
> > spec basis.
> >
> > In x86, however, we have some workaround for broken firmware, where we
> > keep such regions reserved until SetVirtualAddressMap() is done.
> > See the following code in should_map_region():
> >
> > static bool should_map_region(efi_memory_desc_t *md)
> > {
> > ...
> > /*
> > * Map boot services regions as a workaround for buggy
> > * firmware that accesses them even when they shouldn't.
> > *
> > * See efi_{reserve,free}_boot_services().
> > */
> > if (md->type == EFI_BOOT_SERVICES_CODE ||
> > md->type == EFI_BOOT_SERVICES_DATA)
> > return false;
> >
> > This workaround suppressed a boot crash, but potential issues still
> > remain because no one prevents the regions from overlapping with kernel
> > image by KASLR.
> >
> > So let's make sure that EFI_BOOT_SERVICES_{CODE|DATA} regions are never
> > chosen as kernel memory for the workaround to work fine.
> >
> > Signed-off-by: Naoya Horiguchi <[email protected]>
> > ---
> > v2 -> v3:
> > - skip EFI_LOADER_CODE and EFI_LOADER_DATA in region scan
> >
> > v1 -> v2:
> > - switch efi_mirror_found to local variable
> > - insert break when EFI_MEMORY_MORE_RELIABLE found
> > ---
> > arch/x86/boot/compressed/kaslr.c | 44 +++++++++++++++++++++++++++++-----------
> > 1 file changed, 32 insertions(+), 12 deletions(-)
>
> I think the patch content is fine, but please update the comments and
> the changelog to say why EFI_CONVENTIONAL_MEMORY was chosen, i.e. that
> it's the only memory type that we know to be free.

OK, I'll update this.

2017-07-26 00:13:31

by Naoya Horiguchi

[permalink] [raw]
Subject: Re: [PATCH v3 2/2] x86/efi: clean up dead code around efi_reserve_boot_services()

On Mon, Jul 24, 2017 at 02:20:44PM +0100, Matt Fleming wrote:
> On Mon, 10 Jul, at 02:51:36PM, Naoya Horiguchi wrote:
> > EFI_BOOT_SERVICES_{CODE|DATA} regions never overlap the kernel now,
> > so we can clean up the check in efi_reserve_boot_services().
> >
> > Signed-off-by: Naoya Horiguchi <[email protected]>
> > ---
> > arch/x86/platform/efi/quirks.c | 23 +----------------------
> > 1 file changed, 1 insertion(+), 22 deletions(-)
>
> Is this true for kernels not using KASLR?

Thank you for pointing out this. It's not true depending on memmap layout.
If a firmware does not define the memory around the kernel address
(0x1000000 or CONFIG_PHYSICAL_START) as EFI_BOOT_SERVICES_*, no overlap
happens. That's true in my testing server, but I don't think that we can
expect it generally.

So I think of adding some assertion in the patch 1/2 to detect this overlap
in extract_kernel() even for no KASLR case.

Thanks,
Naoya Horiguchi

2017-07-26 01:13:43

by Baoquan He

[permalink] [raw]
Subject: Re: [PATCH v3 2/2] x86/efi: clean up dead code around efi_reserve_boot_services()

On 07/26/17 at 12:12am, Naoya Horiguchi wrote:
> On Mon, Jul 24, 2017 at 02:20:44PM +0100, Matt Fleming wrote:
> > On Mon, 10 Jul, at 02:51:36PM, Naoya Horiguchi wrote:
> > > EFI_BOOT_SERVICES_{CODE|DATA} regions never overlap the kernel now,
> > > so we can clean up the check in efi_reserve_boot_services().
> > >
> > > Signed-off-by: Naoya Horiguchi <[email protected]>
> > > ---
> > > arch/x86/platform/efi/quirks.c | 23 +----------------------
> > > 1 file changed, 1 insertion(+), 22 deletions(-)
> >
> > Is this true for kernels not using KASLR?
>
> Thank you for pointing out this. It's not true depending on memmap layout.
> If a firmware does not define the memory around the kernel address
> (0x1000000 or CONFIG_PHYSICAL_START) as EFI_BOOT_SERVICES_*, no overlap
> happens. That's true in my testing server, but I don't think that we can
> expect it generally.
>
> So I think of adding some assertion in the patch 1/2 to detect this overlap
> in extract_kernel() even for no KASLR case.

EFI_BOOT_SERVICES_* memory are collected as e820 region of
E820_TYPE_RAM, how can we guarantee kernel won't use them after jumping
into the running kernel whether KASLR enabled or not? We can only wish
that EFI firmware engineer don't put EFI_BOOT_SERVICES_* far from
0x1000000 where normal kernel is loaded.

2017-07-26 01:34:38

by Baoquan He

[permalink] [raw]
Subject: Re: [PATCH v3 2/2] x86/efi: clean up dead code around efi_reserve_boot_services()

On 07/26/17 at 09:13am, Baoquan He wrote:
> On 07/26/17 at 12:12am, Naoya Horiguchi wrote:
> > On Mon, Jul 24, 2017 at 02:20:44PM +0100, Matt Fleming wrote:
> > > On Mon, 10 Jul, at 02:51:36PM, Naoya Horiguchi wrote:
> > > > EFI_BOOT_SERVICES_{CODE|DATA} regions never overlap the kernel now,
> > > > so we can clean up the check in efi_reserve_boot_services().
> > > >
> > > > Signed-off-by: Naoya Horiguchi <[email protected]>
> > > > ---
> > > > arch/x86/platform/efi/quirks.c | 23 +----------------------
> > > > 1 file changed, 1 insertion(+), 22 deletions(-)
> > >
> > > Is this true for kernels not using KASLR?
> >
> > Thank you for pointing out this. It's not true depending on memmap layout.
> > If a firmware does not define the memory around the kernel address
> > (0x1000000 or CONFIG_PHYSICAL_START) as EFI_BOOT_SERVICES_*, no overlap
> > happens. That's true in my testing server, but I don't think that we can
> > expect it generally.
> >
> > So I think of adding some assertion in the patch 1/2 to detect this overlap
> > in extract_kernel() even for no KASLR case.
>
> EFI_BOOT_SERVICES_* memory are collected as e820 region of
> E820_TYPE_RAM, how can we guarantee kernel won't use them after jumping
> into the running kernel whether KASLR enabled or not? We can only wish
> that EFI firmware engineer don't put EFI_BOOT_SERVICES_* far from
sorry, typo. I meant EFI boot
service region need be put far from 0x1000000. Otherwise normal kernel could
allocate memory bottom up and stomp on them. It's embarassment caused by
the hardware flaw of x86 platfrom.
> 0x1000000 where normal kernel is loaded.

2017-07-28 06:50:09

by Naoya Horiguchi

[permalink] [raw]
Subject: [PATCH] x86/boot: check overlap between kernel and EFI_BOOT_SERVICES_*

On Wed, Jul 26, 2017 at 09:34:32AM +0800, Baoquan He wrote:
> On 07/26/17 at 09:13am, Baoquan He wrote:
> > On 07/26/17 at 12:12am, Naoya Horiguchi wrote:
> > > On Mon, Jul 24, 2017 at 02:20:44PM +0100, Matt Fleming wrote:
> > > > On Mon, 10 Jul, at 02:51:36PM, Naoya Horiguchi wrote:
> > > > > EFI_BOOT_SERVICES_{CODE|DATA} regions never overlap the kernel now,
> > > > > so we can clean up the check in efi_reserve_boot_services().
> > > > >
> > > > > Signed-off-by: Naoya Horiguchi <[email protected]>
> > > > > ---
> > > > > arch/x86/platform/efi/quirks.c | 23 +----------------------
> > > > > 1 file changed, 1 insertion(+), 22 deletions(-)
> > > >
> > > > Is this true for kernels not using KASLR?
> > >
> > > Thank you for pointing out this. It's not true depending on memmap layout.
> > > If a firmware does not define the memory around the kernel address
> > > (0x1000000 or CONFIG_PHYSICAL_START) as EFI_BOOT_SERVICES_*, no overlap
> > > happens. That's true in my testing server, but I don't think that we can
> > > expect it generally.
> > >
> > > So I think of adding some assertion in the patch 1/2 to detect this overlap
> > > in extract_kernel() even for no KASLR case.
> >
> > EFI_BOOT_SERVICES_* memory are collected as e820 region of
> > E820_TYPE_RAM, how can we guarantee kernel won't use them after jumping
> > into the running kernel whether KASLR enabled or not? We can only wish

Current kernels have no such check, so it's not guarantted, I think.
And I guess that most firmwares (luckily?) avoid the overlap, and/or
if the overlap happens, that might not cause any detectable error.

> > that EFI firmware engineer don't put EFI_BOOT_SERVICES_* far from
> sorry, typo. I meant EFI boot
> service region need be put far from 0x1000000. Otherwise normal kernel could
> allocate memory bottom up and stomp on them. It's embarassment caused by
> the hardware flaw of x86 platfrom.
> > 0x1000000 where normal kernel is loaded.


> > > So I think of adding some assertion in the patch 1/2 to detect this overlap
> > > in extract_kernel() even for no KASLR case.

Anyway I wrote the following patch adding the assertion as a separate one.
I'm glad if I can get your feedback or advise.

Thanks,
Naoya Horiguchi
---
From: Naoya Horiguchi <[email protected]>
Date: Thu, 27 Jul 2017 13:19:35 +0900
Subject: [PATCH] x86/boot: check overlap between kernel and
EFI_BOOT_SERVICES_*

Even when KASLR is turned off, kernel still could overlap with
EFI_BOOT_SERVICES_* region, for example because of firmware memmap
layout and/or CONFIG_PHYSICAL_START.

So this patch adds an assertion that we are free from the overlap
which is called for non-KASLR case.

Signed-off-by: Naoya Horiguchi <[email protected]>
---
arch/x86/boot/compressed/kaslr.c | 3 ---
arch/x86/boot/compressed/misc.c | 56 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index a6604717d4fe..5549c80b45c2 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -721,9 +721,6 @@ void choose_random_location(unsigned long input,

boot_params->hdr.loadflags |= KASLR_FLAG;

- /* Prepare to add new identity pagetables on demand. */
- initialize_identity_maps();
-
/* Record the various known unsafe memory ranges. */
mem_avoid_init(input, input_size, *output);

diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index a0838ab929f2..b23159fa159c 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -15,6 +15,8 @@
#include "error.h"
#include "../string.h"
#include "../voffset.h"
+#include <linux/efi.h>
+#include <asm/efi.h>

/*
* WARNING!!
@@ -169,6 +171,55 @@ void __puthex(unsigned long value)
}
}

+#ifdef CONFIG_EFI
+bool __init
+efi_kernel_boot_services_overlap(unsigned long start, unsigned long size)
+{
+ int i;
+ u32 nr_desc;
+ struct efi_info *e = &boot_params->efi_info;
+ efi_memory_desc_t *md;
+ char *signature;
+ unsigned long pmap;
+
+ signature = (char *)&boot_params->efi_info.efi_loader_signature;
+ if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
+ strncmp(signature, EFI64_LOADER_SIGNATURE, 4))
+ return false;
+
+#ifdef CONFIG_X86 /* Can't handle data above 4GB at this time */
+ if (e->efi_memmap_hi) {
+ warn("Memory map is above 4GB, EFI should be disabled.\n");
+ return false;
+ }
+ pmap = e->efi_memmap;
+#else
+ pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32));
+#endif
+
+ add_identity_map(pmap, e->efi_memmap_size);
+
+ nr_desc = e->efi_memmap_size / e->efi_memdesc_size;
+ for (i = 0; i < nr_desc; i++) {
+ md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
+ if (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) <= start)
+ continue;
+ if (md->phys_addr >= start + size)
+ continue;
+ if (md->type == EFI_BOOT_SERVICES_CODE ||
+ md->type == EFI_BOOT_SERVICES_DATA)
+ return true;
+ }
+ return false;
+}
+#else
+bool __init
+efi_kernel_boot_services_overlap(unsigned long start, unsigned long size)
+{
+ return false;
+}
+#endif
+
#if CONFIG_X86_NEED_RELOCS
static void handle_relocations(void *output, unsigned long output_len,
unsigned long virt_addr)
@@ -372,6 +423,9 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
debug_putaddr(output_len);
debug_putaddr(kernel_total_size);

+ /* Prepare to add new identity pagetables on demand. */
+ initialize_identity_maps();
+
/*
* The memory hole needed for the kernel is the larger of either
* the entire decompressed kernel plus relocation table, or the
@@ -402,6 +456,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
if (virt_addr != LOAD_PHYSICAL_ADDR)
error("Destination virtual address changed when not relocatable");
#endif
+ if (efi_kernel_boot_services_overlap((unsigned long)output, output_len))
+ error("Kernel overlaps EFI_BOOT_SERVICES area");

debug_putstr("\nDecompressing Linux... ");
__decompress(input_data, input_len, NULL, NULL, output, output_len,
--
2.7.4


2017-07-29 10:04:30

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH] x86/boot: check overlap between kernel and EFI_BOOT_SERVICES_*

Hi Naoya,

[auto build test ERROR on efi/next]
[also build test ERROR on v4.13-rc2 next-20170728]
[cannot apply to tip/x86/core]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url: https://github.com/0day-ci/linux/commits/Naoya-Horiguchi/x86-boot-check-overlap-between-kernel-and-EFI_BOOT_SERVICES_/20170729-170209
base: https://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git next
config: x86_64-lkp (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64

All errors (new ones prefixed by >>):

arch/x86/boot/compressed/misc.o: In function `extract_kernel':
>> misc.c:(.text+0x2cf9): undefined reference to `initialize_identity_maps'
arch/x86/boot/compressed/misc.o: In function `efi_kernel_boot_services_overlap':
>> misc.c:(.init.text+0x7b): undefined reference to `add_identity_map'

---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation


Attachments:
(No filename) (1.10 kB)
.config.gz (25.04 kB)
Download all attachments

2017-07-29 13:01:45

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH] x86/boot: check overlap between kernel and EFI_BOOT_SERVICES_*

Hi Naoya,

[auto build test WARNING on efi/next]
[also build test WARNING on v4.13-rc2 next-20170728]
[cannot apply to tip/x86/core]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url: https://github.com/0day-ci/linux/commits/Naoya-Horiguchi/x86-boot-check-overlap-between-kernel-and-EFI_BOOT_SERVICES_/20170729-170209
base: https://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git next
reproduce:
# apt-get install sparse
make ARCH=x86_64 allmodconfig
make C=1 CF=-D__CHECK_ENDIAN__


sparse warnings: (new ones prefixed by >>)


Please review and possibly fold the followup patch.

---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation

2017-07-29 13:01:43

by kernel test robot

[permalink] [raw]
Subject: [RFC PATCH] x86/boot: efi_kernel_boot_services_overlap can be static


Fixes: 7ebe6cf3a6da ("x86/boot: check overlap between kernel and EFI_BOOT_SERVICES_*")
Signed-off-by: Fengguang Wu <[email protected]>
---
misc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index b23159f..981d5f3 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -172,7 +172,7 @@ void __puthex(unsigned long value)
}

#ifdef CONFIG_EFI
-bool __init
+static bool __init
efi_kernel_boot_services_overlap(unsigned long start, unsigned long size)
{
int i;

2017-08-23 08:24:34

by Baoquan He

[permalink] [raw]
Subject: Re: [PATCH] x86/boot: check overlap between kernel and EFI_BOOT_SERVICES_*

Hi Naoya,

On 07/28/17 at 06:48am, Naoya Horiguchi wrote:
> > > > So I think of adding some assertion in the patch 1/2 to detect this overlap
> > > > in extract_kernel() even for no KASLR case.
> > >
> > > EFI_BOOT_SERVICES_* memory are collected as e820 region of
> > > E820_TYPE_RAM, how can we guarantee kernel won't use them after jumping
> > > into the running kernel whether KASLR enabled or not? We can only wish
>
> Current kernels have no such check, so it's not guarantted, I think.
> And I guess that most firmwares (luckily?) avoid the overlap, and/or
> if the overlap happens, that might not cause any detectable error.
>
> > > that EFI firmware engineer don't put EFI_BOOT_SERVICES_* far from
> > sorry, typo. I meant EFI boot
> > service region need be put far from 0x1000000. Otherwise normal kernel could
> > allocate memory bottom up and stomp on them. It's embarassment caused by
> > the hardware flaw of x86 platfrom.
> > > 0x1000000 where normal kernel is loaded.
>
>
> > > > So I think of adding some assertion in the patch 1/2 to detect this overlap
> > > > in extract_kernel() even for no KASLR case.
>
> Anyway I wrote the following patch adding the assertion as a separate one.
> I'm glad if I can get your feedback or advise.

Sorry, I didn't notice your mail in this deep thread. I think it makes
sense to do something about the overlap. Maybe you can repost a patch
independently, or put it together with the patchset excluding
EFI_BOOT_SERVICES_{CODE|DATA}. Let's see what x86 maintainers and EFI
subsystem maintainers will say.

By the way, Ingo has helped to put my patchset of kernel mirror handling
related to kaslr into tip/x86/boot, you can base your
EFI_BOOT_SERVICES_{CODE|DATA} excluding patchset and repost. Maybe start
a new thread, it might not be easy to follow in a too deep thread.

THanks
Baoquan

> From: Naoya Horiguchi <[email protected]>
> Date: Thu, 27 Jul 2017 13:19:35 +0900
> Subject: [PATCH] x86/boot: check overlap between kernel and
> EFI_BOOT_SERVICES_*
>
> Even when KASLR is turned off, kernel still could overlap with
> EFI_BOOT_SERVICES_* region, for example because of firmware memmap
> layout and/or CONFIG_PHYSICAL_START.
>
> So this patch adds an assertion that we are free from the overlap
> which is called for non-KASLR case.
>
> Signed-off-by: Naoya Horiguchi <[email protected]>
> ---
> arch/x86/boot/compressed/kaslr.c | 3 ---
> arch/x86/boot/compressed/misc.c | 56 ++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 56 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
> index a6604717d4fe..5549c80b45c2 100644
> --- a/arch/x86/boot/compressed/kaslr.c
> +++ b/arch/x86/boot/compressed/kaslr.c
> @@ -721,9 +721,6 @@ void choose_random_location(unsigned long input,
>
> boot_params->hdr.loadflags |= KASLR_FLAG;
>
> - /* Prepare to add new identity pagetables on demand. */
> - initialize_identity_maps();
> -
> /* Record the various known unsafe memory ranges. */
> mem_avoid_init(input, input_size, *output);
>
> diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
> index a0838ab929f2..b23159fa159c 100644
> --- a/arch/x86/boot/compressed/misc.c
> +++ b/arch/x86/boot/compressed/misc.c
> @@ -15,6 +15,8 @@
> #include "error.h"
> #include "../string.h"
> #include "../voffset.h"
> +#include <linux/efi.h>
> +#include <asm/efi.h>
>
> /*
> * WARNING!!
> @@ -169,6 +171,55 @@ void __puthex(unsigned long value)
> }
> }
>
> +#ifdef CONFIG_EFI
> +bool __init
> +efi_kernel_boot_services_overlap(unsigned long start, unsigned long size)
> +{
> + int i;
> + u32 nr_desc;
> + struct efi_info *e = &boot_params->efi_info;
> + efi_memory_desc_t *md;
> + char *signature;
> + unsigned long pmap;
> +
> + signature = (char *)&boot_params->efi_info.efi_loader_signature;
> + if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
> + strncmp(signature, EFI64_LOADER_SIGNATURE, 4))
> + return false;
> +
> +#ifdef CONFIG_X86 /* Can't handle data above 4GB at this time */
> + if (e->efi_memmap_hi) {
> + warn("Memory map is above 4GB, EFI should be disabled.\n");
> + return false;
> + }
> + pmap = e->efi_memmap;
> +#else
> + pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32));
> +#endif
> +
> + add_identity_map(pmap, e->efi_memmap_size);
> +
> + nr_desc = e->efi_memmap_size / e->efi_memdesc_size;
> + for (i = 0; i < nr_desc; i++) {
> + md = (efi_memory_desc_t *)(pmap + (i * e->efi_memdesc_size));
> + if (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) <= start)
> + continue;
> + if (md->phys_addr >= start + size)
> + continue;
> + if (md->type == EFI_BOOT_SERVICES_CODE ||
> + md->type == EFI_BOOT_SERVICES_DATA)
> + return true;
> + }
> + return false;
> +}
> +#else
> +bool __init
> +efi_kernel_boot_services_overlap(unsigned long start, unsigned long size)
> +{
> + return false;
> +}
> +#endif
> +
> #if CONFIG_X86_NEED_RELOCS
> static void handle_relocations(void *output, unsigned long output_len,
> unsigned long virt_addr)
> @@ -372,6 +423,9 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
> debug_putaddr(output_len);
> debug_putaddr(kernel_total_size);
>
> + /* Prepare to add new identity pagetables on demand. */
> + initialize_identity_maps();
> +
> /*
> * The memory hole needed for the kernel is the larger of either
> * the entire decompressed kernel plus relocation table, or the
> @@ -402,6 +456,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
> if (virt_addr != LOAD_PHYSICAL_ADDR)
> error("Destination virtual address changed when not relocatable");
> #endif
> + if (efi_kernel_boot_services_overlap((unsigned long)output, output_len))
> + error("Kernel overlaps EFI_BOOT_SERVICES area");
>
> debug_putstr("\nDecompressing Linux... ");
> __decompress(input_data, input_len, NULL, NULL, output, output_len,
> --
> 2.7.4
>
>