2008-02-13 09:35:35

by Huang, Ying

[permalink] [raw]
Subject: [PATCH] x86: EFI runtime code mapping enhancement

This patch enhances EFI runtime code memory mapping as follows:

- Move __supported_pte_mask & _PAGE_NX checking before invoking
runtime_code_page_mkexec(). This makes it possible for compiler to
eliminate runtime_code_page_mkexec() on machine without NX support.

- Use set_memory_x/nx in early_mapping_set_exec(). This eliminates the
duplicated implementation.

This patch has been tested on Intel x86_64 platform with EFI64/32
firmware.

Signed-off-by: Huang Ying <[email protected]>

---
arch/x86/kernel/efi.c | 6 ++----
arch/x86/kernel/efi_64.c | 30 ++++++++++++------------------
2 files changed, 14 insertions(+), 22 deletions(-)

--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -384,9 +384,6 @@ static void __init runtime_code_page_mke
efi_memory_desc_t *md;
void *p;

- if (!(__supported_pte_mask & _PAGE_NX))
- return;
-
/* Make EFI runtime service code area executable */
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
md = p;
@@ -476,7 +473,8 @@ void __init efi_enter_virtual_mode(void)
efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
efi.reset_system = virt_efi_reset_system;
efi.set_virtual_address_map = virt_efi_set_virtual_address_map;
- runtime_code_page_mkexec();
+ if (__supported_pte_mask & _PAGE_NX)
+ runtime_code_page_mkexec();
early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
memmap.map = NULL;
}
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -35,6 +35,7 @@
#include <asm/tlbflush.h>
#include <asm/proto.h>
#include <asm/efi.h>
+#include <asm/cacheflush.h>

static pgd_t save_pgd __initdata;
static unsigned long efi_flags __initdata;
@@ -43,22 +44,15 @@ static void __init early_mapping_set_exe
unsigned long end,
int executable)
{
- pte_t *kpte;
- unsigned int level;
+ unsigned long num_pages;

- while (start < end) {
- kpte = lookup_address((unsigned long)__va(start), &level);
- BUG_ON(!kpte);
- if (executable)
- set_pte(kpte, pte_mkexec(*kpte));
- else
- set_pte(kpte, __pte((pte_val(*kpte) | _PAGE_NX) & \
- __supported_pte_mask));
- if (level == PG_LEVEL_4K)
- start = (start + PAGE_SIZE) & PAGE_MASK;
- else
- start = (start + PMD_SIZE) & PMD_MASK;
- }
+ start &= PMD_MASK;
+ end = (end + PMD_SIZE - 1) & PMD_MASK;
+ num_pages = (end - start) >> PAGE_SHIFT;
+ if (executable)
+ set_memory_x((unsigned long)__va(start), num_pages);
+ else
+ set_memory_nx((unsigned long)__va(start), num_pages);
}

static void __init early_runtime_code_mapping_set_exec(int executable)
@@ -74,7 +68,7 @@ static void __init early_runtime_code_ma
md = p;
if (md->type == EFI_RUNTIME_SERVICES_CODE) {
unsigned long end;
- end = md->phys_addr + (md->num_pages << PAGE_SHIFT);
+ end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
early_mapping_set_exec(md->phys_addr, end, executable);
}
}
@@ -84,8 +78,8 @@ void __init efi_call_phys_prelog(void)
{
unsigned long vaddress;

- local_irq_save(efi_flags);
early_runtime_code_mapping_set_exec(1);
+ local_irq_save(efi_flags);
vaddress = (unsigned long)__va(0x0UL);
save_pgd = *pgd_offset_k(0x0UL);
set_pgd(pgd_offset_k(0x0UL), *pgd_offset_k(vaddress));
@@ -98,9 +92,9 @@ void __init efi_call_phys_epilog(void)
* After the lock is released, the original page table is restored.
*/
set_pgd(pgd_offset_k(0x0UL), save_pgd);
- early_runtime_code_mapping_set_exec(0);
__flush_tlb_all();
local_irq_restore(efi_flags);
+ early_runtime_code_mapping_set_exec(0);
}

void __init efi_reserve_bootmem(void)


2008-02-13 11:31:39

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] x86: EFI runtime code mapping enhancement

Huang, Ying wrote:
> This patch enhances EFI runtime code memory mapping as following:
>
> - Move __supported_pte_mask & _PAGE_NX checking before invoking
> runtime_code_page_mkexec(). This makes it possible for compiler to
> eliminate runtime_code_page_mkexec() on machine without NX support.
>
> - Use set_memory_x/nx in early_mapping_set_exec(). This eliminates the
> duplicated implementation.

It's an inefficiency because you split up the 2MB (or 1GB) pages
in the direct mapping, although that is not strictly needed. This
means everybody who happens to use memory in the same 2M/1G region
will eat unnecessary TLB misses.

It would be generally better to only execute your code in
ioremap_cache() because that won't affect anybody else.

Eventually set_memory_x() will work correctly for ioremap() too
so that should work then.

-Andi

2008-02-14 01:45:44

by Huang, Ying

[permalink] [raw]
Subject: Re: [PATCH] x86: EFI runtime code mapping enhancement

On Wed, 2008-02-13 at 12:32 +0100, Andi Kleen wrote:
> Huang, Ying wrote:
> > This patch enhances EFI runtime code memory mapping as following:
> >
> > - Move __supported_pte_mask & _PAGE_NX checking before invoking
> > runtime_code_page_mkexec(). This makes it possible for compiler to
> > eliminate runtime_code_page_mkexec() on machine without NX support.
> >
> > - Use set_memory_x/nx in early_mapping_set_exec(). This eliminates the
> > duplicated implementation.
>
> It's a inefficiency because you split up the 2MB (or 1GB) pages
> in the direct mapping, although that is not strictly needed. This
> means everybody who happens to use memory in same 2M/1G region
> will eat unnecessary TLB misses.
>
> It would be generally better to only execute your code in
> ioremap_cache() because that won't affect anybody else.
>
> Eventually set_memory_x() will work correctly for ioremap() too
> so that should work then.

For EFI runtime services in virtual mode, using the direct mapping is mainly
for kexec, where the EFI runtime memory area needs to be mapped at the same
virtual address across kexec. For performance reasons, it may be better
to use the fixmap only instead. But this will waste some fixmap space.

But this patch is for EFI runtime services in physical mode (in 64-bit mode,
this needs a direct mapping page table); there are two choices.

- Use the kernel's direct mapping, clearing the NX bit from the kernel page
table temporarily before/after the EFI call. This does not need to split 2M
pages into 4K pages, because the changed region is aligned to 2M. And, because
the change is temporary, a slightly larger region is not a big issue.

- Construct a direct mapping page table for EFI only. I think this is
not good either. The code would duplicate the kernel page table
initialization code.


An issue with this patch is that it does not work with 1G pages. Aligning
the EFI runtime code region to 1G does not seem like a good idea either. I
think a better method is adding a non-split mode to c_p_a(), where the region
changed is enlarged if necessary to avoid page allocation. This can be
used to implement early_set_memory_xx(). The early_set_memory_xx()
functions, instead of a duplicated c_p_a() variant, can then be used by EFI code.

Best Regards,
Huang Ying

2008-02-14 12:06:34

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] x86: EFI runtime code mapping enhancement


> For EFI runtime service in virtual mode, using direct mapping is mainly
> for kexec, where EFI runtime memory area need to be mapped at same
> virtual address across kexec.

I see. I didn't consider this aspect.

> - Use direct mapping of kernel, clean NX bit from kernel page table
> temporarily before/after EFI calling. This needs not split 2M page into
> 4K pages, because the region changed is aligned with 2M. And, because
> the changing is temporary, a little larger region is not a big issue.

I would just do it permanently.

> Aligning
> EFI runtime code region with 1G seems not a good idea too. I think a
> better method is adding a non-split mode to c_p_a(), where the region
> changed is enlarged if necessary to avoid page allocation. This can be
> used to implement early_set_memory_xx(). The early_set_memory_xx()
> instead of duplicated c_p_a() variant can be used by EFI code.

I attempted something like this with my advisory vs required static
protections last week, but it was rejected.

But yes having such a mode would make sense agreed.

The easiest way (as in least amount of code) to implement it actually
is to just bypass set_memory_*() and just do the lookup_address() yourself
and clear NX and do a global TLB flush. For the special case of NX
that is fine because you don't need to worry about fixing up any aliases.

-Andi

2008-02-15 02:20:29

by Huang, Ying

[permalink] [raw]
Subject: Re: [PATCH] x86: EFI runtime code mapping enhancement

On Thu, 2008-02-14 at 13:06 +0100, Andi Kleen wrote:
> > For EFI runtime service in virtual mode, using direct mapping is mainly
> > for kexec, where EFI runtime memory area need to be mapped at same
> > virtual address across kexec.
>
> I see. I didn't consider this aspect.
>
> > - Use direct mapping of kernel, clean NX bit from kernel page table
> > temporarily before/after EFI calling. This needs not split 2M page into
> > 4K pages, because the region changed is aligned with 2M. And, because
> > the changing is temporary, a little larger region is not a big issue.
>
> I would just do it permanently.

Because alloc_page() is not available during the early boot stage, if we do
it there, we must make more pages executable to avoid alloc_page(). That
may be acceptable for a 2M mapping (aligning the memory range to 2M), but I
think it is not acceptable for a 1G mapping (aligning the memory range to
1G).

Maybe the better method is to always use the fixmap (efi_ioremap) to map
the executable memory area, to avoid splitting the large mapping.

> > Aligning
> > EFI runtime code region with 1G seems not a good idea too. I think a
> > better method is adding a non-split mode to c_p_a(), where the region
> > changed is enlarged if necessary to avoid page allocation. This can be
> > used to implement early_set_memory_xx(). The early_set_memory_xx()
> > instead of duplicated c_p_a() variant can be used by EFI code.
>
> I attempted something like this with my advisory vs required static
> protections last week, but it was rejected.
>
> But yes having such a mode would make sense agreed.
>
> The easiest way (as in least amount of code) to implement it actually
> is to just bypass set_memory_*() and just do the lookup_address() yourself
> and clear NX and do a global TLB flush. For the special case of NX
> that is fine because you don't need to worry about fixing up any aliases.

This is the original method. The issue is that it must be kept in sync with
the set_memory_*() and lookup_address() implementations. For example, it
lacked 1G support when that was added to lookup_address().

Best Regards,
Huang Ying

2008-02-15 02:36:15

by Huang, Ying

[permalink] [raw]
Subject: Re: [PATCH] x86: EFI runtime code mapping enhancement

Hi, Ingo,

Please revert this patch, because it does not work with 1GB pages.
Although the original implementation does not work with 1GB pages either,
it can be fixed more easily than this one. I will compose a new patch to fix
the original implementation.

Best Regards,
Huang Ying

On Wed, 2008-02-13 at 17:22 +0800, Huang, Ying wrote:
> This patch enhances EFI runtime code memory mapping as following:
>
> - Move __supported_pte_mask & _PAGE_NX checking before invoking
> runtime_code_page_mkexec(). This makes it possible for compiler to
> eliminate runtime_code_page_mkexec() on machine without NX support.
>
> - Use set_memory_x/nx in early_mapping_set_exec(). This eliminates the
> duplicated implementation.
>
> This patch has been tested on Intel x86_64 platform with EFI64/32
> firmware.
>
> Signed-off-by: Huang Ying <[email protected]>
>
> ---
> arch/x86/kernel/efi.c | 6 ++----
> arch/x86/kernel/efi_64.c | 30 ++++++++++++------------------
> 2 files changed, 14 insertions(+), 22 deletions(-)
>
> --- a/arch/x86/kernel/efi.c
> +++ b/arch/x86/kernel/efi.c
> @@ -384,9 +384,6 @@ static void __init runtime_code_page_mke
> efi_memory_desc_t *md;
> void *p;
>
> - if (!(__supported_pte_mask & _PAGE_NX))
> - return;
> -
> /* Make EFI runtime service code area executable */
> for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
> md = p;
> @@ -476,7 +473,8 @@ void __init efi_enter_virtual_mode(void)
> efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
> efi.reset_system = virt_efi_reset_system;
> efi.set_virtual_address_map = virt_efi_set_virtual_address_map;
> - runtime_code_page_mkexec();
> + if (__supported_pte_mask & _PAGE_NX)
> + runtime_code_page_mkexec();
> early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
> memmap.map = NULL;
> }
> --- a/arch/x86/kernel/efi_64.c
> +++ b/arch/x86/kernel/efi_64.c
> @@ -35,6 +35,7 @@
> #include <asm/tlbflush.h>
> #include <asm/proto.h>
> #include <asm/efi.h>
> +#include <asm/cacheflush.h>
>
> static pgd_t save_pgd __initdata;
> static unsigned long efi_flags __initdata;
> @@ -43,22 +44,15 @@ static void __init early_mapping_set_exe
> unsigned long end,
> int executable)
> {
> - pte_t *kpte;
> - unsigned int level;
> + unsigned long num_pages;
>
> - while (start < end) {
> - kpte = lookup_address((unsigned long)__va(start), &level);
> - BUG_ON(!kpte);
> - if (executable)
> - set_pte(kpte, pte_mkexec(*kpte));
> - else
> - set_pte(kpte, __pte((pte_val(*kpte) | _PAGE_NX) & \
> - __supported_pte_mask));
> - if (level == PG_LEVEL_4K)
> - start = (start + PAGE_SIZE) & PAGE_MASK;
> - else
> - start = (start + PMD_SIZE) & PMD_MASK;
> - }
> + start &= PMD_MASK;
> + end = (end + PMD_SIZE - 1) & PMD_MASK;
> + num_pages = (end - start) >> PAGE_SHIFT;
> + if (executable)
> + set_memory_x((unsigned long)__va(start), num_pages);
> + else
> + set_memory_nx((unsigned long)__va(start), num_pages);
> }
>
> static void __init early_runtime_code_mapping_set_exec(int executable)
> @@ -74,7 +68,7 @@ static void __init early_runtime_code_ma
> md = p;
> if (md->type == EFI_RUNTIME_SERVICES_CODE) {
> unsigned long end;
> - end = md->phys_addr + (md->num_pages << PAGE_SHIFT);
> + end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
> early_mapping_set_exec(md->phys_addr, end, executable);
> }
> }
> @@ -84,8 +78,8 @@ void __init efi_call_phys_prelog(void)
> {
> unsigned long vaddress;
>
> - local_irq_save(efi_flags);
> early_runtime_code_mapping_set_exec(1);
> + local_irq_save(efi_flags);
> vaddress = (unsigned long)__va(0x0UL);
> save_pgd = *pgd_offset_k(0x0UL);
> set_pgd(pgd_offset_k(0x0UL), *pgd_offset_k(vaddress));
> @@ -98,9 +92,9 @@ void __init efi_call_phys_epilog(void)
> * After the lock is released, the original page table is restored.
> */
> set_pgd(pgd_offset_k(0x0UL), save_pgd);
> - early_runtime_code_mapping_set_exec(0);
> __flush_tlb_all();
> local_irq_restore(efi_flags);
> + early_runtime_code_mapping_set_exec(0);
> }
>
> void __init efi_reserve_bootmem(void)
>