2015-02-04 02:03:50

by Yinghai Lu

[permalink] [raw]
Subject: [PATCH] x86, boot: Allow 64bit EFI kernel to be loaded above 4G

We can now use kexec to place kernel/boot_params/cmd_line/initrd
above 4G, but only through the legacy interface that enters startup_64 directly.

This patch will allow 64bit EFI kernel to be loaded above 4G
and use EFI HANDOVER PROTOCOL to start the kernel.

Currently code32_start is used for passing around the load address,
so it will overflow when the kernel is loaded above 4G.

The patch mainly adds ext_code32_start to hold the upper 32 bits of the address.

After this patch, a patched grub2-x86_64.efi can place
kernel/boot_params/cmd_line/initrd all above 4G and execute the kernel
above 4G.

bootlog like:

params: [1618fc000,1618fffff]
cmdline: [1618fb000,1618fb7fe]
kernel: [15e000000,161385fff]
kernel: done [ linux 9.25MiB 100% 6.66MiB/s ]
initrd: [15bcbe000,15dffffbb]
initrd: 1 file done [ initrd.img 35.26MiB 100% 11.93MiB/s ]
early console in decompress_kernel
decompress_kernel:
input: [0x15fd0b3b4-0x16063c803], output: 0x15e000000, heap: [0x160645b00-0x16064daff]

Decompressing Linux... xz... Parsing ELF... done.
Booting the kernel.
[ 0.000000] bootconsole [uart0] enabled
[ 0.000000] real_mode_data : phys 00000001618fc000
[ 0.000000] real_mode_data : virt ffff8801618fc000
[ 0.000000] Kernel Layout:
[ 0.000000] .text: [0x15e000000-0x15f08f72c]
[ 0.000000] .rodata: [0x15f200000-0x15fa44fff]
[ 0.000000] .data: [0x15fc00000-0x15fe545ff]
[ 0.000000] .init: [0x15fe56000-0x16021afff]
[ 0.000000] .bss: [0x160229000-0x16135ffff]
[ 0.000000] .brk: [0x161360000-0x161385fff]
[ 0.000000] memblock_reserve: [0x0000000009f000-0x000000000fffff] flags 0x0 * BIOS reserved
...
[ 0.000000] memblock_reserve: [0x0000015e000000-0x0000016135ffff] flags 0x0 TEXT DATA BSS
[ 0.000000] memblock_reserve: [0x0000015bcbe000-0x0000015dffffff] flags 0x0 RAMDISK


Signed-off-by: Yinghai Lu <[email protected]>

---
Documentation/x86/boot.txt | 18 ++++++++++++++++++
arch/x86/boot/compressed/eboot.c | 15 ++++++++++-----
arch/x86/boot/compressed/head_64.S | 7 ++++++-
arch/x86/boot/header.S | 3 ++-
arch/x86/include/uapi/asm/bootparam.h | 1 +
arch/x86/kernel/asm-offsets.c | 1 +
6 files changed, 38 insertions(+), 7 deletions(-)

Index: linux-2.6/arch/x86/include/uapi/asm/bootparam.h
===================================================================
--- linux-2.6.orig/arch/x86/include/uapi/asm/bootparam.h
+++ linux-2.6/arch/x86/include/uapi/asm/bootparam.h
@@ -83,6 +83,7 @@ struct setup_header {
__u64 pref_address;
__u32 init_size;
__u32 handover_offset;
+ __u32 ext_code32_start;
} __attribute__((packed));

struct sys_desc_table {
Index: linux-2.6/arch/x86/kernel/asm-offsets.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/asm-offsets.c
+++ linux-2.6/arch/x86/kernel/asm-offsets.c
@@ -68,6 +68,7 @@ void common(void) {
OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
OFFSET(BP_pref_address, boot_params, hdr.pref_address);
OFFSET(BP_code32_start, boot_params, hdr.code32_start);
+ OFFSET(BP_ext_code32_start, boot_params, hdr.ext_code32_start);

BLANK();
DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
Index: linux-2.6/arch/x86/boot/compressed/head_64.S
===================================================================
--- linux-2.6.orig/arch/x86/boot/compressed/head_64.S
+++ linux-2.6/arch/x86/boot/compressed/head_64.S
@@ -263,6 +263,8 @@ ENTRY(efi_pe_entry)
mov %rax, %rsi
leaq startup_32(%rip), %rax
movl %eax, BP_code32_start(%rsi)
+ shr $32, %rax
+ movl %eax, BP_ext_code32_start(%rsi)
jmp 2f /* Skip the relocation */

handover_entry:
@@ -286,7 +288,10 @@ fail:
hlt
jmp fail
2:
- movl BP_code32_start(%esi), %eax
+ movl BP_code32_start(%rsi), %eax
+ movl BP_ext_code32_start(%rsi), %ebx
+ shl $32, %rbx
+ orq %rbx, %rax
leaq preferred_addr(%rax), %rax
jmp *%rax

Index: linux-2.6/arch/x86/boot/compressed/eboot.c
===================================================================
--- linux-2.6.orig/arch/x86/boot/compressed/eboot.c
+++ linux-2.6/arch/x86/boot/compressed/eboot.c
@@ -1389,6 +1389,7 @@ struct boot_params *efi_main(struct efi_
void *handle;
efi_system_table_t *_table;
bool is64;
+ unsigned long loaded_addr;

efi_early = c;

@@ -1430,9 +1431,12 @@ struct boot_params *efi_main(struct efi_
* If the kernel isn't already loaded at the preferred load
* address, relocate it.
*/
- if (hdr->pref_address != hdr->code32_start) {
- unsigned long bzimage_addr = hdr->code32_start;
- status = efi_relocate_kernel(sys_table, &bzimage_addr,
+ loaded_addr = hdr->code32_start;
+ loaded_addr |= (unsigned long)hdr->ext_code32_start << 32;
+ if (hdr->pref_address != loaded_addr) {
+ unsigned long loaded_addr_orig = loaded_addr;
+
+ status = efi_relocate_kernel(sys_table, &loaded_addr,
hdr->init_size, hdr->init_size,
hdr->pref_address,
hdr->kernel_alignment);
@@ -1441,8 +1445,9 @@ struct boot_params *efi_main(struct efi_
goto fail;
}

- hdr->pref_address = hdr->code32_start;
- hdr->code32_start = bzimage_addr;
+ hdr->pref_address = loaded_addr_orig;
+ hdr->code32_start = loaded_addr & 0xffffffff;
+ hdr->ext_code32_start = loaded_addr >> 32;
}

status = exit_boot(boot_params, handle, is64);
Index: linux-2.6/arch/x86/boot/header.S
===================================================================
--- linux-2.6.orig/arch/x86/boot/header.S
+++ linux-2.6/arch/x86/boot/header.S
@@ -301,7 +301,7 @@ _start:
# Part 2 of the header, from the old setup.S

.ascii "HdrS" # header signature
- .word 0x020d # header version number (>= 0x0105)
+ .word 0x020e # header version number (>= 0x0105)
# or else old loadlin-1.5 will fail)
.globl realmode_swtch
realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
@@ -449,6 +449,7 @@ pref_address: .quad LOAD_PHYSICAL_ADDR
#endif
init_size: .long INIT_SIZE # kernel initialization size
handover_offset: .long 0 # Filled in by build.c
+ext_code32_start: .long 0 # werid one!

# End of setup header #####################################################

Index: linux-2.6/Documentation/x86/boot.txt
===================================================================
--- linux-2.6.orig/Documentation/x86/boot.txt
+++ linux-2.6/Documentation/x86/boot.txt
@@ -61,6 +61,9 @@ Protocol 2.12: (Kernel 3.8) Added the xl
to struct boot_params for loading bzImage and ramdisk
above 4G in 64bit.

+Protocol 2.14: (Kernel 3.20) Added the ext_code32_start to support EFI64
+ to be loaded above 4G.
+
**** MEMORY LAYOUT

The traditional memory map for the kernel loader, used for Image or
@@ -197,6 +200,7 @@ Offset Proto Name Meaning
0258/8 2.10+ pref_address Preferred loading address
0260/4 2.10+ init_size Linear memory required during initialization
0264/4 2.11+ handover_offset Offset of handover entry point
+0268/4 2.14+ ext_code32_start Extended part for code32_start

(1) For backwards compatibility, if the setup_sects field contains 0, the
real value is 4.
@@ -738,6 +742,13 @@ Offset/size: 0x264/4

See EFI HANDOVER PROTOCOL below for more details.

+Field name: ext_code32_start
+Type: modify (optional, reloc)
+Offset/size: 0x268/4
+Protocol: 2.14+
+
+ The address is used with code32_start to compare pref_address
+ to support EFI 64bit kernel get loaded above 4G.

**** THE IMAGE CHECKSUM

@@ -1122,4 +1133,11 @@ The boot loader *must* fill out the foll
o hdr.ramdisk_image (if applicable)
o hdr.ramdisk_size (if applicable)

+for 64bit, when loading above 4G, *must* fill out the following fields,
+
+ o hdr.ext_code32_start
+ o ext_cmd_line_ptr
+ o ext_ramdisk_image (if applicable)
+ o ext_ramdisk_size (if applicable)
+
All other fields should be zero.


2015-02-05 03:25:32

by Dave Young

[permalink] [raw]
Subject: Re: [PATCH] x86, boot: Allow 64bit EFI kernel to be loaded above 4G

Hi,

Thanks, it will be useful for possible efi rebooting to kdump reserved memory.

On 02/03/15 at 06:03pm, Yinghai Lu wrote:
> Now could use kexec to place kernel/boot_params/cmd_line/initrd
> above 4G, but that is with legacy interface with startup_64 directly.
>
> This patch will allow 64bit EFI kernel to be loaded above 4G
> and use EFI HANDOVER PROTOCOL to start the kernel.
>
> Current code32_start is used for passing around loading address,
> so it will overflow when kernel is loaded abover 4G.
>
> The patch mainly add ext_code32_start to take address high 32bit.
>
> After this patch, could use patched grub2-x86_64.efi to place
> kernel/boot_params/cmd_line/initrd all above 4G and execute the kernel
> above 4G.

Can you share the grub2 patch for testing?

>
> bootlog like:
>
> params: [1618fc000,1618fffff]
> cmdline: [1618fb000,1618fb7fe]
> kernel: [15e000000,161385fff]
> kernel: done [ linux 9.25MiB 100% 6.66MiB/s ]
> initrd: [15bcbe000,15dffffbb]
> initrd: 1 file done [ initrd.img 35.26MiB 100% 11.93MiB/s ]
> early console in decompress_kernel
> decompress_kernel:
> input: [0x15fd0b3b4-0x16063c803], output: 0x15e000000, heap: [0x160645b00-0x16064daff]
>
> Decompressing Linux... xz... Parsing ELF... done.
> Booting the kernel.
> [ 0.000000] bootconsole [uart0] enabled
> [ 0.000000] real_mode_data : phys 00000001618fc000
> [ 0.000000] real_mode_data : virt ffff8801618fc000
> [ 0.000000] Kernel Layout:
> [ 0.000000] .text: [0x15e000000-0x15f08f72c]
> [ 0.000000] .rodata: [0x15f200000-0x15fa44fff]
> [ 0.000000] .data: [0x15fc00000-0x15fe545ff]
> [ 0.000000] .init: [0x15fe56000-0x16021afff]
> [ 0.000000] .bss: [0x160229000-0x16135ffff]
> [ 0.000000] .brk: [0x161360000-0x161385fff]
> [ 0.000000] memblock_reserve: [0x0000000009f000-0x000000000fffff] flags 0x0 * BIOS reserved
> ...
> [ 0.000000] memblock_reserve: [0x0000015e000000-0x0000016135ffff] flags 0x0 TEXT DATA BSS
> [ 0.000000] memblock_reserve: [0x0000015bcbe000-0x0000015dffffff] flags 0x0 RAMDISK
>
>
> Signed-off-by: Yinghai Lu <[email protected]>
>
> ---
> Documentation/x86/boot.txt | 18 ++++++++++++++++++
> arch/x86/boot/compressed/eboot.c | 15 ++++++++++-----
> arch/x86/boot/compressed/head_64.S | 7 ++++++-
> arch/x86/boot/header.S | 3 ++-
> arch/x86/include/uapi/asm/bootparam.h | 1 +
> arch/x86/kernel/asm-offsets.c | 1 +
> 6 files changed, 38 insertions(+), 7 deletions(-)
>
> Index: linux-2.6/arch/x86/include/uapi/asm/bootparam.h
> ===================================================================
> --- linux-2.6.orig/arch/x86/include/uapi/asm/bootparam.h
> +++ linux-2.6/arch/x86/include/uapi/asm/bootparam.h
> @@ -83,6 +83,7 @@ struct setup_header {
> __u64 pref_address;
> __u32 init_size;
> __u32 handover_offset;
> + __u32 ext_code32_start;
> } __attribute__((packed));
>
> struct sys_desc_table {
> Index: linux-2.6/arch/x86/kernel/asm-offsets.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/asm-offsets.c
> +++ linux-2.6/arch/x86/kernel/asm-offsets.c
> @@ -68,6 +68,7 @@ void common(void) {
> OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
> OFFSET(BP_pref_address, boot_params, hdr.pref_address);
> OFFSET(BP_code32_start, boot_params, hdr.code32_start);
> + OFFSET(BP_ext_code32_start, boot_params, hdr.ext_code32_start);
>
> BLANK();
> DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
> Index: linux-2.6/arch/x86/boot/compressed/head_64.S
> ===================================================================
> --- linux-2.6.orig/arch/x86/boot/compressed/head_64.S
> +++ linux-2.6/arch/x86/boot/compressed/head_64.S
> @@ -263,6 +263,8 @@ ENTRY(efi_pe_entry)
> mov %rax, %rsi
> leaq startup_32(%rip), %rax
> movl %eax, BP_code32_start(%rsi)
> + shr $32, %rax
> + movl %eax, BP_ext_code32_start(%rsi)
> jmp 2f /* Skip the relocation */
>
> handover_entry:
> @@ -286,7 +288,10 @@ fail:
> hlt
> jmp fail
> 2:
> - movl BP_code32_start(%esi), %eax
> + movl BP_code32_start(%rsi), %eax
> + movl BP_ext_code32_start(%rsi), %ebx
> + shl $32, %rbx
> + orq %rbx, %rax
> leaq preferred_addr(%rax), %rax
> jmp *%rax
>
> Index: linux-2.6/arch/x86/boot/compressed/eboot.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/boot/compressed/eboot.c
> +++ linux-2.6/arch/x86/boot/compressed/eboot.c
> @@ -1389,6 +1389,7 @@ struct boot_params *efi_main(struct efi_
> void *handle;
> efi_system_table_t *_table;
> bool is64;
> + unsigned long loaded_addr;
>
> efi_early = c;
>
> @@ -1430,9 +1431,12 @@ struct boot_params *efi_main(struct efi_
> * If the kernel isn't already loaded at the preferred load
> * address, relocate it.
> */
> - if (hdr->pref_address != hdr->code32_start) {
> - unsigned long bzimage_addr = hdr->code32_start;
> - status = efi_relocate_kernel(sys_table, &bzimage_addr,
> + loaded_addr = hdr->code32_start;
> + loaded_addr |= (unsigned long)hdr->ext_code32_start << 32;
> + if (hdr->pref_address != loaded_addr) {
> + unsigned long loaded_addr_orig = loaded_addr;
> +
> + status = efi_relocate_kernel(sys_table, &loaded_addr,
> hdr->init_size, hdr->init_size,
> hdr->pref_address,
> hdr->kernel_alignment);
> @@ -1441,8 +1445,9 @@ struct boot_params *efi_main(struct efi_
> goto fail;
> }
>
> - hdr->pref_address = hdr->code32_start;
> - hdr->code32_start = bzimage_addr;
> + hdr->pref_address = loaded_addr_orig;
> + hdr->code32_start = loaded_addr & 0xffffffff;
> + hdr->ext_code32_start = loaded_addr >> 32;
> }
>
> status = exit_boot(boot_params, handle, is64);
> Index: linux-2.6/arch/x86/boot/header.S
> ===================================================================
> --- linux-2.6.orig/arch/x86/boot/header.S
> +++ linux-2.6/arch/x86/boot/header.S
> @@ -301,7 +301,7 @@ _start:
> # Part 2 of the header, from the old setup.S
>
> .ascii "HdrS" # header signature
> - .word 0x020d # header version number (>= 0x0105)
> + .word 0x020e # header version number (>= 0x0105)
> # or else old loadlin-1.5 will fail)
> .globl realmode_swtch
> realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
> @@ -449,6 +449,7 @@ pref_address: .quad LOAD_PHYSICAL_ADDR
> #endif
> init_size: .long INIT_SIZE # kernel initialization size
> handover_offset: .long 0 # Filled in by build.c
> +ext_code32_start: .long 0 # werid one!
>
> # End of setup header #####################################################
>
> Index: linux-2.6/Documentation/x86/boot.txt
> ===================================================================
> --- linux-2.6.orig/Documentation/x86/boot.txt
> +++ linux-2.6/Documentation/x86/boot.txt
> @@ -61,6 +61,9 @@ Protocol 2.12: (Kernel 3.8) Added the xl
> to struct boot_params for loading bzImage and ramdisk
> above 4G in 64bit.
>
> +Protocol 2.14: (Kernel 3.20) Added the ext_code32_start to support EFI64
> + to be loaded above 4G.
> +
> **** MEMORY LAYOUT
>
> The traditional memory map for the kernel loader, used for Image or
> @@ -197,6 +200,7 @@ Offset Proto Name Meaning
> 0258/8 2.10+ pref_address Preferred loading address
> 0260/4 2.10+ init_size Linear memory required during initialization
> 0264/4 2.11+ handover_offset Offset of handover entry point
> +0268/4 2.14+ ext_code32_start Extended part for code32_start
>
> (1) For backwards compatibility, if the setup_sects field contains 0, the
> real value is 4.
> @@ -738,6 +742,13 @@ Offset/size: 0x264/4
>
> See EFI HANDOVER PROTOCOL below for more details.
>
> +Field name: ext_code32_start
> +Type: modify (optional, reloc)
> +Offset/size: 0x268/4
> +Protocol: 2.14+
> +
> + The address is used with code32_start to compare pref_address
> + to support EFI 64bit kernel get loaded above 4G.
>
> **** THE IMAGE CHECKSUM
>
> @@ -1122,4 +1133,11 @@ The boot loader *must* fill out the foll
> o hdr.ramdisk_image (if applicable)
> o hdr.ramdisk_size (if applicable)
>
> +for 64bit, when loading above 4G, *must* fill out the following fields,
> +
> + o hdr.ext_code32_start
> + o ext_cmd_line_ptr
> + o ext_ramdisk_image (if applicable)
> + o ext_ramdisk_size (if applicable)
> +
> All other fields should be zero.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-efi" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

2015-02-05 05:25:30

by Yinghai Lu

[permalink] [raw]
Subject: Re: [PATCH] x86, boot: Allow 64bit EFI kernel to be loaded above 4G

On Wed, Feb 4, 2015 at 7:25 PM, Dave Young <[email protected]> wrote:
>> After this patch, could use patched grub2-x86_64.efi to place
>> kernel/boot_params/cmd_line/initrd all above 4G and execute the kernel
>> above 4G.
>
> Can you share the grub2 patch for testing?

Please check the attached 5 patches. The last one is for debugging purposes.

You need to apply them on top of

git://git.savannah.gnu.org/grub.git

plus http://pkgs.fedoraproject.org/cgit/grub2.git/plain/0091-Add-support-for-linuxefi.patch


Attachments:
ramdisk_above_4g_7_1.patch (1.83 kB)
ramdisk_above_4g_7_2.patch (4.99 kB)
ramdisk_above_4g_7_3.patch (1.81 kB)
ramdisk_above_4g_7_4.patch (8.32 kB)
ramdisk_above_4g_7_5.patch (2.76 kB)
Download all attachments

2015-02-05 06:09:57

by Dave Young

[permalink] [raw]
Subject: Re: [PATCH] x86, boot: Allow 64bit EFI kernel to be loaded above 4G

On 02/04/15 at 09:25pm, Yinghai Lu wrote:
> On Wed, Feb 4, 2015 at 7:25 PM, Dave Young <[email protected]> wrote:
> >> After this patch, could use patched grub2-x86_64.efi to place
> >> kernel/boot_params/cmd_line/initrd all above 4G and execute the kernel
> >> above 4G.
> >
> > Can you share the grub2 patch for testing?
>
> Please check attached 5 patches. last one is for debug purpose.
>
> You need to apply them on top of
>
> git://git.savannah.gnu.org/grub.git
>
> plus http://pkgs.fedoraproject.org/cgit/grub2.git/plain/0091-Add-support-for-linuxefi.patch

> Subject: [PATCH] update setup_header and boot_params to 2.12
>
> the one that is used by kernel v3.8...

Thanks for quick feedback..
Dave

2015-02-09 18:27:51

by Matt Fleming

[permalink] [raw]
Subject: Re: [PATCH] x86, boot: Allow 64bit EFI kernel to be loaded above 4G

On Tue, 03 Feb, at 06:03:20PM, Yinghai Lu wrote:
> Now could use kexec to place kernel/boot_params/cmd_line/initrd
> above 4G, but that is with legacy interface with startup_64 directly.
>
> This patch will allow 64bit EFI kernel to be loaded above 4G
> and use EFI HANDOVER PROTOCOL to start the kernel.
>
> Current code32_start is used for passing around loading address,
> so it will overflow when kernel is loaded abover 4G.
>
> The patch mainly add ext_code32_start to take address high 32bit.
>
> After this patch, could use patched grub2-x86_64.efi to place
> kernel/boot_params/cmd_line/initrd all above 4G and execute the kernel
> above 4G.

The first thing that comes to mind is the issues we experienced last
year when adding support for loading initrds above 4GB to the EFI boot
stub, c.f. commit 47226ad4f4cf ("x86/efi: Only load initrd above 4g on
second try").

Are things going to work correctly this time?

> Index: linux-2.6/arch/x86/boot/compressed/eboot.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/boot/compressed/eboot.c
> +++ linux-2.6/arch/x86/boot/compressed/eboot.c
> @@ -1389,6 +1389,7 @@ struct boot_params *efi_main(struct efi_
> void *handle;
> efi_system_table_t *_table;
> bool is64;
> + unsigned long loaded_addr;
>
> efi_early = c;
>
> @@ -1430,9 +1431,12 @@ struct boot_params *efi_main(struct efi_
> * If the kernel isn't already loaded at the preferred load
> * address, relocate it.
> */
> - if (hdr->pref_address != hdr->code32_start) {
> - unsigned long bzimage_addr = hdr->code32_start;
> - status = efi_relocate_kernel(sys_table, &bzimage_addr,
> + loaded_addr = hdr->code32_start;
> + loaded_addr |= (unsigned long)hdr->ext_code32_start << 32;

Please compile this for CONFIG_X86_32 and fix any compiler warnings.

> @@ -738,6 +742,13 @@ Offset/size: 0x264/4
>
> See EFI HANDOVER PROTOCOL below for more details.
>
> +Field name: ext_code32_start
> +Type: modify (optional, reloc)
> +Offset/size: 0x268/4
> +Protocol: 2.14+
> +
> + The address is used with code32_start to compare pref_address
> + to support EFI 64bit kernel get loaded above 4G.

It would be good to mention that this new field contains the upper
32-bits of the 64-bit address.

--
Matt Fleming, Intel Open Source Technology Center

2015-02-09 20:23:18

by Yinghai Lu

[permalink] [raw]
Subject: Re: [PATCH] x86, boot: Allow 64bit EFI kernel to be loaded above 4G

On Mon, Feb 9, 2015 at 10:27 AM, Matt Fleming <[email protected]> wrote:
> On Tue, 03 Feb, at 06:03:20PM, Yinghai Lu wrote:
>
> The first thing that comes to mind is the issues we experienced last
> year when adding support for loading initrds above 4GB to the EFI boot
> stub, c.f. commit 47226ad4f4cf ("x86/efi: Only load initrd above 4g on
> second try").
>
> Are things going to work correctly this time?

That should be addressed in grub2.

I was thinking that we may need to add a mem_limit command together with
linuxefi and initrdefi,
or add linuxefi64/initrdefi64?

BTW, I tested loading the kernel above 4G with grub2 on
VirtualBox, qemu/kvm/OVMF, and real servers (AMI ...); all work without problems.

I wonder if we need a blacklist for 64-bit UEFI implementations that do not
support accessing
memory above 4G.

>> @@ -1430,9 +1431,12 @@ struct boot_params *efi_main(struct efi_
>> * If the kernel isn't already loaded at the preferred load
>> * address, relocate it.
>> */
>> - if (hdr->pref_address != hdr->code32_start) {
>> - unsigned long bzimage_addr = hdr->code32_start;
>> - status = efi_relocate_kernel(sys_table, &bzimage_addr,
>> + loaded_addr = hdr->code32_start;
>> + loaded_addr |= (unsigned long)hdr->ext_code32_start << 32;
>
> Please compile this for CONFIG_X86_32 and fix any compiler warnings.

Ok.

>
>> @@ -738,6 +742,13 @@ Offset/size: 0x264/4
>>
>> See EFI HANDOVER PROTOCOL below for more details.
>>
>> +Field name: ext_code32_start
>> +Type: modify (optional, reloc)
>> +Offset/size: 0x268/4
>> +Protocol: 2.14+
>> +
>> + The address is used with code32_start to compare pref_address
>> + to support EFI 64bit kernel get loaded above 4G.
>
> It would be good to mention that this new field contains the upper
> 32-bits of the 64-bit address.

ok. that is: upper 32bits of the 64bit address of startup_32 when kernel
loaded above 4G.

Thanks

Yinghai

2015-02-11 06:12:00

by Baoquan He

[permalink] [raw]
Subject: Re: [PATCH] x86, boot: Allow 64bit EFI kernel to be loaded above 4G

Hi Yinghai,

Could you please help to have a look at a problem which I encountered?

I am trying to make kaslr randomize the kernel physical and virtual
addresses separately. The separate randomization is now done:
the kernel physical address can be randomized within [16M, 4G], and the virtual
address can be randomized within [16M, 1G]. Below is the post.
http://thread.gmane.org/gmane.linux.kernel/1870532

Now I am trying to allow the kernel physical address to be randomized anywhere, not
limited to below 4G. As you know, in arch/x86/boot/compressed/head_64.S an
identity mapping of 0~4G is built; for addresses above 4G I added an
IDT and a #PF handler. Then I hardcoded the output address of
choose_kernel_location as 5G. The #PF handler worked; however, the system
reboots in arch/x86/kernel/head_64.S.

I don't know how to debug asm code, and have no idea why the kernel cannot run
above 4G even though it is already in 64-bit mode in boot/compressed/head_64.S.

To debug this issue, I made a small debug patch, shown below. Four
more pages are added as pmd page tables, so the identity mapping covers
0~8G. Then I hardcode the output address as 5G, and disable the relocation
handling to rule out unnecessary interference.

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 6b1766c..74da678 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -123,7 +123,7 @@ ENTRY(startup_32)
/* Initialize Page tables to 0 */
leal pgtable(%ebx), %edi
xorl %eax, %eax
- movl $((4096*6)/4), %ecx
+ movl $((4096*10)/4), %ecx
rep stosl

/* Build Level 4 */
@@ -134,7 +134,7 @@ ENTRY(startup_32)
/* Build Level 3 */
leal pgtable + 0x1000(%ebx), %edi
leal 0x1007(%edi), %eax
- movl $4, %ecx
+ movl $8, %ecx
1: movl %eax, 0x00(%edi)
addl $0x00001000, %eax
addl $8, %edi
@@ -144,7 +144,7 @@ ENTRY(startup_32)
/* Build Level 2 */
leal pgtable + 0x2000(%ebx), %edi
movl $0x00000183, %eax
- movl $2048, %ecx
+ movl $4096, %ecx
1: movl %eax, 0(%edi)
addl $0x00200000, %eax
addl $8, %edi
@@ -476,4 +476,4 @@ boot_stack_end:
.section ".pgtable","a",@nobits
.balign 4096
pgtable:
- .fill 6*4096, 1, 0
+ .fill 10*4096, 1, 0
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index a950864..47c8c80 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -404,6 +404,7 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
output = choose_kernel_location(input_data, input_len, output,
output_len > run_size ? output_len
: run_size);
+ output = 0x140000000;

/* Validate memory location choices. */
if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
@@ -427,8 +428,10 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
* 32-bit always performs relocations. 64-bit relocations are only
* needed if kASLR has chosen a different load address.
*/
+#if 0
if (!IS_ENABLED(CONFIG_X86_64) || output != output_orig)
handle_relocations(output, output_len);
+#endif
debug_putstr("done.\nBooting the kernel.\n");
return output;
}


Thanks
Baoquan

On 02/03/15 at 06:03pm, Yinghai Lu wrote:
> Now could use kexec to place kernel/boot_params/cmd_line/initrd
> above 4G, but that is with legacy interface with startup_64 directly.
>
> This patch will allow 64bit EFI kernel to be loaded above 4G
> and use EFI HANDOVER PROTOCOL to start the kernel.
>
> Current code32_start is used for passing around loading address,
> so it will overflow when kernel is loaded abover 4G.
>
> The patch mainly add ext_code32_start to take address high 32bit.
>
> After this patch, could use patched grub2-x86_64.efi to place
> kernel/boot_params/cmd_line/initrd all above 4G and execute the kernel
> above 4G.
>
> bootlog like:
>
> params: [1618fc000,1618fffff]
> cmdline: [1618fb000,1618fb7fe]
> kernel: [15e000000,161385fff]
> kernel: done [ linux 9.25MiB 100% 6.66MiB/s ]
> initrd: [15bcbe000,15dffffbb]
> initrd: 1 file done [ initrd.img 35.26MiB 100% 11.93MiB/s ]
> early console in decompress_kernel
> decompress_kernel:
> input: [0x15fd0b3b4-0x16063c803], output: 0x15e000000, heap: [0x160645b00-0x16064daff]
>
> Decompressing Linux... xz... Parsing ELF... done.
> Booting the kernel.
> [ 0.000000] bootconsole [uart0] enabled
> [ 0.000000] real_mode_data : phys 00000001618fc000
> [ 0.000000] real_mode_data : virt ffff8801618fc000
> [ 0.000000] Kernel Layout:
> [ 0.000000] .text: [0x15e000000-0x15f08f72c]
> [ 0.000000] .rodata: [0x15f200000-0x15fa44fff]
> [ 0.000000] .data: [0x15fc00000-0x15fe545ff]
> [ 0.000000] .init: [0x15fe56000-0x16021afff]
> [ 0.000000] .bss: [0x160229000-0x16135ffff]
> [ 0.000000] .brk: [0x161360000-0x161385fff]
> [ 0.000000] memblock_reserve: [0x0000000009f000-0x000000000fffff] flags 0x0 * BIOS reserved
> ...
> [ 0.000000] memblock_reserve: [0x0000015e000000-0x0000016135ffff] flags 0x0 TEXT DATA BSS
> [ 0.000000] memblock_reserve: [0x0000015bcbe000-0x0000015dffffff] flags 0x0 RAMDISK
>
>
> Signed-off-by: Yinghai Lu <[email protected]>
>
> ---
> Documentation/x86/boot.txt | 18 ++++++++++++++++++
> arch/x86/boot/compressed/eboot.c | 15 ++++++++++-----
> arch/x86/boot/compressed/head_64.S | 7 ++++++-
> arch/x86/boot/header.S | 3 ++-
> arch/x86/include/uapi/asm/bootparam.h | 1 +
> arch/x86/kernel/asm-offsets.c | 1 +
> 6 files changed, 38 insertions(+), 7 deletions(-)
>
> Index: linux-2.6/arch/x86/include/uapi/asm/bootparam.h
> ===================================================================
> --- linux-2.6.orig/arch/x86/include/uapi/asm/bootparam.h
> +++ linux-2.6/arch/x86/include/uapi/asm/bootparam.h
> @@ -83,6 +83,7 @@ struct setup_header {
> __u64 pref_address;
> __u32 init_size;
> __u32 handover_offset;
> + __u32 ext_code32_start;
> } __attribute__((packed));
>
> struct sys_desc_table {
> Index: linux-2.6/arch/x86/kernel/asm-offsets.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/asm-offsets.c
> +++ linux-2.6/arch/x86/kernel/asm-offsets.c
> @@ -68,6 +68,7 @@ void common(void) {
> OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
> OFFSET(BP_pref_address, boot_params, hdr.pref_address);
> OFFSET(BP_code32_start, boot_params, hdr.code32_start);
> + OFFSET(BP_ext_code32_start, boot_params, hdr.ext_code32_start);
>
> BLANK();
> DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
> Index: linux-2.6/arch/x86/boot/compressed/head_64.S
> ===================================================================
> --- linux-2.6.orig/arch/x86/boot/compressed/head_64.S
> +++ linux-2.6/arch/x86/boot/compressed/head_64.S
> @@ -263,6 +263,8 @@ ENTRY(efi_pe_entry)
> mov %rax, %rsi
> leaq startup_32(%rip), %rax
> movl %eax, BP_code32_start(%rsi)
> + shr $32, %rax
> + movl %eax, BP_ext_code32_start(%rsi)
> jmp 2f /* Skip the relocation */
>
> handover_entry:
> @@ -286,7 +288,10 @@ fail:
> hlt
> jmp fail
> 2:
> - movl BP_code32_start(%esi), %eax
> + movl BP_code32_start(%rsi), %eax
> + movl BP_ext_code32_start(%rsi), %ebx
> + shl $32, %rbx
> + orq %rbx, %rax
> leaq preferred_addr(%rax), %rax
> jmp *%rax
>
> Index: linux-2.6/arch/x86/boot/compressed/eboot.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/boot/compressed/eboot.c
> +++ linux-2.6/arch/x86/boot/compressed/eboot.c
> @@ -1389,6 +1389,7 @@ struct boot_params *efi_main(struct efi_
> void *handle;
> efi_system_table_t *_table;
> bool is64;
> + unsigned long loaded_addr;
>
> efi_early = c;
>
> @@ -1430,9 +1431,12 @@ struct boot_params *efi_main(struct efi_
> * If the kernel isn't already loaded at the preferred load
> * address, relocate it.
> */
> - if (hdr->pref_address != hdr->code32_start) {
> - unsigned long bzimage_addr = hdr->code32_start;
> - status = efi_relocate_kernel(sys_table, &bzimage_addr,
> + loaded_addr = hdr->code32_start;
> + loaded_addr |= (unsigned long)hdr->ext_code32_start << 32;
> + if (hdr->pref_address != loaded_addr) {
> + unsigned long loaded_addr_orig = loaded_addr;
> +
> + status = efi_relocate_kernel(sys_table, &loaded_addr,
> hdr->init_size, hdr->init_size,
> hdr->pref_address,
> hdr->kernel_alignment);
> @@ -1441,8 +1445,9 @@ struct boot_params *efi_main(struct efi_
> goto fail;
> }
>
> - hdr->pref_address = hdr->code32_start;
> - hdr->code32_start = bzimage_addr;
> + hdr->pref_address = loaded_addr_orig;
> + hdr->code32_start = loaded_addr & 0xffffffff;
> + hdr->ext_code32_start = loaded_addr >> 32;
> }
>
> status = exit_boot(boot_params, handle, is64);
> Index: linux-2.6/arch/x86/boot/header.S
> ===================================================================
> --- linux-2.6.orig/arch/x86/boot/header.S
> +++ linux-2.6/arch/x86/boot/header.S
> @@ -301,7 +301,7 @@ _start:
> # Part 2 of the header, from the old setup.S
>
> .ascii "HdrS" # header signature
> - .word 0x020d # header version number (>= 0x0105)
> + .word 0x020e # header version number (>= 0x0105)
> # or else old loadlin-1.5 will fail)
> .globl realmode_swtch
> realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
> @@ -449,6 +449,7 @@ pref_address: .quad LOAD_PHYSICAL_ADDR
> #endif
> init_size: .long INIT_SIZE # kernel initialization size
> handover_offset: .long 0 # Filled in by build.c
> +ext_code32_start: .long 0 # werid one!
>
> # End of setup header #####################################################
>
> Index: linux-2.6/Documentation/x86/boot.txt
> ===================================================================
> --- linux-2.6.orig/Documentation/x86/boot.txt
> +++ linux-2.6/Documentation/x86/boot.txt
> @@ -61,6 +61,9 @@ Protocol 2.12: (Kernel 3.8) Added the xl
> to struct boot_params for loading bzImage and ramdisk
> above 4G in 64bit.
>
> +Protocol 2.14: (Kernel 3.20) Added the ext_code32_start to support EFI64
> + to be loaded above 4G.
> +
> **** MEMORY LAYOUT
>
> The traditional memory map for the kernel loader, used for Image or
> @@ -197,6 +200,7 @@ Offset Proto Name Meaning
> 0258/8 2.10+ pref_address Preferred loading address
> 0260/4 2.10+ init_size Linear memory required during initialization
> 0264/4 2.11+ handover_offset Offset of handover entry point
> +0268/4 2.14+ ext_code32_start Extended part for code32_start
>
> (1) For backwards compatibility, if the setup_sects field contains 0, the
> real value is 4.
> @@ -738,6 +742,13 @@ Offset/size: 0x264/4
>
> See EFI HANDOVER PROTOCOL below for more details.
>
> +Field name: ext_code32_start
> +Type: modify (optional, reloc)
> +Offset/size: 0x268/4
> +Protocol: 2.14+
> +
> + The address is used with code32_start to compare pref_address
> + to support EFI 64bit kernel get loaded above 4G.
>
> **** THE IMAGE CHECKSUM
>
> @@ -1122,4 +1133,11 @@ The boot loader *must* fill out the foll
> o hdr.ramdisk_image (if applicable)
> o hdr.ramdisk_size (if applicable)
>
> +for 64bit, when loading above 4G, *must* fill out the following fields,
> +
> + o hdr.ext_code32_start
> + o ext_cmd_line_ptr
> + o ext_ramdisk_image (if applicable)
> + o ext_ramdisk_size (if applicable)
> +
> All other fields should be zero.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/

2015-02-11 15:55:30

by Matt Fleming

[permalink] [raw]
Subject: Re: [PATCH] x86, boot: Allow 64bit EFI kernel to be loaded above 4G

On Mon, 09 Feb, at 12:23:15PM, Yinghai Lu wrote:
> On Mon, Feb 9, 2015 at 10:27 AM, Matt Fleming <[email protected]> wrote:
> > On Tue, 03 Feb, at 06:03:20PM, Yinghai Lu wrote:
> >
> > The first thing that comes to mind is the issues we experienced last
> > year when adding support for loading initrds above 4GB to the EFI boot
> > stub, c.f. commit 47226ad4f4cf ("x86/efi: Only load initrd above 4g on
> > second try").
> >
> > Are things going to work correctly this time?
>
> That should be addressed the grub2.

I vaguely remember thinking that the issue was only experienced when
using the EFI_FILE protocol, which grub2 doesn't use. So the grub
developers may be OK, but we should at least give them a heads up.

> I was thinking that we may need to add mem_limit command together with
> linuxefi and initrdefi.
> or add linuxefi64/initrdefi64?

No, we definitely do not want to add any more grub commands.

> BTW, I tested loading kernel above grub2 on
> virutalbox, qemu/kvm/OVMF, and real servers (ami ...) all work without problem.
>
> wonder if we need have one black list for 64bit UEFI that does not
> support access
> memory above 4G.

We have been successful, so far, in not introducing these kind of
blacklists. It would be a shame to start now.

--
Matt Fleming, Intel Open Source Technology Center

2015-02-11 16:30:27

by Peter Jones

[permalink] [raw]
Subject: Re: [PATCH] x86, boot: Allow 64bit EFI kernel to be loaded above 4G

On Wed, Feb 11, 2015 at 03:55:24PM +0000, Matt Fleming wrote:
> On Mon, 09 Feb, at 12:23:15PM, Yinghai Lu wrote:
> > On Mon, Feb 9, 2015 at 10:27 AM, Matt Fleming <[email protected]> wrote:
> > > On Tue, 03 Feb, at 06:03:20PM, Yinghai Lu wrote:
> > >
> > > The first thing that comes to mind is the issues we experienced last
> > > year when adding support for loading initrds above 4GB to the EFI boot
> > > stub, c.f. commit 47226ad4f4cf ("x86/efi: Only load initrd above 4g on
> > > second try").
> > >
> > > Are things going to work correctly this time?
> >
> > That should be addressed the grub2.
>
> I vaguely remember thinking that the issue was only experienced when
> using the EFI_FILE protocol, which grub2 doesn't use. So the grub
> developers may be OK, but we should at least give them a heads up.

Looks correct to me.

> > I was thinking that we may need to add mem_limit command together with
> > linuxefi and initrdefi.
> > or add linuxefi64/initrdefi64?
>
> No, we definitely do not want to add any more grub commands.

Definitely agree.

> > BTW, I tested loading kernel above grub2 on
> > virutalbox, qemu/kvm/OVMF, and real servers (ami ...) all work without problem.
> >
> > wonder if we need have one black list for 64bit UEFI that does not
> > support access
> > memory above 4G.
>
> We have been successful, so far, in not introducing these kind of
> blacklists. It would be a shame to start now.

From grub's point of view I'm not sure why we'd care - the pages kernel
and initramfs land in are both from the Boot Services allocator, so if the
machine doesn't support high addresses, they won't be there.

--
Peter

2015-02-12 14:59:47

by Matt Fleming

[permalink] [raw]
Subject: Re: [PATCH] x86, boot: Allow 64bit EFI kernel to be loaded above 4G

On Wed, 11 Feb, at 11:29:58AM, Peter Jones wrote:
>
> From grub's point of view I'm not sure why we'd care - the pages kernel
> and initramfs land in are both from the Boot Services allocator, so if the
> machine doesn't support high addresses, they won't be there.

It's not that some implementations don't "support" higher addresses,
it's that the EFI_FILE_PROTOCOL is buggy and it corrupts the memory when
reading into it; you can allocate it just fine. At least, that's what I
remember from the limited investigation I performed.

But since grub doesn't use EFI_FILE_PROTOCOL, we should be cool.

--
Matt Fleming, Intel Open Source Technology Center

2015-02-18 07:22:41

by Yinghai Lu

[permalink] [raw]
Subject: Re: [PATCH] x86, boot: Allow 64bit EFI kernel to be loaded above 4G

On Tue, Feb 10, 2015 at 10:11 PM, Baoquan He <[email protected]> wrote:
> Hi Yinghai,
>
> Could you please help to have a look at a problem which I encountered?
>
> I am trying to make kaslr randomize on both kernel physical and virtual
> address separately. Now the separate randomization has been done,
> kernel physical address can be randomized to [16M, 4G], and virtual
> address can be randomzed to [16M, 1G]. Below is the post.
> http://thread.gmane.org/gmane.linux.kernel/1870532
>
> Now I am trying to make kernel physical address randomize anywhere, not
> limited to below 4G. As you know in arch/x86/boot/compressed/head_64.S a
> identity mapping of 0~4G has been built, for address above 4G I added an
> IDT and #PF handler. Then I hardcoded the output address of
> choose_kernel_location as 5G, the #PF handler worked, however it will
> reboot in arch/x86/kernel/head_64.S.

For 64bit, I'd like to see the bootloader load the kernel at a random hw address.

Otherwise you will need to set up another ident mapping for the new range.

Thanks

Yinghai

2015-02-18 07:29:57

by Yinghai Lu

[permalink] [raw]
Subject: Re: [PATCH] x86, boot: Allow 64bit EFI kernel to be loaded above 4G

On Wed, Feb 4, 2015 at 10:09 PM, Dave Young <[email protected]> wrote:
> On 02/04/15 at 09:25pm, Yinghai Lu wrote:
>> On Wed, Feb 4, 2015 at 7:25 PM, Dave Young <[email protected]> wrote:
>> >> After this patch, could use patched grub2-x86_64.efi to place
>> >> kernel/boot_params/cmd_line/initrd all above 4G and execute the kernel
>> >> above 4G.
>> >
>> > Can you share the grub2 patch for testing?
>>
>> Please check attached 5 patches. last one is for debug purpose.
>>
>> You need to apply them on top of
>>
>> git://git.savannah.gnu.org/grub.git
>>
>> plus http://pkgs.fedoraproject.org/cgit/grub2.git/plain/0091-Add-support-for-linuxefi.patch
>

FYI, I put those patches together with patches that extend grub2 to load
kernel/initrd above 4G into github tree.

https://github.com/yhlu/grub2.git

Thanks

Yinghai

2015-02-18 11:30:07

by Baoquan He

[permalink] [raw]
Subject: Re: [PATCH] x86, boot: Allow 64bit EFI kernel to be loaded above 4G

On 02/17/15 at 11:22pm, Yinghai Lu wrote:
> On Tue, Feb 10, 2015 at 10:11 PM, Baoquan He <[email protected]> wrote:
> > Hi Yinghai,
> >
> > Could you please help to have a look at a problem which I encountered?
> >
> > I am trying to make kaslr randomize on both kernel physical and virtual
> > address separately. Now the separate randomization has been done,
> > kernel physical address can be randomized to [16M, 4G], and virtual
> > address can be randomzed to [16M, 1G]. Below is the post.
> > http://thread.gmane.org/gmane.linux.kernel/1870532
> >
> > Now I am trying to make kernel physical address randomize anywhere, not
> > limited to below 4G. As you know in arch/x86/boot/compressed/head_64.S a
> > identity mapping of 0~4G has been built, for address above 4G I added an
> > IDT and #PF handler. Then I hardcoded the output address of
> > choose_kernel_location as 5G, the #PF handler worked, however it will
> > reboot in arch/x86/kernel/head_64.S.
>
> For 64bit, I'd like to see bootloader could load kernel to random hw address.
>
> otherwise you will need to set another ident mapping for new range.

Hi Yinghai,

I am sorry, I didn't get what you mean. I have tried to set up another ident
mapping for the new range, and judging from the debug messages it succeeds.
However, it always reboots because of a GPF. That's why I made that small
debug patch, which adds another 4G of ident mapping and hardcodes the
kernel location between 4G and 8G.

Could you say more of it? Or which direction should I go?

Thanks a lot for your help!

Thanks
Baoquan

>
> Thanks
>
> Yinghai

2015-02-18 19:47:05

by Yinghai Lu

[permalink] [raw]
Subject: Re: [PATCH] x86, boot: Allow 64bit EFI kernel to be loaded above 4G

On Wed, Feb 18, 2015 at 3:29 AM, Baoquan He <[email protected]> wrote:
> On 02/17/15 at 11:22pm, Yinghai Lu wrote:
>> On Tue, Feb 10, 2015 at 10:11 PM, Baoquan He <[email protected]> wrote:
> I am sorry I didn't get what you mean. I have tried to set another ident
> mapping for new range and it is successful seeing from debug message
> printing. However it always reboot because of GPF. That's why I made
> that small debug patch which add another 4G ident mapping and hardcoded
> to put kernel between 4G and 8G.
>
> Could you say more of it? Or which direction should I go?

I would suggest:
leave physical address randomization to the bootloader, as the bootloader always
needs to set up the ident mapping to cover kernel/bootparam/cmdline.

Virtual address randomization should be handled by kaslr in the kernel.

Thanks

Yinghai

2015-02-20 02:21:56

by Baoquan He

[permalink] [raw]
Subject: Re: [PATCH] x86, boot: Allow 64bit EFI kernel to be loaded above 4G

On 02/18/15 at 11:47am, Yinghai Lu wrote:
> On Wed, Feb 18, 2015 at 3:29 AM, Baoquan He <[email protected]> wrote:
> > On 02/17/15 at 11:22pm, Yinghai Lu wrote:
> >> On Tue, Feb 10, 2015 at 10:11 PM, Baoquan He <[email protected]> wrote:
> > I am sorry I didn't get what you mean. I have tried to set another ident
> > mapping for new range and it is successful seeing from debug message
> > printing. However it always reboot because of GPF. That's why I made
> > that small debug patch which add another 4G ident mapping and hardcoded
> > to put kernel between 4G and 8G.
> >
> > Could you say more of it? Or which direction should I go?
>
> I would suggest:
> leave physical address random handling to bootloader, as bootloader always
> need to set ident mapping to cover kernel/bootparam/cmdline.
>
> For virtual address random handling it should go kasl in kernel.

Kaslr need both virtual and physical address be randomized, otherwise
it doesn't make sense. Please check what hpa said about this issue:

https://lkml.org/lkml/2014/10/13/350

And usually bootloaders do not load the kernel at a random physical
address. For now we mostly see only kexec/kdump doing this. I believe Kees
introduced kaslr mainly to solve security issues of the normal kernel, which
is unlike kexec/kdump, which are used for testing or debugging. Randomizing the
physical address makes sense for the kaslr feature.

The normal kernel jumps from 32bit to 64bit and enters the 64bit mode
of long mode. I am wondering why it causes a GPF when the kernel is put
above 4G. Adding an IDT and #PF handler to build the ident page mapping
on demand is done now; I am blocked by the GPF, otherwise I could
post it very soon for testing.

Thanks
Baoquan
>
> Thanks
>
> Yinghai

2015-02-20 03:35:21

by Yinghai Lu

[permalink] [raw]
Subject: Re: [PATCH] x86, boot: Allow 64bit EFI kernel to be loaded above 4G

On Thu, Feb 19, 2015 at 6:13 PM, Baoquan He <[email protected]> wrote:
> On 02/18/15 at 11:47am, Yinghai Lu wrote:
>
> Kaslr need both virtual and physical address be randomized, otherwise
> it doesn't make sense. Please check what hpa said about this issue:
>
> https://lkml.org/lkml/2014/10/13/350
>

If I read it correctly, the two could be separated: the physical one could be
done in the bootloader and the virtual one in the kernel.

> And usually no bootloader often load kernel onto a random physical
> address. Fow now we can often see kexec/kdump did this. I believe Kees
> introduced kaslr to mainly solve security issue of normal kernel which
> is not like kexec/kdump for testing or debugging. Randomizing physical
> address makes sense for kaslr feature.

I put some grub2 patches that extend grub2 to load kernel/initrd above
4G into github tree.

https://github.com/yhlu/grub2.git

The main purpose of that feature is to handle an initrd that is bigger than 4G.

Or you can try your solution on a system that has 64bit EFI support;
then you won't even need to
worry about setting up the ident mapping.

Thanks

Yinghai

2015-02-20 09:29:21

by Baoquan He

[permalink] [raw]
Subject: Re: [PATCH] x86, boot: Allow 64bit EFI kernel to be loaded above 4G

On 02/19/15 at 07:35pm, Yinghai Lu wrote:
> On Thu, Feb 19, 2015 at 6:13 PM, Baoquan He <[email protected]> wrote:
> > On 02/18/15 at 11:47am, Yinghai Lu wrote:
> >
> > Kaslr need both virtual and physical address be randomized, otherwise
> > it doesn't make sense. Please check what hpa said about this issue:
> >
> > https://lkml.org/lkml/2014/10/13/350
> >
>
> If I read correctly, it could be separated. phy one could be on bootloader
> and virtual could be in kernel.

I think people want physical address randomization too. This is
what Vivek replied to hpa's comment:

https://lkml.org/lkml/2014/10/13/377

And after I posted the patchset handling randomization of virtual and
physical address separately, Kees Cook also think it makes sense:
http://www.gossamer-threads.com/lists/linux/kernel/2090014

>
> > And usually no bootloader often load kernel onto a random physical
> > address. Fow now we can often see kexec/kdump did this. I believe Kees
> > introduced kaslr to mainly solve security issue of normal kernel which
> > is not like kexec/kdump for testing or debugging. Randomizing physical
> > address makes sense for kaslr feature.
>
> I put some grub2 patches that extend grub2 to load kernel/initrd above
> 4G into github tree.
>
> https://github.com/yhlu/grub2.git
>
> main purpose for that feature is to handle initrd that bigger than 4G.
>
> or you can try you solution on system that have 64bit EFI support,
> then you will don't need to
> worry about set ident mapping even.

Actually kexec is a bootloader which can put the kernel at any address. This
is done in the user space program kexec-tools. However, kexec-tools makes
the kernel jump from 64bit into 64bit directly, and has built an ident mapping
of the whole physical memory. I have tried this and it works. kexec is
mainly used by kernel developers; kaslr is meaningless for kexec.

However, kaslr focuses on the normal kernel, which jumps from 32bit to 64bit mode.
I can't figure out a way to work around this.

Now I just want to figure out which register setting causes the GPF when
the kernel is reloaded above 4G in this 32bit-to-64bit jump path.

>
> Thanks
>
> Yinghai

2015-02-20 23:53:14

by Yinghai Lu

[permalink] [raw]
Subject: Re: [PATCH] x86, boot: Allow 64bit EFI kernel to be loaded above 4G

On Fri, Feb 20, 2015 at 1:28 AM, Baoquan He <[email protected]> wrote:
>
> Actually kexec is a bootloader which can put kernel at any address. This
> is done in user space program kexec-tools. However kexec-tools make
> kernel jump from 64bit into 64bit directly, and has built ident mapping
> of whole physical memory. I have tried this and it works. kexec is
> mainly used for kernel developer, kaslr is meaningless for kexec.
>
> However kaslr focus on normal kernel, and jump from 32bit to 64bit mode.
> I can't figure out a way to work around this.
>
> Now I just want to figure out what register setting cause GPF when
> reload kernel above 4G in this jumping from 32bit to 64bit way.

Then you are not setting the ident mapping correctly.

You should make sure to add an extra ident mapping for the new [output,
output+output_len - 1].
The bootloader only covers the old [output, output+output_len - 1].

And you should check whether the mapping is present before adding a new one,
otherwise you will overwrite
the one from a 64bit bootloader like kexec-tools or grub2-x86_64 etc.

You could use kernel_ident_mapping_init() from arch/x86/mm/init_64.c
--- may need to cut and paste or split and include to
arch/x86/boot/compressed/misc.c
also you need to find some pages for alloc_pgt_page.

Thanks

Yinghai

2015-02-21 02:49:44

by Baoquan He

[permalink] [raw]
Subject: Re: [PATCH] x86, boot: Allow 64bit EFI kernel to be loaded above 4G

On 02/20/15 at 03:53pm, Yinghai Lu wrote:
> On Fri, Feb 20, 2015 at 1:28 AM, Baoquan He <[email protected]> wrote:
> >
> > Actually kexec is a bootloader which can put kernel at any address. This
> > is done in user space program kexec-tools. However kexec-tools make
> > kernel jump from 64bit into 64bit directly, and has built ident mapping
> > of whole physical memory. I have tried this and it works. kexec is
> > mainly used for kernel developer, kaslr is meaningless for kexec.
> >
> > However kaslr focus on normal kernel, and jump from 32bit to 64bit mode.
> > I can't figure out a way to work around this.
> >
> > Now I just want to figure out what register setting cause GPF when
> > reload kernel above 4G in this jumping from 32bit to 64bit way.
>
> Then you are not setting the ident mapping correctly.
>
> you should make sure add extra ident mapping for the new [output,
> output+output_len - 1].
> bootloader only cover old [output, output+output_len - 1]
>
> and you should check if the mapping is present before add new one,
> otherwise will overrite
> the one from 64bit bootloader like kexec-tools or grub2-x86_64 etc.
>
> You could use kernel_ident_mapping_init() from arch/x86/mm/init_64.c
> --- may need to cut and paste or split and include to
> arch/x86/boot/compressed/misc.c
> also you need to find some pages for alloc_pgt_page.

At the beginning I did it just as you said: added an IDT and a #PF
handler, which gets the page fault address and builds an ident mapping around
it when the kernel is reloaded above 4G. In this case 3 more pages are enough if
the kernel is put into another 512G region and crosses a 512G boundary.
The kernel_ident_mapping_init code can be borrowed but needs to be adjusted a
little bit. This works as expected, but a GPF is reported and the machine reboots
to BIOS. That's why I made the simple debug patch I pasted before, to
filter out unnecessary interference.

In arch/x86/boot/compressed/head_64.S 6 pages are used: 1 for pgd,
1 for pud and 4 for pmd, which together cover the 0~4G ident mapping. So I
added 4 more pmd pages, so that 0~8G is covered. Then I hardcoded the output of
the physical address randomization as 5G, which means the kernel will be
reloaded there and decompressed. With this, the ident mapping for the hardcoded
address should be set up correctly, yet the GPF still happens. I
borrowed printf.c from arch/x86/boot and made it work for
boot/compressed, and can see that in this case the kernel decompresses
successfully and jumps into arch/x86/kernel/head_64.S, then reboots there
during a jump.

>
> Thanks
>
> Yinghai

2015-02-21 20:00:57

by Yinghai Lu

[permalink] [raw]
Subject: Re: [PATCH] x86, boot: Allow 64bit EFI kernel to be loaded above 4G

On Fri, Feb 20, 2015 at 6:49 PM, Baoquan He <[email protected]> wrote:
> On 02/20/15 at 03:53pm, Yinghai Lu wrote:
> At the beginning I did it just as you said, add IDT table and $PF
> handler. Get page fault address and built ident mapping around it when
> reload kernel above 4G. In this case 3 more pages are enough if kernel
> is put to another 512G and cross the boundary of 512G.
> kernel_ident_mapping_init code can be borrowed and need be adjusted a
> little bit. This works as expected, but a GPF reported and reboot to
> BIOS. That's why I made a simple debug patch as I pasted before to
> filter unnecessary interference.

Please use the attached patch instead of the #PF handler in the boot stage.
It works when output is hard-coded to be moved above 4G.

From: Yinghai Lu <[email protected]>
Subject: [PATCH] x86, boot: Enable ident_mapping for kasl above 4G for 64bit

split kernel_ident_mapping_init() and call that in boot::misc.c stage.
it will cover new range kernel space that is above 4G.

Signed-off-by: Yinghai Lu <[email protected]>

---
arch/x86/boot/compressed/misc.c | 10 ++++
arch/x86/boot/compressed/misc_pgt.c | 61 +++++++++++++++++++++++++++++
arch/x86/include/asm/page.h | 5 ++
arch/x86/mm/ident_map.c | 74 ++++++++++++++++++++++++++++++++++++
arch/x86/mm/init_64.c | 74 ------------------------------------
5 files changed, 151 insertions(+), 73 deletions(-)

Index: linux-2.6/arch/x86/boot/compressed/misc.c
===================================================================
--- linux-2.6.orig/arch/x86/boot/compressed/misc.c
+++ linux-2.6/arch/x86/boot/compressed/misc.c
@@ -9,6 +9,11 @@
* High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
*/

+#ifdef CONFIG_X86_64
+#define __pa(x) ((unsigned long)(x))
+#define __va(x) ((void *)((unsigned long)(x)))
+#endif
+
#include "misc.h"
#include "../string.h"

@@ -366,6 +371,8 @@ static void parse_elf(void *output)
free(phdrs);
}

+#include "misc_pgt.c"
+
asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
unsigned char *input_data,
unsigned long input_len,
@@ -421,6 +428,9 @@ asmlinkage __visible void *decompress_ke
error("Wrong destination address");
#endif

+ if (output != output_orig)
+ fill_linux64_pagetable((unsigned long)output, output_len);
+
debug_putstr("\nDecompressing Linux... ");
decompress(input_data, input_len, NULL, NULL, output, NULL, error);
parse_elf(output);
Index: linux-2.6/arch/x86/include/asm/page.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/page.h
+++ linux-2.6/arch/x86/include/asm/page.h
@@ -37,7 +37,10 @@ static inline void copy_user_page(void *
alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE

+#ifndef __pa
#define __pa(x) __phys_addr((unsigned long)(x))
+#endif
+
#define __pa_nodebug(x) __phys_addr_nodebug((unsigned long)(x))
/* __pa_symbol should be used for C visible symbols.
This seems to be the official gcc blessed way to do such arithmetic. */
@@ -51,7 +54,9 @@ static inline void copy_user_page(void *
#define __pa_symbol(x) \
__phys_addr_symbol(__phys_reloc_hide((unsigned long)(x)))

+#ifndef __va
#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
+#endif

#define __boot_va(x) __va(x)
#define __boot_pa(x) __pa(x)
Index: linux-2.6/arch/x86/boot/compressed/misc_pgt.c
===================================================================
--- /dev/null
+++ linux-2.6/arch/x86/boot/compressed/misc_pgt.c
@@ -0,0 +1,61 @@
+
+#ifdef CONFIG_X86_64
+#include <asm/init.h>
+#include <asm/pgtable.h>
+
+#include "../../mm/ident_map.h"
+
+struct alloc_pgt_data {
+ unsigned char *pgt_buf;
+ unsigned long pgt_buf_size;
+ unsigned long pgt_buf_offset;
+};
+
+static void *alloc_pgt_page(void *context)
+{
+ struct alloc_pgt_data *d = (struct alloc_pgt_data *)context;
+ unsigned char *p = (unsigned char *)d->pgt_buf;
+
+ if (d->pgt_buf_offset >= d->pgt_buf_size) {
+ debug_putstr("out of pgt_buf in misc.c\n");
+ return NULL;
+ }
+
+ p += d->pgt_buf_offset;
+ d->pgt_buf_offset += 4096;
+ memset(p, 0, 4096);
+
+ return p;
+}
+
+#define PGT_BUF_SIZE (4096*4)
+
+unsigned long __force_order;
+static unsigned char pgt_buf[PGT_BUF_SIZE] __aligned(4096);
+
+static void fill_linux64_pagetable(unsigned long start, unsigned long size)
+{
+ struct alloc_pgt_data data = {
+ .pgt_buf = (unsigned char *) pgt_buf,
+ .pgt_buf_size = sizeof(pgt_buf),
+ .pgt_buf_offset = 0,
+ };
+ struct x86_mapping_info mapping_info = {
+ .alloc_pgt_page = alloc_pgt_page,
+ .context = &data,
+ .pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
+ };
+ unsigned long end = start + size;
+ pgd_t *level4p = (pgd_t *)read_cr3();
+
+ /* align boundary to 2M */
+ start = (start >> 21) << 21;
+ end = ((end + (1<<21) - 1) >> 21) << 21;
+ if (start >= (1UL<<32))
+ kernel_ident_mapping_init(&mapping_info, level4p, start, end);
+}
+#else
+static void fill_linux64_pagetable(unsigned long start, unsigned long size)
+{
+}
+#endif
Index: linux-2.6/arch/x86/mm/ident_map.c
===================================================================
--- /dev/null
+++ linux-2.6/arch/x86/mm/ident_map.c
@@ -0,0 +1,74 @@
+
+static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page,
+ unsigned long addr, unsigned long end)
+{
+ addr &= PMD_MASK;
+ for (; addr < end; addr += PMD_SIZE) {
+ pmd_t *pmd = pmd_page + pmd_index(addr);
+
+ if (!pmd_present(*pmd))
+ set_pmd(pmd, __pmd(addr | pmd_flag));
+ }
+}
+static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
+ unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+
+ for (; addr < end; addr = next) {
+ pud_t *pud = pud_page + pud_index(addr);
+ pmd_t *pmd;
+
+ next = (addr & PUD_MASK) + PUD_SIZE;
+ if (next > end)
+ next = end;
+
+ if (pud_present(*pud)) {
+ pmd = pmd_offset(pud, 0);
+ ident_pmd_init(info->pmd_flag, pmd, addr, next);
+ continue;
+ }
+ pmd = (pmd_t *)info->alloc_pgt_page(info->context);
+ if (!pmd)
+ return -ENOMEM;
+ ident_pmd_init(info->pmd_flag, pmd, addr, next);
+ set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+ }
+
+ return 0;
+}
+
+int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
+ unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ int result;
+ int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0;
+
+ for (; addr < end; addr = next) {
+ pgd_t *pgd = pgd_page + pgd_index(addr) + off;
+ pud_t *pud;
+
+ next = (addr & PGDIR_MASK) + PGDIR_SIZE;
+ if (next > end)
+ next = end;
+
+ if (pgd_present(*pgd)) {
+ pud = pud_offset(pgd, 0);
+ result = ident_pud_init(info, pud, addr, next);
+ if (result)
+ return result;
+ continue;
+ }
+
+ pud = (pud_t *)info->alloc_pgt_page(info->context);
+ if (!pud)
+ return -ENOMEM;
+ result = ident_pud_init(info, pud, addr, next);
+ if (result)
+ return result;
+ set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+ }
+
+ return 0;
+}
Index: linux-2.6/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_64.c
+++ linux-2.6/arch/x86/mm/init_64.c
@@ -56,79 +56,7 @@

#include "mm_internal.h"

-static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page,
- unsigned long addr, unsigned long end)
-{
- addr &= PMD_MASK;
- for (; addr < end; addr += PMD_SIZE) {
- pmd_t *pmd = pmd_page + pmd_index(addr);
-
- if (!pmd_present(*pmd))
- set_pmd(pmd, __pmd(addr | pmd_flag));
- }
-}
-static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
- unsigned long addr, unsigned long end)
-{
- unsigned long next;
-
- for (; addr < end; addr = next) {
- pud_t *pud = pud_page + pud_index(addr);
- pmd_t *pmd;
-
- next = (addr & PUD_MASK) + PUD_SIZE;
- if (next > end)
- next = end;
-
- if (pud_present(*pud)) {
- pmd = pmd_offset(pud, 0);
- ident_pmd_init(info->pmd_flag, pmd, addr, next);
- continue;
- }
- pmd = (pmd_t *)info->alloc_pgt_page(info->context);
- if (!pmd)
- return -ENOMEM;
- ident_pmd_init(info->pmd_flag, pmd, addr, next);
- set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
- }
-
- return 0;
-}
-
-int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
- unsigned long addr, unsigned long end)
-{
- unsigned long next;
- int result;
- int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0;
-
- for (; addr < end; addr = next) {
- pgd_t *pgd = pgd_page + pgd_index(addr) + off;
- pud_t *pud;
-
- next = (addr & PGDIR_MASK) + PGDIR_SIZE;
- if (next > end)
- next = end;
-
- if (pgd_present(*pgd)) {
- pud = pud_offset(pgd, 0);
- result = ident_pud_init(info, pud, addr, next);
- if (result)
- return result;
- continue;
- }
-
- pud = (pud_t *)info->alloc_pgt_page(info->context);
- if (!pud)
- return -ENOMEM;
- result = ident_pud_init(info, pud, addr, next);
- if (result)
- return result;
- set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
- }
-
- return 0;
-}
+#include "ident_map.c"

static int __init parse_direct_gbpages_off(char *arg)
{


Attachments:
misc_fill_pgt.patch (8.24 kB)

2015-02-22 13:18:13

by Baoquan He

[permalink] [raw]
Subject: Re: [PATCH] x86, boot: Allow 64bit EFI kernel to be loaded above 4G

On 02/21/15 at 10:49am, Baoquan He wrote:
> On 02/20/15 at 03:53pm, Yinghai Lu wrote:
> > Then you are not setting the ident mapping correctly.
> >
> > you should make sure add extra ident mapping for the new [output,
> > output+output_len - 1].
> > bootloader only cover old [output, output+output_len - 1]
> >
> > and you should check if the mapping is present before add new one,
> > otherwise will overrite
> > the one from 64bit bootloader like kexec-tools or grub2-x86_64 etc.
> >
> > You could use kernel_ident_mapping_init() from arch/x86/mm/init_64.c
> > --- may need to cut and paste or split and include to
> > arch/x86/boot/compressed/misc.c
> > also you need to find some pages for alloc_pgt_page.
>
> At the beginning I did it just as you said, add IDT table and $PF
> handler. Get page fault address and built ident mapping around it when
> reload kernel above 4G. In this case 3 more pages are enough if kernel
(typo in the quoted line above: "3 more pages" should be "4 more pages")

> is put to another 512G and cross the boundary of 512G.
> kernel_ident_mapping_init code can be borrowed and need be adjusted a
> little bit. This works as expected, but a GPF reported and reboot to
> BIOS. That's why I made a simple debug patch as I pasted before to
> filter unnecessary interference.
>
> Since in arch/x86/boot/compressed/head_64.S 6 pages are used, 1 for pgd,
> 1 for pud, 4 for pmd, all of them cover 0~4G ident mapping. So I added 4
> more pages as pmd, then 0~8G are covered. Now I hardcoded the output of
> randomization of physical address as 5G, means kernel will be reloaded
> there and decompressed. With these ident mapping for this hard coded
> address should be correctly setted, still that GPF will happen. I
> borrowed a printf.c from arch/x86/boot and made it work for
> boot/compressed, can see in this case it will decompress successfully
> and jump into arch/x86/kernel/head_64.S, then it reboot there during a
> jump.
>
> >
> > Thanks
> >
> > Yinghai