2020-02-11 13:58:39

by Joerg Roedel

[permalink] [raw]
Subject: [PATCH 08/62] x86/boot/compressed/64: Add IDT Infrastructure

From: Joerg Roedel <[email protected]>

Add code needed to setup an IDT in the early pre-decompression
boot-code. The IDT is loaded first in startup_64, which is after
EfiExitBootServices() has been called, and later reloaded when the
kernel image has been relocated to the end of the decompression area.

This allows to setup different IDT handlers before and after the
relocation.

Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/boot/compressed/Makefile | 1 +
arch/x86/boot/compressed/head_64.S | 34 +++++++++++
arch/x86/boot/compressed/idt_64.c | 43 +++++++++++++
arch/x86/boot/compressed/idt_handlers_64.S | 71 ++++++++++++++++++++++
arch/x86/boot/compressed/misc.h | 5 ++
arch/x86/include/asm/desc_defs.h | 3 +
6 files changed, 157 insertions(+)
create mode 100644 arch/x86/boot/compressed/idt_64.c
create mode 100644 arch/x86/boot/compressed/idt_handlers_64.S

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index e186cc0b628d..54d63526e856 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -81,6 +81,7 @@ vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o
vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
ifdef CONFIG_X86_64
vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr_64.o
+ vmlinux-objs-y += $(obj)/idt_64.o $(obj)/idt_handlers_64.o
vmlinux-objs-y += $(obj)/mem_encrypt.o
vmlinux-objs-y += $(obj)/pgtable_64.o
endif
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 1f1f6c8139b3..d27a9ce1bcb0 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -33,6 +33,7 @@
#include <asm/processor-flags.h>
#include <asm/asm-offsets.h>
#include <asm/bootparam.h>
+#include <asm/desc_defs.h>
#include "pgtable.h"

/*
@@ -358,6 +359,10 @@ SYM_CODE_START(startup_64)
movq %rax, gdt64+2(%rip)
lgdt gdt64(%rip)

+ pushq %rsi
+ call load_stage1_idt
+ popq %rsi
+
/*
* paging_prepare() sets up the trampoline and checks if we need to
* enable 5-level paging.
@@ -465,6 +470,16 @@ SYM_FUNC_END_ALIAS(efi_stub_entry)
.text
SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)

+/*
+ * Reload GDT after relocation - The GDT at the non-relocated position
+ * might be overwritten soon by the in-place decompression, so reload
+ * GDT at the relocated address. The GDT is referenced by exception
+ * handling and needs to be set up correctly.
+ */
+ leaq gdt(%rip), %rax
+ movq %rax, gdt64+2(%rip)
+ lgdt gdt64(%rip)
+
/*
* Clear BSS (stack is currently empty)
*/
@@ -475,6 +490,13 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
shrq $3, %rcx
rep stosq

+/*
+ * Load stage2 IDT
+ */
+ pushq %rsi
+ call load_stage2_idt
+ popq %rsi
+
/*
* Do the extraction, and jump to the new kernel..
*/
@@ -628,6 +650,18 @@ SYM_DATA_START_LOCAL(gdt)
.quad 0x0000000000000000 /* TS continued */
SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)

+SYM_DATA_START(boot_idt_desc)
+ .word boot_idt_end - boot_idt
+ .quad 0
+SYM_DATA_END(boot_idt_desc)
+ .balign 8
+SYM_DATA_START(boot_idt)
+ .rept BOOT_IDT_ENTRIES
+ .quad 0
+ .quad 0
+ .endr
+SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end)
+
#ifdef CONFIG_EFI_MIXED
SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0)
SYM_DATA(efi_is64, .byte 1)
diff --git a/arch/x86/boot/compressed/idt_64.c b/arch/x86/boot/compressed/idt_64.c
new file mode 100644
index 000000000000..46ecea671b90
--- /dev/null
+++ b/arch/x86/boot/compressed/idt_64.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <asm/trap_defs.h>
+#include <asm/segment.h>
+#include "misc.h"
+
+static void set_idt_entry(int vector, void (*handler)(void))
+{
+ unsigned long address = (unsigned long)handler;
+ gate_desc entry;
+
+ memset(&entry, 0, sizeof(entry));
+
+ entry.offset_low = (u16)(address & 0xffff);
+ entry.segment = __KERNEL_CS;
+ entry.bits.type = GATE_TRAP;
+ entry.bits.p = 1;
+ entry.offset_middle = (u16)((address >> 16) & 0xffff);
+ entry.offset_high = (u32)(address >> 32);
+
+ memcpy(&boot_idt[vector], &entry, sizeof(entry));
+}
+
+/* Have this here so we don't need to include <asm/desc.h> */
+static void load_boot_idt(const struct desc_ptr *dtr)
+{
+ asm volatile("lidt %0"::"m" (*dtr));
+}
+
+/* Setup IDT before kernel jumping to .Lrelocated */
+void load_stage1_idt(void)
+{
+ boot_idt_desc.address = (unsigned long)boot_idt;
+
+ load_boot_idt(&boot_idt_desc);
+}
+
+/* Setup IDT after kernel jumping to .Lrelocated */
+void load_stage2_idt(void)
+{
+ boot_idt_desc.address = (unsigned long)boot_idt;
+
+ load_boot_idt(&boot_idt_desc);
+}
diff --git a/arch/x86/boot/compressed/idt_handlers_64.S b/arch/x86/boot/compressed/idt_handlers_64.S
new file mode 100644
index 000000000000..0b2b6cf747d2
--- /dev/null
+++ b/arch/x86/boot/compressed/idt_handlers_64.S
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Early IDT handler entry points
+ *
+ * Copyright (C) 2019 SUSE
+ *
+ * Author: Joerg Roedel <[email protected]>
+ */
+
+#include <asm/segment.h>
+
+.macro EXCEPTION_HANDLER name function error_code=0
+SYM_FUNC_START(\name)
+
+ /* Build pt_regs */
+ .if \error_code == 0
+ pushq $0
+ .endif
+
+ pushq %rdi
+ pushq %rsi
+ pushq %rdx
+ pushq %rcx
+ pushq %rax
+ pushq %r8
+ pushq %r9
+ pushq %r10
+ pushq %r11
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+ /* Call handler with pt_regs */
+ movq %rsp, %rdi
+ call \function
+
+ /* Restore regs */
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ popq %r11
+ popq %r10
+ popq %r9
+ popq %r8
+ popq %rax
+ popq %rcx
+ popq %rdx
+ popq %rsi
+ popq %rdi
+
+ /* Remove error code and return */
+ addq $8, %rsp
+
+ /*
+ * Make sure we return to __KERNEL_CS - the CS selector on
+ * the IRET frame might still be from an old BIOS GDT
+ */
+ movq $__KERNEL_CS, 8(%rsp)
+
+ iretq
+SYM_FUNC_END(\name)
+ .endm
+
+ .text
+ .code64
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 726e264410ff..062ae3ae6930 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -23,6 +23,7 @@
#include <asm/page.h>
#include <asm/boot.h>
#include <asm/bootparam.h>
+#include <asm/desc_defs.h>

#define BOOT_CTYPE_H
#include <linux/acpi.h>
@@ -133,4 +134,8 @@ int count_immovable_mem_regions(void);
static inline int count_immovable_mem_regions(void) { return 0; }
#endif

+/* idt_64.c */
+extern gate_desc boot_idt[BOOT_IDT_ENTRIES];
+extern struct desc_ptr boot_idt_desc;
+
#endif /* BOOT_COMPRESSED_MISC_H */
diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h
index a91f3b6e4f2a..5621fb3f2d1a 100644
--- a/arch/x86/include/asm/desc_defs.h
+++ b/arch/x86/include/asm/desc_defs.h
@@ -109,6 +109,9 @@ struct desc_ptr {

#endif /* !__ASSEMBLY__ */

+/* Boot IDT definitions */
+#define BOOT_IDT_ENTRIES 32
+
/* Access rights as returned by LAR */
#define AR_TYPE_RODATA (0 * (1 << 9))
#define AR_TYPE_RWDATA (1 * (1 << 9))
--
2.17.1


2020-02-11 22:19:30

by Andy Lutomirski

[permalink] [raw]
Subject: Re: [PATCH 08/62] x86/boot/compressed/64: Add IDT Infrastructure

On Tue, Feb 11, 2020 at 5:53 AM Joerg Roedel <[email protected]> wrote:
>
> From: Joerg Roedel <[email protected]>
>
> Add code needed to setup an IDT in the early pre-decompression
> boot-code. The IDT is loaded first in startup_64, which is after
> EfiExitBootServices() has been called, and later reloaded when the
> kernel image has been relocated to the end of the decompression area.
>
> This allows to setup different IDT handlers before and after the
> relocation.
>

> diff --git a/arch/x86/boot/compressed/idt_64.c b/arch/x86/boot/compressed/idt_64.c
> new file mode 100644
> index 000000000000..46ecea671b90
> --- /dev/null
> +++ b/arch/x86/boot/compressed/idt_64.c
> @@ -0,0 +1,43 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +#include <asm/trap_defs.h>
> +#include <asm/segment.h>
> +#include "misc.h"
> +
> +static void set_idt_entry(int vector, void (*handler)(void))
> +{
> + unsigned long address = (unsigned long)handler;
> + gate_desc entry;
> +
> + memset(&entry, 0, sizeof(entry));
> +
> + entry.offset_low = (u16)(address & 0xffff);
> + entry.segment = __KERNEL_CS;
> + entry.bits.type = GATE_TRAP;

^^^

I realize we're not running a real kernel here, but GATE_TRAP is
madness. Please use GATE_INTERRUPT.

> + entry.bits.p = 1;
> + entry.offset_middle = (u16)((address >> 16) & 0xffff);
> + entry.offset_high = (u32)(address >> 32);
> +
> + memcpy(&boot_idt[vector], &entry, sizeof(entry));
> +}
> +
> +/* Have this here so we don't need to include <asm/desc.h> */
> +static void load_boot_idt(const struct desc_ptr *dtr)
> +{
> + asm volatile("lidt %0"::"m" (*dtr));
> +}
> +
> +/* Setup IDT before kernel jumping to .Lrelocated */
> +void load_stage1_idt(void)
> +{
> + boot_idt_desc.address = (unsigned long)boot_idt;
> +
> + load_boot_idt(&boot_idt_desc);
> +}
> +
> +/* Setup IDT after kernel jumping to .Lrelocated */
> +void load_stage2_idt(void)
> +{
> + boot_idt_desc.address = (unsigned long)boot_idt;
> +
> + load_boot_idt(&boot_idt_desc);
> +}
> diff --git a/arch/x86/boot/compressed/idt_handlers_64.S b/arch/x86/boot/compressed/idt_handlers_64.S
> new file mode 100644
> index 000000000000..0b2b6cf747d2
> --- /dev/null
> +++ b/arch/x86/boot/compressed/idt_handlers_64.S
> @@ -0,0 +1,71 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Early IDT handler entry points
> + *
> + * Copyright (C) 2019 SUSE
> + *
> + * Author: Joerg Roedel <[email protected]>
> + */
> +
> +#include <asm/segment.h>
> +
> +.macro EXCEPTION_HANDLER name function error_code=0
> +SYM_FUNC_START(\name)
> +
> + /* Build pt_regs */
> + .if \error_code == 0
> + pushq $0
> + .endif

cld

> +
> + pushq %rdi
> + pushq %rsi
> + pushq %rdx
> + pushq %rcx
> + pushq %rax
> + pushq %r8
> + pushq %r9
> + pushq %r10
> + pushq %r11
> + pushq %rbx
> + pushq %rbp
> + pushq %r12
> + pushq %r13
> + pushq %r14
> + pushq %r15
> +
> + /* Call handler with pt_regs */
> + movq %rsp, %rdi
> + call \function
> +
> + /* Restore regs */
> + popq %r15
> + popq %r14
> + popq %r13
> + popq %r12
> + popq %rbp
> + popq %rbx
> + popq %r11
> + popq %r10
> + popq %r9
> + popq %r8
> + popq %rax
> + popq %rcx
> + popq %rdx
> + popq %rsi
> + popq %rdi

if error_code?

> +
> + /* Remove error code and return */
> + addq $8, %rsp
> +
> + /*
> + * Make sure we return to __KERNEL_CS - the CS selector on
> + * the IRET frame might still be from an old BIOS GDT
> + */
> + movq $__KERNEL_CS, 8(%rsp)
> +

If this actually happens, you have a major bug. Please sanitize all
the segment registers after installing the GDT rather than hacking
around it here.

2020-02-12 11:21:30

by Joerg Roedel

[permalink] [raw]
Subject: Re: [PATCH 08/62] x86/boot/compressed/64: Add IDT Infrastructure

Hi Andy,

thanks a lot for your valuable reviews!

On Tue, Feb 11, 2020 at 02:18:52PM -0800, Andy Lutomirski wrote:
> On Tue, Feb 11, 2020 at 5:53 AM Joerg Roedel <[email protected]> wrote:
> > + entry.offset_low = (u16)(address & 0xffff);
> > + entry.segment = __KERNEL_CS;
> > + entry.bits.type = GATE_TRAP;
>
> ^^^
>
> I realize we're not running a real kernel here, but GATE_TRAP is
> madness. Please use GATE_INTERRUPT.

Changed that.

> > + /* Build pt_regs */
> > + .if \error_code == 0
> > + pushq $0
> > + .endif
>
> cld

Added.

> > + popq %rdi
>
> if error_code?

The code above pushes a $0 for exceptions without an error code, so it
needs to be removed unconditionally.

> > +
> > + /* Remove error code and return */
> > + addq $8, %rsp
> > +
> > + /*
> > + * Make sure we return to __KERNEL_CS - the CS selector on
> > + * the IRET frame might still be from an old BIOS GDT
> > + */
> > + movq $__KERNEL_CS, 8(%rsp)
> > +
>
> If this actually happens, you have a major bug. Please sanitize all
> the segment registers after installing the GDT rather than hacking
> around it here.

Okay, will change that. I thought I could safe some instructions in the
head_64.S code, but you are right that its better to setup a defined
environment first.


Thanks,

Joerg

2020-02-14 19:42:46

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH 08/62] x86/boot/compressed/64: Add IDT Infrastructure

Joerg Roedel <[email protected]> writes:
> + addq $8, %rsp
> +
> + /*
> + * Make sure we return to __KERNEL_CS - the CS selector on
> + * the IRET frame might still be from an old BIOS GDT
> + */
> + movq $__KERNEL_CS, 8(%rsp)

This doesn't make sense. Either it's running on the correct CS
before the exception or not. Likely there's some other problem
here that you patched over with this hack.

-Andi

2020-02-15 12:33:17

by Joerg Roedel

[permalink] [raw]
Subject: Re: [PATCH 08/62] x86/boot/compressed/64: Add IDT Infrastructure

On Fri, Feb 14, 2020 at 11:40:36AM -0800, Andi Kleen wrote:
> Joerg Roedel <[email protected]> writes:
> > + addq $8, %rsp
> > +
> > + /*
> > + * Make sure we return to __KERNEL_CS - the CS selector on
> > + * the IRET frame might still be from an old BIOS GDT
> > + */
> > + movq $__KERNEL_CS, 8(%rsp)
>
> This doesn't make sense. Either it's running on the correct CS
> before the exception or not. Likely there's some other problem
> here that you patched over with this hack.

It is actually a well-known situation and not some other problem. The
boot-code loaded a new GDT and IDT, but did not reload CS with a far
jump/ret/call. The CS value loaded is undefined and comes from the UEFI
BIOS. When an exception is raised, this old CS value is stored in the
IRET frame, and when IRET is executed the processor loads an undefined
CS value, which causes a triple fault with the current IDT setup.

The hack in this patch just fixes the IRET frame up so that it will
return to the correct CS. The reason for this hack was actually to safe
some instructions in the boot-path, because the space is limited there
between the defined offsets of the various entry points.

I removed this hack meanwhile and added a separate function which
reloads CS, DS, SS and ES and which is called from the boot-path, so
that there is no problem with the offsets.

Regards,

Joerg