This patch series fixes a regression introduced in 4.13-rc1: A Xen
HVM guest with KASLR enabled wouldn't boot any longer due to the usage
of __va() before kernel_randomize_memory() was called.
Juergen Gross (3):
x86: provide an init_mem_mapping hypervisor hook
xen: split up xen_hvm_init_shared_info()
xen: fix hvm guest with kaslr enabled
arch/x86/include/asm/hypervisor.h | 10 +++++++
arch/x86/mm/init.c | 3 ++
arch/x86/xen/enlighten_hvm.c | 59 ++++++++++++++++++++++++---------------
3 files changed, 50 insertions(+), 22 deletions(-)
--
2.12.3
A Xen HVM guest running with KASLR enabled will die rather soon today
because the shared info page mapping is using __va() too early. This was
introduced by commit a5d5f328b0e2baa5ee7c119fd66324eb79eeeb66 ("xen:
allocate page for shared info page from low memory").
In order to fix this, use early_memremap() to get a temporary virtual
address for shared info until __va() can be used safely.
Signed-off-by: Juergen Gross <[email protected]>
---
arch/x86/xen/enlighten_hvm.c | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index d23531f5f17e..de503c225ae1 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -12,6 +12,7 @@
#include <asm/setup.h>
#include <asm/hypervisor.h>
#include <asm/e820/api.h>
+#include <asm/early_ioremap.h>
#include <asm/xen/cpuid.h>
#include <asm/xen/hypervisor.h>
@@ -21,6 +22,8 @@
#include "mmu.h"
#include "smp.h"
+static unsigned long shared_info_pfn;
+
void xen_hvm_init_shared_info(void)
{
struct xen_add_to_physmap xatp;
@@ -28,7 +31,7 @@ void xen_hvm_init_shared_info(void)
xatp.domid = DOMID_SELF;
xatp.idx = 0;
xatp.space = XENMAPSPACE_shared_info;
- xatp.gpfn = virt_to_pfn(HYPERVISOR_shared_info);
+ xatp.gpfn = shared_info_pfn;
if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
BUG();
}
@@ -51,8 +54,16 @@ static void __init reserve_shared_info(void)
pa += PAGE_SIZE)
;
+ shared_info_pfn = PHYS_PFN(pa);
+
memblock_reserve(pa, PAGE_SIZE);
- HYPERVISOR_shared_info = __va(pa);
+ HYPERVISOR_shared_info = early_memremap(pa, PAGE_SIZE);
+}
+
+static void __init xen_hvm_init_mem_mapping(void)
+{
+ early_memunmap(HYPERVISOR_shared_info, PAGE_SIZE);
+ HYPERVISOR_shared_info = __va(PFN_PHYS(shared_info_pfn));
}
static void __init init_hvm_pv_info(void)
@@ -221,5 +232,6 @@ const struct hypervisor_x86 x86_hyper_xen_hvm = {
.init_platform = xen_hvm_guest_init,
.pin_vcpu = xen_pin_vcpu,
.x2apic_available = xen_x2apic_para_available,
+ .init_mem_mapping = xen_hvm_init_mem_mapping,
};
EXPORT_SYMBOL(x86_hyper_xen_hvm);
--
2.12.3
Provide a hook in hypervisor_x86 called after setting up initial
memory mapping.
This is needed e.g. by Xen HVM guests to map the hypervisor shared
info page.
Signed-off-by: Juergen Gross <[email protected]>
---
arch/x86/include/asm/hypervisor.h | 10 ++++++++++
arch/x86/mm/init.c | 3 +++
2 files changed, 13 insertions(+)
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index 21126155a739..60ebda5e5101 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -43,6 +43,9 @@ struct hypervisor_x86 {
/* pin current vcpu to specified physical cpu (run rarely) */
void (*pin_vcpu)(int);
+
+ /* called during init_mem_mapping() to setup early mappings. */
+ void (*init_mem_mapping)(void);
};
extern const struct hypervisor_x86 *x86_hyper;
@@ -57,8 +60,15 @@ extern const struct hypervisor_x86 x86_hyper_kvm;
extern void init_hypervisor_platform(void);
extern bool hypervisor_x2apic_available(void);
extern void hypervisor_pin_vcpu(int cpu);
+
+static inline void hypervisor_init_mem_mapping(void)
+{
+ if (x86_hyper->init_mem_mapping)
+ x86_hyper->init_mem_mapping();
+}
#else
static inline void init_hypervisor_platform(void) { }
static inline bool hypervisor_x2apic_available(void) { return false; }
+static inline void hypervisor_init_mem_mapping(void) { }
#endif /* CONFIG_HYPERVISOR_GUEST */
#endif /* _ASM_X86_HYPERVISOR_H */
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 673541eb3b3f..bf3f1065d6ad 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -18,6 +18,7 @@
#include <asm/dma.h> /* for MAX_DMA_PFN */
#include <asm/microcode.h>
#include <asm/kaslr.h>
+#include <asm/hypervisor.h>
/*
* We need to define the tracepoints somewhere, and tlb.c
@@ -636,6 +637,8 @@ void __init init_mem_mapping(void)
load_cr3(swapper_pg_dir);
__flush_tlb_all();
+ hypervisor_init_mem_mapping();
+
early_memtest(0, max_pfn_mapped << PAGE_SHIFT);
}
--
2.12.3
Instead of calling xen_hvm_init_shared_info() on boot and resume, split
it up into a boot-time function searching for the pfn to use and a
mapping function doing the hypervisor mapping call.
Signed-off-by: Juergen Gross <[email protected]>
---
arch/x86/xen/enlighten_hvm.c | 45 +++++++++++++++++++++++---------------------
1 file changed, 24 insertions(+), 21 deletions(-)
diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index 87d791356ea9..d23531f5f17e 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -21,29 +21,9 @@
#include "mmu.h"
#include "smp.h"
-void __ref xen_hvm_init_shared_info(void)
+void xen_hvm_init_shared_info(void)
{
struct xen_add_to_physmap xatp;
- u64 pa;
-
- if (HYPERVISOR_shared_info == &xen_dummy_shared_info) {
- /*
- * Search for a free page starting at 4kB physical address.
- * Low memory is preferred to avoid an EPT large page split up
- * by the mapping.
- * Starting below X86_RESERVE_LOW (usually 64kB) is fine as
- * the BIOS used for HVM guests is well behaved and won't
- * clobber memory other than the first 4kB.
- */
- for (pa = PAGE_SIZE;
- !e820__mapped_all(pa, pa + PAGE_SIZE, E820_TYPE_RAM) ||
- memblock_is_reserved(pa);
- pa += PAGE_SIZE)
- ;
-
- memblock_reserve(pa, PAGE_SIZE);
- HYPERVISOR_shared_info = __va(pa);
- }
xatp.domid = DOMID_SELF;
xatp.idx = 0;
@@ -53,6 +33,28 @@ void __ref xen_hvm_init_shared_info(void)
BUG();
}
+static void __init reserve_shared_info(void)
+{
+ u64 pa;
+
+ /*
+ * Search for a free page starting at 4kB physical address.
+ * Low memory is preferred to avoid an EPT large page split up
+ * by the mapping.
+ * Starting below X86_RESERVE_LOW (usually 64kB) is fine as
+ * the BIOS used for HVM guests is well behaved and won't
+ * clobber memory other than the first 4kB.
+ */
+ for (pa = PAGE_SIZE;
+ !e820__mapped_all(pa, pa + PAGE_SIZE, E820_TYPE_RAM) ||
+ memblock_is_reserved(pa);
+ pa += PAGE_SIZE)
+ ;
+
+ memblock_reserve(pa, PAGE_SIZE);
+ HYPERVISOR_shared_info = __va(pa);
+}
+
static void __init init_hvm_pv_info(void)
{
int major, minor;
@@ -153,6 +155,7 @@ static void __init xen_hvm_guest_init(void)
init_hvm_pv_info();
+ reserve_shared_info();
xen_hvm_init_shared_info();
/*
--
2.12.3
On 07/25/2017 05:50 AM, Juergen Gross wrote:
> Provide a hook in hypervisor_x86 called after setting up initial
> memory mapping.
>
> This is needed e.g. by Xen HVM guests to map the hypervisor shared
> info page.
This is not necessarily a problem specific to guests so I wonder whether
the hook should be part of x86_platform_ops or some other x86 structure.
-boris
On 07/25/2017 05:50 AM, Juergen Gross wrote:
>
> -void __ref xen_hvm_init_shared_info(void)
> +void xen_hvm_init_shared_info(void)
Why are you dropping __ref?
-boris
On 25/07/17 16:19, Boris Ostrovsky wrote:
> On 07/25/2017 05:50 AM, Juergen Gross wrote:
>>
>> -void __ref xen_hvm_init_shared_info(void)
>> +void xen_hvm_init_shared_info(void)
>
> Why are you dropping __ref?
The function no longer calls any __init function.
Juergen
On 25/07/17 16:09, Boris Ostrovsky wrote:
> On 07/25/2017 05:50 AM, Juergen Gross wrote:
>> Provide a hook in hypervisor_x86 called after setting up initial
>> memory mapping.
>>
>> This is needed e.g. by Xen HVM guests to map the hypervisor shared
>> info page.
>
> This is not necessarily a problem specific to guests so I wonder whether
> the hook should be part of x86_platform_ops or some other x86 structure.
I'm fine with any solution. I've chosen the hypervisor struct as right
now there seems to be a need for HVM Xen guests only.
Juergen
FYI, we noticed the following commit:
commit: d5c5dc4befe6cbec1b4d2c961bfa1075d51de4f6 ("x86: provide an init_mem_mapping hypervisor hook")
url: https://github.com/0day-ci/linux/commits/Juergen-Gross/fix-xen-hvm-guest-with-kaslr-enabled/20170727-083556
base: https://git.kernel.org/cgit/linux/kernel/git/xen/tip.git linux-next
in testcase: boot
on test machine: qemu-system-x86_64 -m 420M
caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
+-------------------------------+------------+------------+
| | 96edd61dcf | d5c5dc4bef |
+-------------------------------+------------+------------+
| boot_successes | 15 | 4 |
| boot_failures | 0 | 4 |
| PANIC:early_exception | 0 | 4 |
| BUG:kernel_hang_in_boot_stage | 0 | 4 |
+-------------------------------+------------+------------+
[ 0.000000] Base memory trampoline at [ffff880000099000] 99000 size 24576
[ 0.000000] BRK [0x02515000, 0x02515fff] PGTABLE
[ 0.000000] BRK [0x02516000, 0x02516fff] PGTABLE
[ 0.000000] BRK [0x02517000, 0x02517fff] PGTABLE
[ 0.000000] BRK [0x02518000, 0x02518fff] PGTABLE
PANIC: early exception 0x0e IP 10:ffffffff821003d1 error 0 cr2 0x28
[ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 4.13.0-rc1-00416-gd5c5dc4 #1
[ 0.000000] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.3-20161025_171302-gandalf 04/01/2014
[ 0.000000] task: ffffffff81e10480 task.stack: ffffffff81e00000
[ 0.000000] RIP: 0010:init_mem_mapping+0x257/0x26d
[ 0.000000] RSP: 0000:ffffffff81e03e20 EFLAGS: 00000082 ORIG_RAX: 0000000000000000
[ 0.000000] RAX: 0000000000000000 RBX: 0000000000100000 RCX: ffffffff81e03de0
[ 0.000000] RDX: 0000000000000630 RSI: 0000000000000400 RDI: 0000000000000046
[ 0.000000] RBP: ffffffff81e03e48 R08: 0000000000000000 R09: 0000000000000001
[ 0.000000] R10: ffffffff81e03e20 R11: 383135323078305b R12: 0000000018800000
[ 0.000000] R13: 000000001a3e0000 R14: 0000000018700000 R15: 0000000020000000
[ 0.000000] FS: 0000000000000000(0000) GS:ffffffff820c2000(0000) knlGS:0000000000000000
[ 0.000000] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 0.000000] CR2: 0000000000000028 CR3: 0000000001e09000 CR4: 00000000000006b0
[ 0.000000] Call Trace:
[ 0.000000] setup_arch+0x662/0xca3
[ 0.000000] ? printk+0x43/0x4b
[ 0.000000] ? early_idt_handler_array+0x120/0x120
[ 0.000000] start_kernel+0x64/0x405
[ 0.000000] ? early_idt_handler_array+0x120/0x120
[ 0.000000] x86_64_start_reservations+0x2a/0x2c
[ 0.000000] x86_64_start_kernel+0x13e/0x14d
[ 0.000000] secondary_startup_64+0x9f/0x9f
[ 0.000000] Code: ff 14 25 18 cc d8 81 48 8b 05 e0 d2 f9 ff 0f ba e0 0d 73 09 ff 14 25 40 cc d8 81 eb 07 ff 14 25 38 cc d8 81 48 8b 05 9f 09 26 00 <48> 8b 40 28 48 85 c0 74 02 ff d0 5b 41 5c 41 5d 41 5e 41 5f 5d
Elapsed time: 510
BUG: kernel hang in boot stage
initrds=(
/osimage/yocto/yocto-tiny-i386-2016-04-22.cgz
/lkp/scheduled/vm-lkp-os-yocto-ia32-1/boot-1-yocto-tiny-i386-2016-04-22.cgz-d5c5dc4befe6cbec1b4d2c961bfa1075d51de4f6-20170727-34117-k9lx8y-0.cgz
/lkp/lkp/lkp-i386.cgz
To reproduce:
git clone https://github.com/01org/lkp-tests.git
cd lkp-tests
bin/lkp qemu -k <bzImage> job-script # job-script is attached in this email
Thanks,
Kernel Test Robot