Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755959AbcJNTOf (ORCPT ); Fri, 14 Oct 2016 15:14:35 -0400 Received: from userp1040.oracle.com ([156.151.31.81]:42466 "EHLO userp1040.oracle.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755785AbcJNTOc (ORCPT ); Fri, 14 Oct 2016 15:14:32 -0400 Date: Fri, 14 Oct 2016 15:14:03 -0400 From: Konrad Rzeszutek Wilk To: Boris Ostrovsky Cc: david.vrabel@citrix.com, JGross@suse.com, Matt Fleming , xen-devel@lists.xenproject.org, linux-kernel@vger.kernel.org, roger.pau@citrix.com Subject: Re: [Xen-devel] [PATCH 4/8] xen/pvh: Bootstrap PVH guest Message-ID: <20161014191403.GA16777@localhost.localdomain> References: <1476468318-24422-1-git-send-email-boris.ostrovsky@oracle.com> <1476468318-24422-5-git-send-email-boris.ostrovsky@oracle.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <1476468318-24422-5-git-send-email-boris.ostrovsky@oracle.com> User-Agent: Mutt/1.6.1 (2016-04-27) X-Source-IP: userv0022.oracle.com [156.151.31.74] Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 10210 Lines: 375 On Fri, Oct 14, 2016 at 02:05:14PM -0400, Boris Ostrovsky wrote: > Start PVH guest at XEN_ELFNOTE_PHYS32_ENTRY address. Setup hypercall > page, initialize boot_params, enable early page tables. > > Since this stub is executed before kernel entry point we cannot use > variables in .bss which is cleared by kernel. We explicitly place > variables that are initialized here into .data. 
> > Signed-off-by: Boris Ostrovsky > Signed-off-by: Matt Fleming > --- > arch/x86/xen/Kconfig | 2 +- > arch/x86/xen/Makefile | 1 + > arch/x86/xen/enlighten.c | 87 +++++++++++++++++++++++++++- > arch/x86/xen/xen-pvh.S | 143 +++++++++++++++++++++++++++++++++++++++++++++++ > include/xen/xen.h | 5 ++ > 5 files changed, 236 insertions(+), 2 deletions(-) > create mode 100644 arch/x86/xen/xen-pvh.S > > diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig > index c7b15f3..76b6dbd 100644 > --- a/arch/x86/xen/Kconfig > +++ b/arch/x86/xen/Kconfig > @@ -53,5 +53,5 @@ config XEN_DEBUG_FS > > config XEN_PVH > bool "Support for running as a PVH guest" > - depends on X86_64 && XEN && XEN_PVHVM > + depends on XEN && XEN_PVHVM && ACPI > def_bool n > diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile > index e47e527..cb0164a 100644 > --- a/arch/x86/xen/Makefile > +++ b/arch/x86/xen/Makefile > @@ -23,3 +23,4 @@ obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o > obj-$(CONFIG_XEN_DOM0) += vga.o > obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o > obj-$(CONFIG_XEN_EFI) += efi.o > +obj-$(CONFIG_XEN_PVH) += xen-pvh.o > diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c > index dc4ed0c..d38d568 100644 > --- a/arch/x86/xen/enlighten.c > +++ b/arch/x86/xen/enlighten.c > @@ -45,6 +45,7 @@ > #include > #include > #include > +#include > #include > #include > #include > @@ -121,7 +122,8 @@ > DEFINE_PER_CPU(uint32_t, xen_vcpu_id); > EXPORT_PER_CPU_SYMBOL(xen_vcpu_id); > > -enum xen_domain_type xen_domain_type = XEN_NATIVE; > +enum xen_domain_type xen_domain_type > + __attribute__((section(".data"))) = XEN_NATIVE; > EXPORT_SYMBOL_GPL(xen_domain_type); > > unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START; > @@ -176,6 +178,17 @@ struct tls_descs { > */ > static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc); > > +#ifdef CONFIG_XEN_PVH > +/* > + * PVH variables. 
These need to live in data segment since they are > + * initialized before startup_{32|64}, which clear .bss, are invoked. > + */ > +int xen_pvh __attribute__((section(".data"))) = 0; unsigned int? > +struct hvm_start_info pvh_start_info __attribute__((section(".data"))); > +uint pvh_start_info_sz = sizeof(pvh_start_info); unsigned int please. Typedefs in Linux are frowned upon. > +struct boot_params pvh_bootparams __attribute__((section(".data"))); > +#endif > + > static void clamp_max_cpus(void) > { > #ifdef CONFIG_SMP > @@ -1669,6 +1682,78 @@ asmlinkage __visible void __init xen_start_kernel(void) > #endif > } > > +#ifdef CONFIG_XEN_PVH > +static void __init init_pvh_bootparams(void) > +{ > + struct xen_memory_map memmap; > + int i; unsigned int? > + > + memset(&pvh_bootparams, 0, sizeof(pvh_bootparams)); > + > + memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_map); > + set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_map); > + if (HYPERVISOR_memory_op(XENMEM_memory_map, &memmap)) { > + xen_raw_console_write("XENMEM_memory_map failed\n"); Should we print the error value at least? > + BUG(); > + } > + > + pvh_bootparams.e820_map[memmap.nr_entries].addr = > + ISA_START_ADDRESS; What if nr_entries is 128? Should we double-check for that? > + pvh_bootparams.e820_map[memmap.nr_entries].size = > + ISA_END_ADDRESS - ISA_START_ADDRESS; > + pvh_bootparams.e820_map[memmap.nr_entries++].type = > + E820_RESERVED; > + > + sanitize_e820_map(pvh_bootparams.e820_map, > + ARRAY_SIZE(pvh_bootparams.e820_map), > + &memmap.nr_entries); > + > + pvh_bootparams.e820_entries = memmap.nr_entries; > + for (i = 0; i < pvh_bootparams.e820_entries; i++) > + e820_add_region(pvh_bootparams.e820_map[i].addr, > + pvh_bootparams.e820_map[i].size, > + pvh_bootparams.e820_map[i].type); > + > + pvh_bootparams.hdr.cmd_line_ptr = > + pvh_start_info.cmdline_paddr; > + > + /* The first module is always ramdisk */ Could you add an period at end please? 
> + if (pvh_start_info.nr_modules) { > + struct hvm_modlist_entry *modaddr = > + __va(pvh_start_info.modlist_paddr); > + pvh_bootparams.hdr.ramdisk_image = modaddr->paddr; > + pvh_bootparams.hdr.ramdisk_size = modaddr->size; > + } > + > + /* > + * See Documentation/x86/boot.txt. > + * > + * Version 2.12 supports Xen entry point but we will use default x86/PC > + * environment (i.e. hardware_subarch 0). > + */ > + pvh_bootparams.hdr.version = 0x212; > + pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */ > +} > + > +/* > + * This routine (and those that it might call) should not use > + * anything that lives in .bss since that segment will be cleared later And maybe one here too? > + */ > +void __init xen_prepare_pvh(void) > +{ > + u32 eax, ecx, edx, msr; msr = 0 ? > + u64 pfn; > + > + xen_pvh = 1; > + > + cpuid(xen_cpuid_base() + 2, &eax, &msr, &ecx, &edx); cpuid_ebx ? And that way you don't have to have ecx and edx? > + pfn = __pa(hypercall_page); > + wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); > + > + init_pvh_bootparams(); > +} > +#endif > + > void __ref xen_hvm_init_shared_info(void) > { > int cpu; > diff --git a/arch/x86/xen/xen-pvh.S b/arch/x86/xen/xen-pvh.S > new file mode 100644 > index 0000000..58c477b > --- /dev/null > +++ b/arch/x86/xen/xen-pvh.S > @@ -0,0 +1,143 @@ > +/* > + * Copyright C 2016, Oracle and/or its affiliates. All rights reserved. > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. 
> + * > + * You should have received a copy of the GNU General Public License along > + * with this program. If not, see . > + */ > + > + .code32 > + .text > +#define _pa(x) ((x) - __START_KERNEL_map) > + > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > + > + __HEAD > + .code32 > + > +/* Entry point for PVH guests */ > +ENTRY(pvh_start_xen) You are missing the ENDPROC macro at the end. > + cli > + cld > + > + mov $_pa(gdt), %eax > + lgdt (%eax) > + > + movl $(__BOOT_DS),%eax > + movl %eax,%ds > + movl %eax,%es > + movl %eax,%ss > + > + /* Stash hvm_start_info */ > + mov $_pa(pvh_start_info), %edi > + mov %ebx, %esi Should we dereference the first byte or such to check for the magic string? Actually I am not even seeing the check in the C code? > + mov $_pa(pvh_start_info_sz), %ecx > + mov (%ecx), %ecx > + rep > + movsb > + > + movl $_pa(early_stack_end), %eax > + movl %eax, %esp > + > + /* Enable PAE mode */ Periods are nice! Truly! > + movl %cr4, %eax > + orl $X86_CR4_PAE, %eax > + movl %eax, %cr4 > + > +#ifdef CONFIG_X86_64 > + /* Enable Long mode */ :-) I think you know what I am going to say here. > + movl $MSR_EFER, %ecx > + rdmsr > + btsl $_EFER_LME, %eax > + wrmsr > + > + /* Enable pre-constructed page tables */ And here. > + mov $_pa(init_level4_pgt), %eax > + movl %eax, %cr3 > + movl $(X86_CR0_PG | X86_CR0_PE), %eax > + movl %eax, %cr0 > + > + /* Jump to 64-bit mode. */ > + pushl $__KERNEL_CS > + leal _pa(1f), %eax > + pushl %eax > + lret > + > + /* 64-bit entry point */ And right here. > + .code64 > +1: > + call xen_prepare_pvh > + > + /* startup_64 expects boot_params in %rsi */ .. 
> + mov $_pa(pvh_bootparams), %rsi > + movq $_pa(startup_64), %rax > + jmp *%rax > + > +#else /* CONFIG_X86_64 */ > + > + call setup_pgtable_32 > + > + mov $_pa(initial_page_table), %eax > + movl %eax, %cr3 > + > + movl %cr0, %eax > + orl $(X86_CR0_PG | X86_CR0_PE), %eax > + movl %eax, %cr0 > + > + ljmp $__BOOT_CS,$1f > +1: > + call xen_prepare_pvh > + mov $_pa(pvh_bootparams), %esi > + > + /* startup_32 doesn't expect paging and PAE to be on */ Should 'startup_32' be documented with this? > + ljmp $__BOOT_CS,$_pa(2f) > +2: > + movl %cr0, %eax > + andl $~X86_CR0_PG, %eax > + movl %eax, %cr0 > + movl %cr4, %eax > + andl $~X86_CR4_PAE, %eax > + movl %eax, %cr4 > + > + ljmp $0x10, $_pa(startup_32) > +#endif > + > + .data > +gdt: > + .word gdt_end - gdt > + .long _pa(gdt) > + .word 0 > + .quad 0x0000000000000000 /* NULL descriptor */ > +#ifdef CONFIG_X86_64 > + .quad 0x00af9a000000ffff /* __KERNEL_CS */ > +#else > + .quad 0x00cf9a000000ffff /* __KERNEL_CS */ > +#endif > + .quad 0x00cf92000000ffff /* __KERNEL_DS */ > +gdt_end: > + > + .bss > + .balign 4 > +early_stack: > + .fill 16, 1, 0 > +early_stack_end: > + > + ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, > + _ASM_PTR (pvh_start_xen - __START_KERNEL_map)) > diff --git a/include/xen/xen.h b/include/xen/xen.h > index d0f9684..ed3f841 100644 > --- a/include/xen/xen.h > +++ b/include/xen/xen.h > @@ -29,6 +29,11 @@ enum xen_domain_type { > #define xen_initial_domain() (0) > #endif /* CONFIG_XEN_DOM0 */ > > +#ifdef CONFIG_XEN_PVH > +extern int xen_pvh; > +#define xen_pvh_domain() (xen_hvm_domain() && xen_pvh) > +#else > #define xen_pvh_domain() (0) > +#endif > > #endif /* _XEN_XEN_H */ > -- > 1.8.3.1 > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > https://lists.xen.org/xen-devel