Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S932712AbcDYNXY (ORCPT ); Mon, 25 Apr 2016 09:23:24 -0400 Received: from aserp1040.oracle.com ([141.146.126.69]:48897 "EHLO aserp1040.oracle.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S932174AbcDYNXV (ORCPT ); Mon, 25 Apr 2016 09:23:21 -0400 Subject: Re: [PATCH v2 02/11] xen/hvmlite: Bootstrap HVMlite guest To: Borislav Petkov , "H. Peter Anvin" , Ingo Molnar , Thomas Gleixner References: <1454341137-14110-1-git-send-email-boris.ostrovsky@oracle.com> <1454341137-14110-3-git-send-email-boris.ostrovsky@oracle.com> <20160424202314.GA3973@pd.tnic> Cc: david.vrabel@citrix.com, konrad.wilk@oracle.com, xen-devel@lists.xenproject.org, linux-kernel@vger.kernel.org, roger.pau@citrix.com, mcgrof@suse.com From: Boris Ostrovsky Message-ID: <571E19D7.1080301@oracle.com> Date: Mon, 25 Apr 2016 09:21:27 -0400 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.1.0 MIME-Version: 1.0 In-Reply-To: <20160424202314.GA3973@pd.tnic> Content-Type: text/plain; charset=utf-8; format=flowed Content-Transfer-Encoding: 7bit X-Source-IP: aserv0021.oracle.com [141.146.126.233] Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 11177 Lines: 366 On 04/24/2016 04:23 PM, Borislav Petkov wrote: > On Mon, Feb 01, 2016 at 10:38:48AM -0500, Boris Ostrovsky wrote: >> Start HVMlite guest at XEN_ELFNOTE_PHYS32_ENTRY address. Setup hypercall >> page, initialize boot_params, enable early page tables. >> >> Since this stub is executed before kernel entry point we cannot use >> variables in .bss which is cleared by kernel. We explicitly place >> variables that are initialized here into .data. 
>> >> Signed-off-by: Boris Ostrovsky >> --- >> arch/x86/xen/Makefile | 1 + >> arch/x86/xen/enlighten.c | 86 +++++++++++++++++++++- >> arch/x86/xen/xen-hvmlite.S | 175 ++++++++++++++++++++++++++++++++++++++++++++ >> include/xen/xen.h | 6 ++ >> 4 files changed, 267 insertions(+), 1 deletions(-) >> create mode 100644 arch/x86/xen/xen-hvmlite.S >> >> diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile >> index e47e527..1d913d7 100644 >> --- a/arch/x86/xen/Makefile >> +++ b/arch/x86/xen/Makefile >> @@ -23,3 +23,4 @@ obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o >> obj-$(CONFIG_XEN_DOM0) += vga.o >> obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o >> obj-$(CONFIG_XEN_EFI) += efi.o >> +obj-$(CONFIG_XEN_PVHVM) += xen-hvmlite.o >> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c >> index 5774800..5f05fa2 100644 >> --- a/arch/x86/xen/enlighten.c >> +++ b/arch/x86/xen/enlighten.c >> @@ -118,7 +118,8 @@ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); >> */ >> DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); >> >> -enum xen_domain_type xen_domain_type = XEN_NATIVE; >> +enum xen_domain_type xen_domain_type >> + __attribute__((section(".data"))) = XEN_NATIVE; >> EXPORT_SYMBOL_GPL(xen_domain_type); >> >> unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START; >> @@ -171,6 +172,17 @@ struct tls_descs { >> */ >> static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc); >> >> +#ifdef CONFIG_XEN_PVHVM >> +/* >> + * HVMlite variables. These need to live in data segment since they are >> + * initialized before startup_{32|64}, which clear .bss, are invoked. > So this jumps into startup_32/64 and I don't think we have talked about > it yet, have we? I'm not aware of any threads about it. Are we fine with > it, are we not? > > I think we need to agree on API where xen guests should jump into > arch/x86/ and adhere to it. Otherwise, we will break xen again if we change > stuff in x86 and we do like to change stuff in x86 all the time. 
> > Adding tip guys and leaving in the rest for reference. I was following Documentation/x86/boot.txt (plus the comments in the code preceding those two routines), which I considered to be the API. We are supposed to come to startup_32 with paging off and %esi pointing to boot_params. For 64-bit, paging is on and %rsi points to the zero-page. There are also certain requirements on segment registers, and interrupts must be disabled. (I just noticed that for 32-bit, %ebp, %edi and %ebx are supposed to be zero, so I'll need to do that.) -boris > > ... > > > >> + */ >> +int xen_hvmlite __attribute__((section(".data"))) = 0; >> +struct hvm_start_info hvmlite_start_info __attribute__((section(".data"))); >> +uint hvmlite_start_info_sz = sizeof(hvmlite_start_info); >> +struct boot_params xen_hvmlite_boot_params __attribute__((section(".data"))); >> +#endif >> + >> static void clamp_max_cpus(void) >> { >> #ifdef CONFIG_SMP >> @@ -1731,6 +1743,78 @@ asmlinkage __visible void __init xen_start_kernel(void) >> #endif >> } >> >> +#ifdef CONFIG_XEN_PVHVM >> +static void __init hvmlite_bootparams(void) >> +{ >> + struct xen_memory_map memmap; >> + int i; >> + >> + memset(&xen_hvmlite_boot_params, 0, sizeof(xen_hvmlite_boot_params)); >> + >> + memmap.nr_entries = ARRAY_SIZE(xen_hvmlite_boot_params.e820_map); >> + set_xen_guest_handle(memmap.buffer, xen_hvmlite_boot_params.e820_map); >> + if (HYPERVISOR_memory_op(XENMEM_memory_map, &memmap)) { >> + xen_raw_console_write("XENMEM_memory_map failed\n"); >> + BUG(); >> + } >> + >> + xen_hvmlite_boot_params.e820_map[memmap.nr_entries].addr = >> + ISA_START_ADDRESS; >> + xen_hvmlite_boot_params.e820_map[memmap.nr_entries].size = >> + ISA_END_ADDRESS - ISA_START_ADDRESS; >> + xen_hvmlite_boot_params.e820_map[memmap.nr_entries++].type = >> + E820_RESERVED; >> + >> + sanitize_e820_map(xen_hvmlite_boot_params.e820_map, >> + ARRAY_SIZE(xen_hvmlite_boot_params.e820_map), >> + &memmap.nr_entries); >> + >> + xen_hvmlite_boot_params.e820_entries = memmap.nr_entries; >> + for (i 
= 0; i < xen_hvmlite_boot_params.e820_entries; i++) >> + e820_add_region(xen_hvmlite_boot_params.e820_map[i].addr, >> + xen_hvmlite_boot_params.e820_map[i].size, >> + xen_hvmlite_boot_params.e820_map[i].type); >> + >> + xen_hvmlite_boot_params.hdr.cmd_line_ptr = >> + hvmlite_start_info.cmdline_paddr; >> + >> + /* The first module is always ramdisk */ >> + if (hvmlite_start_info.nr_modules) { >> + struct hvm_modlist_entry *modaddr = >> + __va(hvmlite_start_info.modlist_paddr); >> + xen_hvmlite_boot_params.hdr.ramdisk_image = modaddr->paddr; >> + xen_hvmlite_boot_params.hdr.ramdisk_size = modaddr->size; >> + } >> + >> + /* >> + * See Documentation/x86/boot.txt. >> + * >> + * Version 2.12 supports Xen entry point but we will use default x86/PC >> + * environment (i.e. hardware_subarch 0). >> + */ >> + xen_hvmlite_boot_params.hdr.version = 0x212; >> + xen_hvmlite_boot_params.hdr.type_of_loader = 9; /* Xen loader */ >> +} >> + >> +/* >> + * This routine (and those that it might call) should not use >> + * anything that lives in .bss since that segment will be cleared later >> + */ >> +void __init xen_prepare_hvmlite(void) >> +{ >> + u32 eax, ecx, edx, msr; >> + u64 pfn; >> + >> + xen_hvmlite = 1; >> + >> + cpuid(xen_cpuid_base() + 2, &eax, &msr, &ecx, &edx); >> + pfn = __pa(hypercall_page); >> + wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); >> + >> + hvmlite_bootparams(); >> +} >> +#endif >> + >> void __ref xen_hvm_init_shared_info(void) >> { >> int cpu; >> diff --git a/arch/x86/xen/xen-hvmlite.S b/arch/x86/xen/xen-hvmlite.S >> new file mode 100644 >> index 0000000..fc7c08c >> --- /dev/null >> +++ b/arch/x86/xen/xen-hvmlite.S >> @@ -0,0 +1,175 @@ >> +/* >> + * Copyright C 2016, Oracle and/or its affiliates. All rights reserved. 
>> + * >> + * This program is free software; you can redistribute it and/or modify >> + * it under the terms of the GNU General Public License as published by >> + * the Free Software Foundation; either version 2 of the License, or >> + * (at your option) any later version. >> + * >> + * This program is distributed in the hope that it will be useful, >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >> + * GNU General Public License for more details. >> + * >> + * You should have received a copy of the GNU General Public License along >> + * with this program. If not, see . >> + */ >> + >> + .code32 >> + .text >> +#define _pa(x) ((x) - __START_KERNEL_map) >> + >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include >> + >> + __HEAD >> + .code32 >> + >> +/* Entry point for HVMlite guests */ >> +ENTRY(hvmlite_start_xen) >> + cli >> + cld >> + >> + mov $_pa(gdt), %eax >> + lgdt (%eax) >> + >> + movl $(__BOOT_DS),%eax >> + movl %eax,%ds >> + movl %eax,%es >> + movl %eax,%ss >> + >> + /* Stash hvm_start_info */ >> + mov $_pa(hvmlite_start_info), %edi >> + mov %ebx, %esi >> + mov $_pa(hvmlite_start_info_sz), %ecx >> + mov (%ecx), %ecx >> + rep >> + movsb >> + >> + movl $_pa(early_stack_end), %eax >> + movl %eax, %esp >> + >> + /* Enable PAE mode */ >> + movl %cr4, %eax >> + orl $X86_CR4_PAE, %eax >> + movl %eax, %cr4 >> + >> +#ifdef CONFIG_X86_64 >> + /* Enable Long mode */ >> + movl $MSR_EFER, %ecx >> + rdmsr >> + btsl $_EFER_LME, %eax >> + wrmsr >> + >> + /* Enable pre-constructed page tables */ >> + mov $_pa(init_level4_pgt), %eax >> + movl %eax, %cr3 >> + movl $(X86_CR0_PG | X86_CR0_PE), %eax >> + movl %eax, %cr0 >> + >> + /* Jump to 64-bit mode. 
*/ >> + pushl $__KERNEL_CS >> + leal _pa(1f), %eax >> + pushl %eax >> + lret >> + >> + /* 64-bit entry point */ >> + .code64 >> +1: >> + call xen_prepare_hvmlite >> + >> + /* startup_64 expects boot_params in %rsi */ >> + mov $_pa(xen_hvmlite_boot_params), %rsi >> + movq $_pa(startup_64), %rax >> + jmp *%rax >> + >> +#else /* CONFIG_X86_64 */ >> + >> + /* Clear boot page tables */ >> + movl $_pa(early_pgtable), %edi >> + xorl %eax, %eax >> + movl $((PAGE_SIZE*5)/4), %ecx >> + rep stosl >> + >> + /* Level 3 */ >> + movl $_pa(early_pgtable), %edi >> + leal (PAGE_SIZE +_PAGE_PRESENT)(%edi), %eax >> + movl $4, %ecx >> +1: >> + movl %eax, 0x00(%edi) >> + addl $8, %edi >> + decl %ecx >> + jnz 1b >> + >> + /* Level 2 (2M entries) */ >> + movl $(_pa(early_pgtable) + PAGE_SIZE), %edi >> + movl $(_PAGE_PSE | _PAGE_RW | _PAGE_PRESENT), %eax >> + movl $2048, %ecx >> +2: >> + movl %eax, 0(%edi) >> + addl $0x00200000, %eax >> + addl $8, %edi >> + decl %ecx >> + jnz 2b >> + >> + /* Enable the boot paging */ >> + movl $_pa(early_pgtable), %eax >> + movl %eax, %cr3 >> + movl %cr0, %eax >> + orl $(X86_CR0_PG | X86_CR0_PE), %eax >> + movl %eax, %cr0 >> + >> + ljmp $__BOOT_CS,$3f >> +3: >> + call xen_prepare_hvmlite >> + mov $_pa(xen_hvmlite_boot_params), %esi >> + >> + /* startup_32 doesn't expect paging and PAE to be on */ >> + ljmp $__BOOT_CS,$_pa(4f) >> +4: >> + movl %cr0, %eax >> + andl $~X86_CR0_PG, %eax >> + movl %eax, %cr0 >> + movl %cr4, %eax >> + andl $~X86_CR4_PAE, %eax >> + movl %eax, %cr4 >> + >> + ljmp $0x10, $_pa(startup_32) >> +#endif >> + >> + .data >> +gdt: >> + .word gdt_end - gdt >> + .long _pa(gdt) >> + .word 0 >> + .quad 0x0000000000000000 /* NULL descriptor */ >> +#ifdef CONFIG_X86_64 >> + .quad 0x00af9a000000ffff /* __KERNEL_CS */ >> +#else >> + .quad 0x00cf9a000000ffff /* __KERNEL_CS */ >> +#endif >> + .quad 0x00cf92000000ffff /* __KERNEL_DS */ >> +gdt_end: >> + >> + .bss >> + .balign 4 >> +early_stack: >> + .fill 16, 1, 0 >> +early_stack_end: >> + >> +#ifdef 
CONFIG_X86_32 >> + .section ".pgtable","a",@nobits >> + .balign 4096 >> +early_pgtable: >> + .fill 5*4096, 1, 0 >> +#endif >> + >> + ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, >> + _ASM_PTR (hvmlite_start_xen - __START_KERNEL_map)) >> diff --git a/include/xen/xen.h b/include/xen/xen.h >> index 0c0e3ef..6a0d3f3 100644 >> --- a/include/xen/xen.h >> +++ b/include/xen/xen.h >> @@ -29,6 +29,12 @@ extern enum xen_domain_type xen_domain_type; >> #define xen_initial_domain() (0) >> #endif /* CONFIG_XEN_DOM0 */ >> >> +#ifdef CONFIG_XEN_PVHVM >> +extern int xen_hvmlite; >> +#else >> +#define xen_hvmlite (0) >> +#endif >> + >> #ifdef CONFIG_XEN_PVH >> /* This functionality exists only for x86. The XEN_PVHVM support exists >> * only in x86 world - hence on ARM it will be always disabled. >> -- >> 1.7.1 >> >>