From: Boris Ostrovsky
To: david.vrabel@citrix.com, JGross@suse.com
Cc: roger.pau@citrix.com, linux-kernel@vger.kernel.org,
    xen-devel@lists.xenproject.org, Boris Ostrovsky, Thomas Gleixner,
    Ingo Molnar, "H. Peter Anvin", x86@kernel.org, Matt Fleming
Subject: [PATCH 2/8] x86/head: Refactor 32-bit pgtable setup
Date: Fri, 14 Oct 2016 14:05:12 -0400
Message-Id: <1476468318-24422-3-git-send-email-boris.ostrovsky@oracle.com>
X-Mailer: git-send-email 1.8.3.1
In-Reply-To: <1476468318-24422-1-git-send-email-boris.ostrovsky@oracle.com>
References: <1476468318-24422-1-git-send-email-boris.ostrovsky@oracle.com>

From: Matt Fleming

The new Xen PVH entry point requires page tables to be set up by the
kernel since it is entered with paging disabled.

Pull the common code out of head_32.S and into pgtable_32.S so that
setup_pgtable_32 can be invoked from both the new Xen entry point and
the existing startup_32 code.

Cc: Boris Ostrovsky
Cc: Thomas Gleixner
Cc: Ingo Molnar
Cc: "H. Peter Anvin"
Cc: x86@kernel.org
Signed-off-by: Matt Fleming
---
 arch/x86/Makefile            |   2 +
 arch/x86/kernel/Makefile     |   2 +
 arch/x86/kernel/head_32.S    | 168 +------------------------------------
 arch/x86/kernel/pgtable_32.S | 196 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 201 insertions(+), 167 deletions(-)
 create mode 100644 arch/x86/kernel/pgtable_32.S

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 2d44933..67cc771 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -204,6 +204,8 @@ head-y += arch/x86/kernel/head$(BITS).o
 head-y += arch/x86/kernel/ebda.o
 head-y += arch/x86/kernel/platform-quirks.o
 
+head-$(CONFIG_X86_32) += arch/x86/kernel/pgtable_32.o
+
 libs-y  += arch/x86/lib/
 
 # See arch/x86/Kbuild for content of core part of the kernel
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 4dd5d50..eae85a5 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -8,6 +8,8 @@ extra-y += ebda.o
 extra-y	+= platform-quirks.o
 extra-y	+= vmlinux.lds
 
+extra-$(CONFIG_X86_32) += pgtable_32.o
+
 CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
 
 ifdef CONFIG_FUNCTION_TRACER
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 5f40126..0db066e 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -41,51 +41,6 @@
 #define X86_VENDOR_ID	new_cpu_data+CPUINFO_x86_vendor_id
 
 /*
- * This is how much memory in addition to the memory covered up to
- * and including _end we need mapped initially.
- * We need:
- *     (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
- *     (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
- *
- * Modulo rounding, each megabyte assigned here requires a kilobyte of
- * memory, which is currently unreclaimed.
- *
- * This should be a multiple of a page.
- *
- * KERNEL_IMAGE_SIZE should be greater than pa(_end)
- * and small than max_low_pfn, otherwise will waste some page table entries
- */
-
-#if PTRS_PER_PMD > 1
-#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
-#else
-#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
-#endif
-
-/*
- * Number of possible pages in the lowmem region.
- *
- * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a
- * gas warning about overflowing shift count when gas has been compiled
- * with only a host target support using a 32-bit type for internal
- * representation.
- */
-LOWMEM_PAGES = (((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT)
-
-/* Enough space to fit pagetables for the low memory linear map */
-MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT
-
-/*
- * Worst-case size of the kernel mapping we need to make:
- * a relocatable kernel can live anywhere in lowmem, so we need to be able
- * to map all of lowmem.
- */
-KERNEL_PAGES = LOWMEM_PAGES
-
-INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE
-RESERVE_BRK(pagetables, INIT_MAP_SIZE)
-
-/*
  * 32-bit kernel entrypoint; only used by the boot CPU.  On entry,
  * %esi points to the real-mode code as a 32-bit pointer.
  * CS and DS must be 4 GB flat segments, but we don't depend on
@@ -157,92 +112,7 @@ ENTRY(startup_32)
 	call load_ucode_bsp
 #endif
 
-/*
- * Initialize page tables.  This creates a PDE and a set of page
- * tables, which are located immediately beyond __brk_base.  The variable
- * _brk_end is set up to point to the first "safe" location.
- * Mappings are created both at virtual address 0 (identity mapping)
- * and PAGE_OFFSET for up to _end.
- */
-#ifdef CONFIG_X86_PAE
-
-	/*
-	 * In PAE mode initial_page_table is statically defined to contain
-	 * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
-	 * entries). The identity mapping is handled by pointing two PGD entries
-	 * to the first kernel PMD.
-	 *
-	 * Note the upper half of each PMD or PTE are always zero at this stage.
-	 */
-
-#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
-
-	xorl %ebx,%ebx				/* %ebx is kept at zero */
-
-	movl $pa(__brk_base), %edi
-	movl $pa(initial_pg_pmd), %edx
-	movl $PTE_IDENT_ATTR, %eax
-10:
-	leal PDE_IDENT_ATTR(%edi),%ecx		/* Create PMD entry */
-	movl %ecx,(%edx)			/* Store PMD entry */
-						/* Upper half already zero */
-	addl $8,%edx
-	movl $512,%ecx
-11:
-	stosl
-	xchgl %eax,%ebx
-	stosl
-	xchgl %eax,%ebx
-	addl $0x1000,%eax
-	loop 11b
-
-	/*
-	 * End condition: we must map up to the end + MAPPING_BEYOND_END.
-	 */
-	movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
-	cmpl %ebp,%eax
-	jb 10b
-1:
-	addl $__PAGE_OFFSET, %edi
-	movl %edi, pa(_brk_end)
-	shrl $12, %eax
-	movl %eax, pa(max_pfn_mapped)
-
-	/* Do early initialization of the fixmap area */
-	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
-	movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8)
-#else	/* Not PAE */
-
-page_pde_offset = (__PAGE_OFFSET >> 20);
-
-	movl $pa(__brk_base), %edi
-	movl $pa(initial_page_table), %edx
-	movl $PTE_IDENT_ATTR, %eax
-10:
-	leal PDE_IDENT_ATTR(%edi),%ecx		/* Create PDE entry */
-	movl %ecx,(%edx)			/* Store identity PDE entry */
-	movl %ecx,page_pde_offset(%edx)		/* Store kernel PDE entry */
-	addl $4,%edx
-	movl $1024, %ecx
-11:
-	stosl
-	addl $0x1000,%eax
-	loop 11b
-	/*
-	 * End condition: we must map up to the end + MAPPING_BEYOND_END.
-	 */
-	movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
-	cmpl %ebp,%eax
-	jb 10b
-	addl $__PAGE_OFFSET, %edi
-	movl %edi, pa(_brk_end)
-	shrl $12, %eax
-	movl %eax, pa(max_pfn_mapped)
-
-	/* Do early initialization of the fixmap area */
-	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
-	movl %eax,pa(initial_page_table+0xffc)
-#endif
+	call setup_pgtable_32
 
 #ifdef CONFIG_PARAVIRT
 	/* This is can only trip for a broken bootloader... */
@@ -660,47 +530,11 @@ ENTRY(setup_once_ref)
  */
 __PAGE_ALIGNED_BSS
 	.align PAGE_SIZE
-#ifdef CONFIG_X86_PAE
-initial_pg_pmd:
-	.fill 1024*KPMDS,4,0
-#else
-ENTRY(initial_page_table)
-	.fill 1024,4,0
-#endif
-initial_pg_fixmap:
-	.fill 1024,4,0
 ENTRY(empty_zero_page)
 	.fill 4096,1,0
 ENTRY(swapper_pg_dir)
 	.fill 1024,4,0
 
-/*
- * This starts the data section.
- */
-#ifdef CONFIG_X86_PAE
-__PAGE_ALIGNED_DATA
-	/* Page-aligned for the benefit of paravirt? */
-	.align PAGE_SIZE
-ENTRY(initial_page_table)
-	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR),0	/* low identity map */
-# if KPMDS == 3
-	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR),0
-	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR+0x1000),0
-	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR+0x2000),0
-# elif KPMDS == 2
-	.long	0,0
-	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR),0
-	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR+0x1000),0
-# elif KPMDS == 1
-	.long	0,0
-	.long	0,0
-	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR),0
-# else
-# error "Kernel PMDs should be 1, 2 or 3"
-# endif
-	.align PAGE_SIZE	/* needs to be page-sized too */
-#endif
-
 .data
 .balign 4
 ENTRY(initial_stack)
diff --git a/arch/x86/kernel/pgtable_32.S b/arch/x86/kernel/pgtable_32.S
new file mode 100644
index 0000000..aded718
--- /dev/null
+++ b/arch/x86/kernel/pgtable_32.S
@@ -0,0 +1,196 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* Physical address */
+#define pa(X) ((X) - __PAGE_OFFSET)
+
+/*
+ * This is how much memory in addition to the memory covered up to
+ * and including _end we need mapped initially.
+ * We need:
+ *     (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
+ *     (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
+ *
+ * Modulo rounding, each megabyte assigned here requires a kilobyte of
+ * memory, which is currently unreclaimed.
+ *
+ * This should be a multiple of a page.
+ *
+ * KERNEL_IMAGE_SIZE should be greater than pa(_end)
+ * and small than max_low_pfn, otherwise will waste some page table entries
+ */
+
+#if PTRS_PER_PMD > 1
+#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
+#else
+#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
+#endif
+
+/*
+ * Number of possible pages in the lowmem region.
+ *
+ * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a
+ * gas warning about overflowing shift count when gas has been compiled
+ * with only a host target support using a 32-bit type for internal
+ * representation.
+ */
+LOWMEM_PAGES = (((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT)
+
+/* Enough space to fit pagetables for the low memory linear map */
+MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT
+
+/*
+ * Worst-case size of the kernel mapping we need to make:
+ * a relocatable kernel can live anywhere in lowmem, so we need to be able
+ * to map all of lowmem.
+ */
+KERNEL_PAGES = LOWMEM_PAGES
+
+INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE
+RESERVE_BRK(pagetables, INIT_MAP_SIZE)
+
+/*
+ * Initialize page tables.  This creates a PDE and a set of page
+ * tables, which are located immediately beyond __brk_base.  The variable
+ * _brk_end is set up to point to the first "safe" location.
+ * Mappings are created both at virtual address 0 (identity mapping)
+ * and PAGE_OFFSET for up to _end.
+ */
+	.text
+ENTRY(setup_pgtable_32)
+#ifdef CONFIG_X86_PAE
+	/*
+	 * In PAE mode initial_page_table is statically defined to contain
+	 * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
+	 * entries). The identity mapping is handled by pointing two PGD entries
+	 * to the first kernel PMD.
+	 *
+	 * Note the upper half of each PMD or PTE are always zero at this stage.
+	 */
+
+#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
+
+	xorl %ebx,%ebx				/* %ebx is kept at zero */
+
+	movl $pa(__brk_base), %edi
+	movl $pa(initial_pg_pmd), %edx
+	movl $PTE_IDENT_ATTR, %eax
+10:
+	leal PDE_IDENT_ATTR(%edi),%ecx		/* Create PMD entry */
+	movl %ecx,(%edx)			/* Store PMD entry */
+						/* Upper half already zero */
+	addl $8,%edx
+	movl $512,%ecx
+11:
+	stosl
+	xchgl %eax,%ebx
+	stosl
+	xchgl %eax,%ebx
+	addl $0x1000,%eax
+	loop 11b
+
+	/*
+	 * End condition: we must map up to the end + MAPPING_BEYOND_END.
+	 */
+	movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
+	cmpl %ebp,%eax
+	jb 10b
+1:
+	addl $__PAGE_OFFSET, %edi
+	movl %edi, pa(_brk_end)
+	shrl $12, %eax
+	movl %eax, pa(max_pfn_mapped)
+
+	/* Do early initialization of the fixmap area */
+	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
+	movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8)
+#else	/* Not PAE */
+
+page_pde_offset = (__PAGE_OFFSET >> 20);
+
+	movl $pa(__brk_base), %edi
+	movl $pa(initial_page_table), %edx
+	movl $PTE_IDENT_ATTR, %eax
+10:
+	leal PDE_IDENT_ATTR(%edi),%ecx		/* Create PDE entry */
+	movl %ecx,(%edx)			/* Store identity PDE entry */
+	movl %ecx,page_pde_offset(%edx)		/* Store kernel PDE entry */
+	addl $4,%edx
+	movl $1024, %ecx
+11:
+	stosl
+	addl $0x1000,%eax
+	loop 11b
+	/*
+	 * End condition: we must map up to the end + MAPPING_BEYOND_END.
+	 */
+	movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
+	cmpl %ebp,%eax
+	jb 10b
+	addl $__PAGE_OFFSET, %edi
+	movl %edi, pa(_brk_end)
+	shrl $12, %eax
+	movl %eax, pa(max_pfn_mapped)
+
+	/* Do early initialization of the fixmap area */
+	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
+	movl %eax,pa(initial_page_table+0xffc)
+#endif
+	ret
+ENDPROC(setup_pgtable_32)
+
+/*
+ * BSS section
+ */
+__PAGE_ALIGNED_BSS
+	.align PAGE_SIZE
+#ifdef CONFIG_X86_PAE
+initial_pg_pmd:
+	.fill 1024*KPMDS,4,0
+#else
+ENTRY(initial_page_table)
+	.fill 1024,4,0
+#endif
+initial_pg_fixmap:
+	.fill 1024,4,0
+
+/*
+ * This starts the data section.
+ */
+#ifdef CONFIG_X86_PAE
+__PAGE_ALIGNED_DATA
+	/* Page-aligned for the benefit of paravirt? */
+	.align PAGE_SIZE
+ENTRY(initial_page_table)
+	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR),0	/* low identity map */
+# if KPMDS == 3
+	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR),0
+	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR+0x1000),0
+	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR+0x2000),0
+# elif KPMDS == 2
+	.long	0,0
+	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR),0
+	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR+0x1000),0
+# elif KPMDS == 1
+	.long	0,0
+	.long	0,0
+	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR),0
+# else
+# error "Kernel PMDs should be 1, 2 or 3"
+# endif
+	.align PAGE_SIZE	/* needs to be page-sized too */
+#endif
-- 
1.8.3.1
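
For context, the kind of caller the changelog describes -- an entry point that
arrives in 32-bit protected mode with flat segments and paging disabled --
would use the new helper roughly as in the sketch below. This is an
illustration only and not part of the patch: every "example_*" symbol is made
up, and GDT reload, CR4.PAE handling for the PAE case and the hand-off to the
kernel's virtual-address entry code are omitted. Only setup_pgtable_32 and
initial_page_table come from the code above.

/*
 * Hypothetical pre-paging entry stub (illustration only, not from this
 * patch): build the early page tables with setup_pgtable_32, point CR3 at
 * them and enable paging.
 */
#include <linux/linkage.h>
#include <asm/page_types.h>
#include <asm/processor-flags.h>

#define pa(X) ((X) - __PAGE_OFFSET)	/* same helper as in pgtable_32.S */

	.text
ENTRY(example_pre_paging_entry)
	cld					/* setup_pgtable_32 uses stosl */
	movl $pa(example_stack_end), %esp	/* a near call needs a stack */

	call setup_pgtable_32			/* identity + PAGE_OFFSET maps */

	movl $pa(initial_page_table), %eax	/* tables the helper populated */
	movl %eax, %cr3

	movl %cr0, %eax
	orl $X86_CR0_PG, %eax			/* turn on paging */
	movl %eax, %cr0

.Lexample_hang:					/* real code would now jump to
						 * the virtual-address entry */
	hlt
	jmp .Lexample_hang
ENDPROC(example_pre_paging_entry)

	.bss
	.balign 16
example_stack:
	.space 512				/* scratch stack for the call */
example_stack_end: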