Subject: Re: [PATCH 2/8] x86/head: Refactor 32-bit pgtable setup
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
To: david.vrabel@citrix.com, JGross@suse.com, Thomas Gleixner, Ingo Molnar,
 "H. Peter Anvin"
Cc: roger.pau@citrix.com, linux-kernel@vger.kernel.org,
 xen-devel@lists.xenproject.org, x86@kernel.org, Matt Fleming
Date: Mon, 31 Oct 2016 08:33:12 -0400
Message-ID: <0b9894e4-753c-7e49-45a7-3ee5e82abe3e@oracle.com>
In-Reply-To: <1476468318-24422-3-git-send-email-boris.ostrovsky@oracle.com>
References: <1476468318-24422-1-git-send-email-boris.ostrovsky@oracle.com>
 <1476468318-24422-3-git-send-email-boris.ostrovsky@oracle.com>
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Thunderbird/45.4.0

On 10/14/2016 02:05 PM, Boris Ostrovsky wrote:
> From: Matt Fleming
>
> The new Xen PVH entry point requires page tables to be set up by the
> kernel since it is entered with paging disabled.
>
> Pull the common code out of head_32.S and into pgtable_32.S so that
> setup_pgtable_32 can be invoked from both the new Xen entry point and
> the existing startup_32 code.

Ping to x86 maintainers.

Peter, you had questions about this patch. Did I answer them?

-boris

>
> Cc: Boris Ostrovsky
> Cc: Thomas Gleixner
> Cc: Ingo Molnar
> Cc: "H. Peter Anvin"
> Cc: x86@kernel.org
> Signed-off-by: Matt Fleming
> ---
>  arch/x86/Makefile            |   2 +
>  arch/x86/kernel/Makefile     |   2 +
>  arch/x86/kernel/head_32.S    | 168 +------------------------------------
>  arch/x86/kernel/pgtable_32.S | 196 +++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 201 insertions(+), 167 deletions(-)
>  create mode 100644 arch/x86/kernel/pgtable_32.S
>
> diff --git a/arch/x86/Makefile b/arch/x86/Makefile
> index 2d44933..67cc771 100644
> --- a/arch/x86/Makefile
> +++ b/arch/x86/Makefile
> @@ -204,6 +204,8 @@ head-y += arch/x86/kernel/head$(BITS).o
>  head-y += arch/x86/kernel/ebda.o
>  head-y += arch/x86/kernel/platform-quirks.o
>
> +head-$(CONFIG_X86_32) += arch/x86/kernel/pgtable_32.o
> +
>  libs-y  += arch/x86/lib/
>
>  # See arch/x86/Kbuild for content of core part of the kernel
> diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
> index 4dd5d50..eae85a5 100644
> --- a/arch/x86/kernel/Makefile
> +++ b/arch/x86/kernel/Makefile
> @@ -8,6 +8,8 @@ extra-y += ebda.o
>  extra-y += platform-quirks.o
>  extra-y += vmlinux.lds
>
> +extra-$(CONFIG_X86_32) += pgtable_32.o
> +
>  CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
>
>  ifdef CONFIG_FUNCTION_TRACER
> diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
> index 5f40126..0db066e 100644
> --- a/arch/x86/kernel/head_32.S
> +++ b/arch/x86/kernel/head_32.S
> @@ -41,51 +41,6 @@
>  #define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id
>
>  /*
> - * This is how much memory in addition to the memory covered up to
> - * and including _end we need mapped initially.
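A note while re-pinging this: the sizing math being removed above reappears
verbatim in pgtable_32.S, so it may help reviewers to see the numbers it
produces. For the default VMSPLIT_3G layout (__PAGE_OFFSET == 0xC0000000)
a quick userspace check, with the constants copied in by hand rather than
taken from kernel headers, gives ~2 MiB of brk for PAE and 1 MiB for
non-PAE, matching the "each megabyte ... requires a kilobyte" comment:

#include <stdio.h>

int main(void)
{
	unsigned long long page_offset = 0xC0000000ULL;	/* VMSPLIT_3G */
	unsigned long long lowmem_pages =
		((2ULL << 31) - page_offset) >> 12;	/* LOWMEM_PAGES */

	/* PAE: PTRS_PER_PMD == 512, PTRS_PER_PGD == 4 */
	unsigned long long pae = lowmem_pages / 512 + 4;
	/* non-PAE: PTRS_PER_PGD == 1024 */
	unsigned long long nonpae = lowmem_pages / 1024;

	printf("lowmem pages: %llu\n", lowmem_pages);	/* 262144 = 1 GiB */
	printf("PAE:     %llu pages (%llu KiB)\n", pae, pae * 4);
	printf("non-PAE: %llu pages (%llu KiB)\n", nonpae, nonpae * 4);
	return 0;	/* prints 516 pages (~2 MiB) and 256 pages (1 MiB) */
}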
> - * We need:
> - *  (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
> - *  (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
> - *
> - * Modulo rounding, each megabyte assigned here requires a kilobyte of
> - * memory, which is currently unreclaimed.
> - *
> - * This should be a multiple of a page.
> - *
> - * KERNEL_IMAGE_SIZE should be greater than pa(_end)
> - * and small than max_low_pfn, otherwise will waste some page table entries
> - */
> -
> -#if PTRS_PER_PMD > 1
> -#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
> -#else
> -#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
> -#endif
> -
> -/*
> - * Number of possible pages in the lowmem region.
> - *
> - * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a
> - * gas warning about overflowing shift count when gas has been compiled
> - * with only a host target support using a 32-bit type for internal
> - * representation.
> - */
> -LOWMEM_PAGES = (((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT)
> -
> -/* Enough space to fit pagetables for the low memory linear map */
> -MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT
> -
> -/*
> - * Worst-case size of the kernel mapping we need to make:
> - * a relocatable kernel can live anywhere in lowmem, so we need to be able
> - * to map all of lowmem.
> - */
> -KERNEL_PAGES = LOWMEM_PAGES
> -
> -INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE
> -RESERVE_BRK(pagetables, INIT_MAP_SIZE)
> -
> -/*
>   * 32-bit kernel entrypoint; only used by the boot CPU.  On entry,
>   * %esi points to the real-mode code as a 32-bit pointer.
>   * CS and DS must be 4 GB flat segments, but we don't depend on
> @@ -157,92 +112,7 @@ ENTRY(startup_32)
>  	call load_ucode_bsp
>  #endif
>
> -/*
> - * Initialize page tables.  This creates a PDE and a set of page
> - * tables, which are located immediately beyond __brk_base.  The variable
> - * _brk_end is set up to point to the first "safe" location.
> - * Mappings are created both at virtual address 0 (identity mapping)
> - * and PAGE_OFFSET for up to _end.
> - */
> -#ifdef CONFIG_X86_PAE
> -
> -	/*
> -	 * In PAE mode initial_page_table is statically defined to contain
> -	 * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
> -	 * entries). The identity mapping is handled by pointing two PGD entries
> -	 * to the first kernel PMD.
> -	 *
> -	 * Note the upper half of each PMD or PTE are always zero at this stage.
> -	 */
> -
> -#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
> -
> -	xorl %ebx,%ebx				/* %ebx is kept at zero */
> -
> -	movl $pa(__brk_base), %edi
> -	movl $pa(initial_pg_pmd), %edx
> -	movl $PTE_IDENT_ATTR, %eax
> -10:
> -	leal PDE_IDENT_ATTR(%edi),%ecx		/* Create PMD entry */
> -	movl %ecx,(%edx)			/* Store PMD entry */
> -						/* Upper half already zero */
> -	addl $8,%edx
> -	movl $512,%ecx
> -11:
> -	stosl
> -	xchgl %eax,%ebx
> -	stosl
> -	xchgl %eax,%ebx
> -	addl $0x1000,%eax
> -	loop 11b
> -
> -	/*
> -	 * End condition: we must map up to the end + MAPPING_BEYOND_END.
> -	 */
> -	movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
> -	cmpl %ebp,%eax
> -	jb 10b
> -1:
> -	addl $__PAGE_OFFSET, %edi
> -	movl %edi, pa(_brk_end)
> -	shrl $12, %eax
> -	movl %eax, pa(max_pfn_mapped)
> -
> -	/* Do early initialization of the fixmap area */
> -	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
> -	movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8)
> -#else	/* Not PAE */
> -
> -page_pde_offset = (__PAGE_OFFSET >> 20);
> -
> -	movl $pa(__brk_base), %edi
> -	movl $pa(initial_page_table), %edx
> -	movl $PTE_IDENT_ATTR, %eax
> -10:
> -	leal PDE_IDENT_ATTR(%edi),%ecx		/* Create PDE entry */
> -	movl %ecx,(%edx)			/* Store identity PDE entry */
> -	movl %ecx,page_pde_offset(%edx)		/* Store kernel PDE entry */
> -	addl $4,%edx
> -	movl $1024, %ecx
> -11:
> -	stosl
> -	addl $0x1000,%eax
> -	loop 11b
> -	/*
> -	 * End condition: we must map up to the end + MAPPING_BEYOND_END.
> -	 */
> -	movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
> -	cmpl %ebp,%eax
> -	jb 10b
> -	addl $__PAGE_OFFSET, %edi
> -	movl %edi, pa(_brk_end)
> -	shrl $12, %eax
> -	movl %eax, pa(max_pfn_mapped)
> -
> -	/* Do early initialization of the fixmap area */
> -	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
> -	movl %eax,pa(initial_page_table+0xffc)
> -#endif
> +	call setup_pgtable_32
>
>  #ifdef CONFIG_PARAVIRT
>  	/* This is can only trip for a broken bootloader... */
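For anyone reading the PAE loop above for the first time: the stosl/xchgl
pairs build 8-byte entries out of two 4-byte stores, with %ebx donating a
permanently zero high dword (x86 is little-endian, so the low dword with
pfn | attrs goes first). A C model of the inner loop; fill_pte_page() is
my name for it, not the kernel's:

#include <stdio.h>
#include <stdint.h>

#define PTE_IDENT_ATTR 0x003		/* PRESENT+RW */

static uint32_t pte_page[512 * 2];	/* one 4 KiB PTE page, as dwords */

static uint32_t fill_pte_page(uint32_t *dst, uint32_t phys)
{
	for (int i = 0; i < 512; i++) {		/* movl $512,%ecx; loop 11b  */
		*dst++ = phys | PTE_IDENT_ATTR;	/* stosl (low dword)         */
		*dst++ = 0;			/* xchgl; stosl (high dword) */
		phys += 0x1000;			/* addl $0x1000,%eax         */
	}
	return phys;		/* next physical address to map */
}

int main(void)
{
	fill_pte_page(pte_page, 0);
	/* entry 1 maps phys 0x1000 with PRESENT+RW: prints "1003 0" */
	printf("%x %x\n", pte_page[2], pte_page[3]);
	return 0;
}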
> @@ -660,47 +530,11 @@ ENTRY(setup_once_ref)
>   */
>  __PAGE_ALIGNED_BSS
>  	.align PAGE_SIZE
> -#ifdef CONFIG_X86_PAE
> -initial_pg_pmd:
> -	.fill 1024*KPMDS,4,0
> -#else
> -ENTRY(initial_page_table)
> -	.fill 1024,4,0
> -#endif
> -initial_pg_fixmap:
> -	.fill 1024,4,0
>  ENTRY(empty_zero_page)
>  	.fill 4096,1,0
>  ENTRY(swapper_pg_dir)
>  	.fill 1024,4,0
>
> -/*
> - * This starts the data section.
> - */
> -#ifdef CONFIG_X86_PAE
> -__PAGE_ALIGNED_DATA
> -	/* Page-aligned for the benefit of paravirt? */
> -	.align PAGE_SIZE
> -ENTRY(initial_page_table)
> -	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR),0	/* low identity map */
> -# if KPMDS == 3
> -	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR),0
> -	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR+0x1000),0
> -	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR+0x2000),0
> -# elif KPMDS == 2
> -	.long	0,0
> -	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR),0
> -	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR+0x1000),0
> -# elif KPMDS == 1
> -	.long	0,0
> -	.long	0,0
> -	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR),0
> -# else
> -# error "Kernel PMDs should be 1, 2 or 3"
> -# endif
> -	.align PAGE_SIZE		/* needs to be page-sized too */
> -#endif
> -
>  .data
>  .balign 4
>  ENTRY(initial_stack)
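Since the KPMDS == 1/2/3 cases above look magic at first glance: KPMDS is
just the number of gigabytes above __PAGE_OFFSET, i.e. how many of the four
PAE PGD slots belong to the kernel, with entry 0 doubling as the low
identity map. A quick check of the formula for the three VMSPLIT choices
(userspace demo, offsets copied in by hand):

#include <stdio.h>

int main(void)
{
	unsigned int splits[] = { 0xC0000000, 0x80000000, 0x40000000 };

	for (int i = 0; i < 3; i++) {
		unsigned int off = splits[i];
		/* unsigned negation wraps mod 2^32, like the asm */
		unsigned int kpmds = ((-off) >> 30) & 3;

		/* prints KPMDS = 1, 2 and 3 respectively */
		printf("__PAGE_OFFSET=%#x -> KPMDS=%u\n", off, kpmds);
	}
	return 0;
}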
> diff --git a/arch/x86/kernel/pgtable_32.S b/arch/x86/kernel/pgtable_32.S
> new file mode 100644
> index 0000000..aded718
> --- /dev/null
> +++ b/arch/x86/kernel/pgtable_32.S
> @@ -0,0 +1,196 @@
> +#include <linux/threads.h>
> +#include <linux/init.h>
> +#include <linux/linkage.h>
> +#include <asm/segment.h>
> +#include <asm/page_types.h>
> +#include <asm/pgtable_types.h>
> +#include <asm/cache.h>
> +#include <asm/thread_info.h>
> +#include <asm/asm-offsets.h>
> +#include <asm/setup.h>
> +#include <asm/processor-flags.h>
> +#include <asm/msr-index.h>
> +#include <asm/cpufeatures.h>
> +#include <asm/percpu.h>
> +#include <asm/nops.h>
> +#include <asm/bootparam.h>
> +
> +/* Physical address */
> +#define pa(X) ((X) - __PAGE_OFFSET)
> +
> +/*
> + * This is how much memory in addition to the memory covered up to
> + * and including _end we need mapped initially.
> + * We need:
> + *  (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
> + *  (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
> + *
> + * Modulo rounding, each megabyte assigned here requires a kilobyte of
> + * memory, which is currently unreclaimed.
> + *
> + * This should be a multiple of a page.
> + *
> + * KERNEL_IMAGE_SIZE should be greater than pa(_end)
> + * and smaller than max_low_pfn, otherwise we will waste some page table entries
> + */
> +
> +#if PTRS_PER_PMD > 1
> +#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
> +#else
> +#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
> +#endif
> +
> +/*
> + * Number of possible pages in the lowmem region.
> + *
> + * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a
> + * gas warning about overflowing shift count when gas has been compiled
> + * with only a host target support using a 32-bit type for internal
> + * representation.
> + */
> +LOWMEM_PAGES = (((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT)
> +
> +/* Enough space to fit pagetables for the low memory linear map */
> +MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT
> +
> +/*
> + * Worst-case size of the kernel mapping we need to make:
> + * a relocatable kernel can live anywhere in lowmem, so we need to be able
> + * to map all of lowmem.
> + */
> +KERNEL_PAGES = LOWMEM_PAGES
> +
> +INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE
> +RESERVE_BRK(pagetables, INIT_MAP_SIZE)
> +
> +/*
> + * Initialize page tables.  This creates a PDE and a set of page
> + * tables, which are located immediately beyond __brk_base.  The variable
> + * _brk_end is set up to point to the first "safe" location.
> + * Mappings are created both at virtual address 0 (identity mapping)
> + * and PAGE_OFFSET for up to _end.
> + */
> +	.text
> +ENTRY(setup_pgtable_32)
> +#ifdef CONFIG_X86_PAE
> +	/*
> +	 * In PAE mode initial_page_table is statically defined to contain
> +	 * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
> +	 * entries). The identity mapping is handled by pointing two PGD entries
> +	 * to the first kernel PMD.
> +	 *
> +	 * Note the upper half of each PMD or PTE is always zero at this stage.
> +	 */
> +
> +#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
> +
> +	xorl %ebx,%ebx				/* %ebx is kept at zero */
> +
> +	movl $pa(__brk_base), %edi
> +	movl $pa(initial_pg_pmd), %edx
> +	movl $PTE_IDENT_ATTR, %eax
> +10:
> +	leal PDE_IDENT_ATTR(%edi),%ecx		/* Create PMD entry */
> +	movl %ecx,(%edx)			/* Store PMD entry */
> +						/* Upper half already zero */
> +	addl $8,%edx
> +	movl $512,%ecx
> +11:
> +	stosl
> +	xchgl %eax,%ebx
> +	stosl
> +	xchgl %eax,%ebx
> +	addl $0x1000,%eax
> +	loop 11b
> +
> +	/*
> +	 * End condition: we must map up to the end + MAPPING_BEYOND_END.
> +	 */
> +	movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
> +	cmpl %ebp,%eax
> +	jb 10b
> +1:
> +	addl $__PAGE_OFFSET, %edi
> +	movl %edi, pa(_brk_end)
> +	shrl $12, %eax
> +	movl %eax, pa(max_pfn_mapped)
> +
> +	/* Do early initialization of the fixmap area */
> +	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
> +	movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8)
> +#else	/* Not PAE */
> +
> +page_pde_offset = (__PAGE_OFFSET >> 20);
> +
> +	movl $pa(__brk_base), %edi
> +	movl $pa(initial_page_table), %edx
> +	movl $PTE_IDENT_ATTR, %eax
> +10:
> +	leal PDE_IDENT_ATTR(%edi),%ecx		/* Create PDE entry */
> +	movl %ecx,(%edx)			/* Store identity PDE entry */
> +	movl %ecx,page_pde_offset(%edx)		/* Store kernel PDE entry */
> +	addl $4,%edx
> +	movl $1024, %ecx
> +11:
> +	stosl
> +	addl $0x1000,%eax
> +	loop 11b
> +	/*
> +	 * End condition: we must map up to the end + MAPPING_BEYOND_END.
> +	 */
> +	movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
> +	cmpl %ebp,%eax
> +	jb 10b
> +	addl $__PAGE_OFFSET, %edi
> +	movl %edi, pa(_brk_end)
> +	shrl $12, %eax
> +	movl %eax, pa(max_pfn_mapped)
> +
> +	/* Do early initialization of the fixmap area */
> +	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
> +	movl %eax,pa(initial_page_table+0xffc)
> +#endif
> +	ret
> +ENDPROC(setup_pgtable_32)
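To make the control flow above easier to review, here is a rough C
rendition of the non-PAE path. Everything in it is a stand-in I made up
for illustration (pgd[], pte_pages[], build_initial_tables()); the real
code hands out PTE pages from the brk reservation and stores physical
addresses, not host pointers:

#include <stdio.h>
#include <stdint.h>

#define PTE_IDENT_ATTR	0x003		/* PRESENT+RW */
#define PDE_IDENT_ATTR	0x067		/* PRESENT+RW+USER+DIRTY+ACCESSED */
#define PAGE_OFFSET	0xC0000000u	/* VMSPLIT_3G assumed */

static uint32_t pgd[1024];		/* plays initial_page_table */
static uint32_t pte_pages[256][1024];	/* plays the brk area       */

static void build_initial_tables(uint32_t map_limit)
{
	uint32_t phys = 0;
	unsigned int pde = 0;

	while (phys < map_limit) {	/* cmpl %ebp,%eax; jb 10b */
		uint32_t entry = (uint32_t)(uintptr_t)pte_pages[pde]
				 | PDE_IDENT_ATTR;

		pgd[pde] = entry;			/* identity slot */
		pgd[pde + (PAGE_OFFSET >> 22)] = entry;	/* kernel slot   */

		for (int i = 0; i < 1024; i++) {	/* the 11: loop  */
			pte_pages[pde][i] = phys | PTE_IDENT_ATTR;
			phys += 0x1000;
		}
		pde++;
	}
}

int main(void)
{
	build_initial_tables(8 << 20);	/* map the first 8 MiB */
	/* both slots of PDE 0 point at the same PTE page: prints 1 */
	printf("%d\n", pgd[0] == pgd[PAGE_OFFSET >> 22]);
	return 0;
}

The PAE path is the same loop with 8-byte entries, 512 PTEs per table and
the KPMDS-sized static PGD shown further down.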
> +
> +/*
> + * BSS section
> + */
> +__PAGE_ALIGNED_BSS
> +	.align PAGE_SIZE
> +#ifdef CONFIG_X86_PAE
> +initial_pg_pmd:
> +	.fill 1024*KPMDS,4,0
> +#else
> +ENTRY(initial_page_table)
> +	.fill 1024,4,0
> +#endif
> +initial_pg_fixmap:
> +	.fill 1024,4,0
> +
> +/*
> + * This starts the data section.
> + */
> +#ifdef CONFIG_X86_PAE
> +__PAGE_ALIGNED_DATA
> +	/* Page-aligned for the benefit of paravirt? */
> +	.align PAGE_SIZE
> +ENTRY(initial_page_table)
> +	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR),0	/* low identity map */
> +# if KPMDS == 3
> +	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR),0
> +	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR+0x1000),0
> +	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR+0x2000),0
> +# elif KPMDS == 2
> +	.long	0,0
> +	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR),0
> +	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR+0x1000),0
> +# elif KPMDS == 1
> +	.long	0,0
> +	.long	0,0
> +	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR),0
> +# else
> +# error "Kernel PMDs should be 1, 2 or 3"
> +# endif
> +	.align PAGE_SIZE		/* needs to be page-sized too */
> +#endif
>
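One last note on the two fixmap stores in setup_pgtable_32: 0xffc is the
byte offset of the last 4-byte slot of initial_page_table (PDE 1023, the
top 4 MiB of virtual space, which is where the fixmap lives), and
0x1000*KPMDS-8 is the last 8-byte slot of the last kernel PMD page. A
small check of the offsets, assuming KPMDS == 1 (the VMSPLIT_3G case):

#include <stdio.h>

int main(void)
{
	/* non-PAE: one 4-byte PDE per 4 MiB of virtual space */
	unsigned int idx = 0xffc / 4;
	/* slot 1023 -> 0xffc00000: the top 4 MiB */
	printf("non-PAE slot %u -> vaddr %#x\n", idx, idx << 22);

	/* PAE, KPMDS == 1: one 8-byte PDE per 2 MiB */
	unsigned int pae_idx = (0x1000 * 1 - 8) / 8;
	/* slot 511 of the last kernel PMD page -> the top 2 MiB */
	printf("PAE slot %u of the last kernel PMD\n", pae_idx);
	return 0;
}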