Subject: Re: [PATCH 2/8] x86/head: Refactor 32-bit pgtable setup
From: hpa@zytor.com
To: Boris Ostrovsky <boris.ostrovsky@oracle.com>, david.vrabel@citrix.com, JGross@suse.com
Cc: roger.pau@citrix.com, linux-kernel@vger.kernel.org, xen-devel@lists.xenproject.org, Thomas Gleixner, Ingo Molnar, x86@kernel.org, Matt Fleming
Date: Fri, 14 Oct 2016 11:31:03 -0700
Message-ID: <119777C5-564B-4171-9452-3E216BC7442E@zytor.com>
In-Reply-To: <1476468318-24422-3-git-send-email-boris.ostrovsky@oracle.com>
References: <1476468318-24422-1-git-send-email-boris.ostrovsky@oracle.com>
 <1476468318-24422-3-git-send-email-boris.ostrovsky@oracle.com>
User-Agent: K-9 Mail for Android

On October 14, 2016 11:05:12 AM PDT, Boris Ostrovsky <boris.ostrovsky@oracle.com> wrote:
>From: Matt Fleming
>
>The new Xen PVH entry point requires page tables to be setup by the
>kernel since it is entered with paging disabled.
>
>Pull the common code out of head_32.S and into pgtable_32.S so that
>setup_pgtable_32 can be invoked from both the new Xen entry point and
>the existing startup_32 code.
>
>Cc: Boris Ostrovsky
>Cc: Thomas Gleixner
>Cc: Ingo Molnar
>Cc: "H. Peter Anvin"
>Cc: x86@kernel.org
>Signed-off-by: Matt Fleming
>---
> arch/x86/Makefile            |   2 +
> arch/x86/kernel/Makefile     |   2 +
> arch/x86/kernel/head_32.S    | 168 +------------------------------------
> arch/x86/kernel/pgtable_32.S | 196 +++++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 201 insertions(+), 167 deletions(-)
> create mode 100644 arch/x86/kernel/pgtable_32.S
>
>diff --git a/arch/x86/Makefile b/arch/x86/Makefile
>index 2d44933..67cc771 100644
>--- a/arch/x86/Makefile
>+++ b/arch/x86/Makefile
>@@ -204,6 +204,8 @@ head-y += arch/x86/kernel/head$(BITS).o
> head-y += arch/x86/kernel/ebda.o
> head-y += arch/x86/kernel/platform-quirks.o
>
>+head-$(CONFIG_X86_32) += arch/x86/kernel/pgtable_32.o
>+
> libs-y += arch/x86/lib/
>
> # See arch/x86/Kbuild for content of core part of the kernel
>diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
>index 4dd5d50..eae85a5 100644
>--- a/arch/x86/kernel/Makefile
>+++ b/arch/x86/kernel/Makefile
>@@ -8,6 +8,8 @@ extra-y += ebda.o
> extra-y += platform-quirks.o
> extra-y += vmlinux.lds
>
>+extra-$(CONFIG_X86_32) += pgtable_32.o
>+
> CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
>
> ifdef CONFIG_FUNCTION_TRACER
>diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
>index 5f40126..0db066e 100644
>--- a/arch/x86/kernel/head_32.S
>+++ b/arch/x86/kernel/head_32.S
>@@ -41,51 +41,6 @@
> #define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id
>
> /*
>- * This is how much memory in addition to the memory covered up to
>- * and including _end we need mapped initially.
>- * We need:
>- *  (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
>- *  (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
>- *
>- * Modulo rounding, each megabyte assigned here requires a kilobyte of
>- * memory, which is currently unreclaimed.
>- *
>- * This should be a multiple of a page.
>- *
>- * KERNEL_IMAGE_SIZE should be greater than pa(_end)
>- * and small than max_low_pfn, otherwise will waste some page table entries
>- */
>-
>-#if PTRS_PER_PMD > 1
>-#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
>-#else
>-#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
>-#endif
>-
>-/*
>- * Number of possible pages in the lowmem region.
>- *
>- * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a
>- * gas warning about overflowing shift count when gas has been compiled
>- * with only a host target support using a 32-bit type for internal
>- * representation.
>- */
>-LOWMEM_PAGES = (((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT)
>-
>-/* Enough space to fit pagetables for the low memory linear map */
>-MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT
>-
>-/*
>- * Worst-case size of the kernel mapping we need to make:
>- * a relocatable kernel can live anywhere in lowmem, so we need to be able
>- * to map all of lowmem.
>- */
>-KERNEL_PAGES = LOWMEM_PAGES
>-
>-INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE
>-RESERVE_BRK(pagetables, INIT_MAP_SIZE)
>-
>-/*
>  * 32-bit kernel entrypoint; only used by the boot CPU.  On entry,
>  * %esi points to the real-mode code as a 32-bit pointer.
>  * CS and DS must be 4 GB flat segments, but we don't depend on
>@@ -157,92 +112,7 @@ ENTRY(startup_32)
> 	call load_ucode_bsp
> #endif
>
>-/*
>- * Initialize page tables.  This creates a PDE and a set of page
>- * tables, which are located immediately beyond __brk_base.  The variable
>- * _brk_end is set up to point to the first "safe" location.
>- * Mappings are created both at virtual address 0 (identity mapping)
>- * and PAGE_OFFSET for up to _end.
>- */
>-#ifdef CONFIG_X86_PAE
>-
>-	/*
>-	 * In PAE mode initial_page_table is statically defined to contain
>-	 * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
>-	 * entries). The identity mapping is handled by pointing two PGD entries
>-	 * to the first kernel PMD.
>-	 *
>-	 * Note the upper half of each PMD or PTE are always zero at this stage.
>-	 */
>-
>-#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
>-
>-	xorl %ebx,%ebx		/* %ebx is kept at zero */
>-
>-	movl $pa(__brk_base), %edi
>-	movl $pa(initial_pg_pmd), %edx
>-	movl $PTE_IDENT_ATTR, %eax
>-10:
>-	leal PDE_IDENT_ATTR(%edi),%ecx	/* Create PMD entry */
>-	movl %ecx,(%edx)		/* Store PMD entry */
>-					/* Upper half already zero */
>-	addl $8,%edx
>-	movl $512,%ecx
>-11:
>-	stosl
>-	xchgl %eax,%ebx
>-	stosl
>-	xchgl %eax,%ebx
>-	addl $0x1000,%eax
>-	loop 11b
>-
>-	/*
>-	 * End condition: we must map up to the end + MAPPING_BEYOND_END.
>-	 */
>-	movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
>-	cmpl %ebp,%eax
>-	jb 10b
>-1:
>-	addl $__PAGE_OFFSET, %edi
>-	movl %edi, pa(_brk_end)
>-	shrl $12, %eax
>-	movl %eax, pa(max_pfn_mapped)
>-
>-	/* Do early initialization of the fixmap area */
>-	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
>-	movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8)
>-#else	/* Not PAE */
>-
>-page_pde_offset = (__PAGE_OFFSET >> 20);
>-
>-	movl $pa(__brk_base), %edi
>-	movl $pa(initial_page_table), %edx
>-	movl $PTE_IDENT_ATTR, %eax
>-10:
>-	leal PDE_IDENT_ATTR(%edi),%ecx	/* Create PDE entry */
>-	movl %ecx,(%edx)		/* Store identity PDE entry */
>-	movl %ecx,page_pde_offset(%edx)	/* Store kernel PDE entry */
>-	addl $4,%edx
>-	movl $1024, %ecx
>-11:
>-	stosl
>-	addl $0x1000,%eax
>-	loop 11b
>-	/*
>-	 * End condition: we must map up to the end + MAPPING_BEYOND_END.
>-	 */
>-	movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
>-	cmpl %ebp,%eax
>-	jb 10b
>-	addl $__PAGE_OFFSET, %edi
>-	movl %edi, pa(_brk_end)
>-	shrl $12, %eax
>-	movl %eax, pa(max_pfn_mapped)
>-
>-	/* Do early initialization of the fixmap area */
>-	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
>-	movl %eax,pa(initial_page_table+0xffc)
>-#endif
>+	call setup_pgtable_32
>
> #ifdef CONFIG_PARAVIRT
> 	/* This is can only trip for a broken bootloader... */
>@@ -660,47 +530,11 @@ ENTRY(setup_once_ref)
>  */
> __PAGE_ALIGNED_BSS
> 	.align PAGE_SIZE
>-#ifdef CONFIG_X86_PAE
>-initial_pg_pmd:
>-	.fill 1024*KPMDS,4,0
>-#else
>-ENTRY(initial_page_table)
>-	.fill 1024,4,0
>-#endif
>-initial_pg_fixmap:
>-	.fill 1024,4,0
> ENTRY(empty_zero_page)
> 	.fill 4096,1,0
> ENTRY(swapper_pg_dir)
> 	.fill 1024,4,0
>
>-/*
>- * This starts the data section.
>- */
>-#ifdef CONFIG_X86_PAE
>-__PAGE_ALIGNED_DATA
>-	/* Page-aligned for the benefit of paravirt? */
>-	.align PAGE_SIZE
>-ENTRY(initial_page_table)
>-	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR),0	/* low identity map */
>-# if KPMDS == 3
>-	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR),0
>-	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR+0x1000),0
>-	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR+0x2000),0
>-# elif KPMDS == 2
>-	.long	0,0
>-	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR),0
>-	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR+0x1000),0
>-# elif KPMDS == 1
>-	.long	0,0
>-	.long	0,0
>-	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR),0
>-# else
>-# error "Kernel PMDs should be 1, 2 or 3"
>-# endif
>-	.align PAGE_SIZE	/* needs to be page-sized too */
>-#endif
>-
> .data
> .balign 4
> ENTRY(initial_stack)
>diff --git a/arch/x86/kernel/pgtable_32.S b/arch/x86/kernel/pgtable_32.S
>new file mode 100644
>index 0000000..aded718
>--- /dev/null
>+++ b/arch/x86/kernel/pgtable_32.S
>@@ -0,0 +1,196 @@
>+#include
>+#include
>+#include
>+#include
>+#include
>+#include
>+#include
>+#include
>+#include
>+#include
>+#include
>+#include
>+#include
>+#include
>+#include
>+#include
>+
>+/* Physical address */
>+#define pa(X) ((X) - __PAGE_OFFSET)
>+
>+/*
>+ * This is how much memory in addition to the memory covered up to
>+ * and including _end we need mapped initially.
>+ * We need:
>+ *  (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
>+ *  (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
>+ *
>+ * Modulo rounding, each megabyte assigned here requires a kilobyte of
>+ * memory, which is currently unreclaimed.
>+ *
>+ * This should be a multiple of a page.
>+ *
>+ * KERNEL_IMAGE_SIZE should be greater than pa(_end)
>+ * and small than max_low_pfn, otherwise will waste some page table entries
>+ */
>+
>+#if PTRS_PER_PMD > 1
>+#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
>+#else
>+#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
>+#endif
>+
>+/*
>+ * Number of possible pages in the lowmem region.
>+ *
>+ * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a
>+ * gas warning about overflowing shift count when gas has been compiled
>+ * with only a host target support using a 32-bit type for internal
>+ * representation.
>+ */
>+LOWMEM_PAGES = (((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT)
>+
>+/* Enough space to fit pagetables for the low memory linear map */
>+MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT
>+
>+/*
>+ * Worst-case size of the kernel mapping we need to make:
>+ * a relocatable kernel can live anywhere in lowmem, so we need to be able
>+ * to map all of lowmem.
>+ */
>+KERNEL_PAGES = LOWMEM_PAGES
>+
>+INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE
>+RESERVE_BRK(pagetables, INIT_MAP_SIZE)
>+
>+/*
>+ * Initialize page tables.  This creates a PDE and a set of page
>+ * tables, which are located immediately beyond __brk_base.  The variable
>+ * _brk_end is set up to point to the first "safe" location.
>+ * Mappings are created both at virtual address 0 (identity mapping)
>+ * and PAGE_OFFSET for up to _end.
>+ */
>+	.text
>+ENTRY(setup_pgtable_32)
>+#ifdef CONFIG_X86_PAE
>+	/*
>+	 * In PAE mode initial_page_table is statically defined to contain
>+	 * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
>+	 * entries). The identity mapping is handled by pointing two PGD entries
>+	 * to the first kernel PMD.
>+	 *
>+	 * Note the upper half of each PMD or PTE are always zero at this stage.
>+	 */
>+
>+#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
>+
>+	xorl %ebx,%ebx		/* %ebx is kept at zero */
>+
>+	movl $pa(__brk_base), %edi
>+	movl $pa(initial_pg_pmd), %edx
>+	movl $PTE_IDENT_ATTR, %eax
>+10:
>+	leal PDE_IDENT_ATTR(%edi),%ecx	/* Create PMD entry */
>+	movl %ecx,(%edx)		/* Store PMD entry */
>+					/* Upper half already zero */
>+	addl $8,%edx
>+	movl $512,%ecx
>+11:
>+	stosl
>+	xchgl %eax,%ebx
>+	stosl
>+	xchgl %eax,%ebx
>+	addl $0x1000,%eax
>+	loop 11b
>+
>+	/*
>+	 * End condition: we must map up to the end + MAPPING_BEYOND_END.
>+	 */
>+	movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
>+	cmpl %ebp,%eax
>+	jb 10b
>+1:
>+	addl $__PAGE_OFFSET, %edi
>+	movl %edi, pa(_brk_end)
>+	shrl $12, %eax
>+	movl %eax, pa(max_pfn_mapped)
>+
>+	/* Do early initialization of the fixmap area */
>+	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
>+	movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8)
>+#else	/* Not PAE */
>+
>+page_pde_offset = (__PAGE_OFFSET >> 20);
>+
>+	movl $pa(__brk_base), %edi
>+	movl $pa(initial_page_table), %edx
>+	movl $PTE_IDENT_ATTR, %eax
>+10:
>+	leal PDE_IDENT_ATTR(%edi),%ecx	/* Create PDE entry */
>+	movl %ecx,(%edx)		/* Store identity PDE entry */
>+	movl %ecx,page_pde_offset(%edx)	/* Store kernel PDE entry */
>+	addl $4,%edx
>+	movl $1024, %ecx
>+11:
>+	stosl
>+	addl $0x1000,%eax
>+	loop 11b
>+	/*
>+	 * End condition: we must map up to the end + MAPPING_BEYOND_END.
>+	 */
>+	movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
>+	cmpl %ebp,%eax
>+	jb 10b
>+	addl $__PAGE_OFFSET, %edi
>+	movl %edi, pa(_brk_end)
>+	shrl $12, %eax
>+	movl %eax, pa(max_pfn_mapped)
>+
>+	/* Do early initialization of the fixmap area */
>+	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
>+	movl %eax,pa(initial_page_table+0xffc)
>+#endif
>+	ret
>+ENDPROC(setup_pgtable_32)
>+
>+/*
>+ * BSS section
>+ */
>+__PAGE_ALIGNED_BSS
>+	.align PAGE_SIZE
>+#ifdef CONFIG_X86_PAE
>+initial_pg_pmd:
>+	.fill 1024*KPMDS,4,0
>+#else
>+ENTRY(initial_page_table)
>+	.fill 1024,4,0
>+#endif
>+initial_pg_fixmap:
>+	.fill 1024,4,0
>+
>+/*
>+ * This starts the data section.
>+ */
>+#ifdef CONFIG_X86_PAE
>+__PAGE_ALIGNED_DATA
>+	/* Page-aligned for the benefit of paravirt? */
>+	.align PAGE_SIZE
>+ENTRY(initial_page_table)
>+	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR),0	/* low identity map */
>+# if KPMDS == 3
>+	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR),0
>+	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR+0x1000),0
>+	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR+0x2000),0
>+# elif KPMDS == 2
>+	.long	0,0
>+	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR),0
>+	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR+0x1000),0
>+# elif KPMDS == 1
>+	.long	0,0
>+	.long	0,0
>+	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR),0
>+# else
>+# error "Kernel PMDs should be 1, 2 or 3"
>+# endif
>+	.align PAGE_SIZE	/* needs to be page-sized too */
>+#endif

And why does it need a separate entry point as opposed to the plain one?

-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.
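
[Aside for readers of the archive: the worst-case sizing in the comment
block the patch moves can be sanity-checked in userspace. The sketch
below is illustrative and not part of the patch; it assumes the default
VMSPLIT_3G layout (__PAGE_OFFSET = 0xC0000000), 4 KiB pages, and the
PTRS_PER_PGD/PTRS_PER_PMD values the macros rely on.]

	#include <stdio.h>

	#define PAGE_SHIFT   12
	#define PAGE_SIZE    (1UL << PAGE_SHIFT)
	#define PAGE_OFFSET  0xC0000000UL   /* assumed: default VMSPLIT_3G */

	int main(void)
	{
		/* LOWMEM_PAGES: 4 GiB minus PAGE_OFFSET, in pages */
		unsigned long lowmem = (unsigned long)((2ULL << 31) - PAGE_OFFSET) >> PAGE_SHIFT;

		/* non-PAE: one 4 KiB page table maps 1024 pages (PTRS_PER_PGD == 1024) */
		unsigned long nonpae = lowmem / 1024;

		/* PAE: one page table maps 512 pages (PTRS_PER_PMD == 512),
		 * plus PTRS_PER_PGD == 4 extra pages in the worst case */
		unsigned long pae = lowmem / 512 + 4;

		/* KPMDS: number of kernel PMDs covering the VMSPLIT */
		unsigned long kpmds = ((0UL - PAGE_OFFSET) >> 30) & 3;

		printf("LOWMEM_PAGES       = %lu\n", lowmem);   /* 262144 */
		printf("non-PAE pagetables = %lu pages (%lu KiB)\n",
		       nonpae, nonpae * PAGE_SIZE / 1024);     /* 256 pages, 1024 KiB */
		printf("PAE pagetables     = %lu pages (%lu KiB)\n",
		       pae, pae * PAGE_SIZE / 1024);           /* 516 pages, 2064 KiB */
		printf("KPMDS              = %lu\n", kpmds);   /* 1 */
		return 0;
	}

This reproduces the "each megabyte assigned here requires a kilobyte"
rule of thumb from the comment (1024 MiB of lowmem costs 1024 KiB of
page tables in the non-PAE case), and KPMDS = 1 for a 3G/1G split
matches the "KPMDS == 1" branch of the static initial_page_table.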