2018-02-14 11:20:33

by Kirill A. Shutemov

Subject: [PATCH 0/9] x86/mm: Dynamic memory layout

This is the next batch of patches required to enable boot-time switching
between 4- and 5-level paging. Please review and consider applying.

The patches in this series make the memory layout dynamic enough to allow
switching between paging modes at boot time.

Ingo, it is worth noting that we discussed some parts of the patchset
before (back in October) and had a disagreement on how to handle the
situation. You can read my position on the matter at the link below [1].

Some of the changes made in this patchset can later be replaced by patching
constants directly in the code, once we have the infrastructure for that.
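
As an illustration only (a sketch, not something this series implements, and
the names below are made up): one way such run-time checks could later be
optimised away is via a static key, so the branch is patched at boot:

#include <linux/jump_label.h>

/* Hypothetical follow-up, not part of this series. */
DEFINE_STATIC_KEY_FALSE(l5_paging_key);

static inline bool l5_paging_enabled(void)
{
	return static_branch_likely(&l5_paging_key);
}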

[1] http://lkml.kernel.org/r/[email protected]

Kirill A. Shutemov (9):
x86/mm/64: Make __PHYSICAL_MASK_SHIFT always 52
mm/zsmalloc: Prepare to variable MAX_PHYSMEM_BITS
x86/mm: Make virtual memory layout movable for CONFIG_X86_5LEVEL
x86: Introduce pgtable_l5_enabled
x86/mm: Make LDT_BASE_ADDR dynamic
x86/mm: Make PGDIR_SHIFT and PTRS_PER_P4D variable
x86/mm: Make MAX_PHYSADDR_BITS and MAX_PHYSMEM_BITS dynamic
x86/mm: Make __VIRTUAL_MASK_SHIFT dynamic
x86/mm: Adjust virtual address space layout in early boot

arch/x86/Kconfig | 9 ++++
arch/x86/boot/compressed/kaslr.c | 14 ++++++
arch/x86/entry/entry_64.S | 12 ++++++
arch/x86/include/asm/kaslr.h | 4 --
arch/x86/include/asm/page_64.h | 4 ++
arch/x86/include/asm/page_64_types.h | 20 ++++-----
arch/x86/include/asm/pgtable-3level_types.h | 1 +
arch/x86/include/asm/pgtable_32.h | 2 +
arch/x86/include/asm/pgtable_32_types.h | 2 +
arch/x86/include/asm/pgtable_64_types.h | 67 ++++++++++++++++++-----------
arch/x86/include/asm/sparsemem.h | 9 +---
arch/x86/kernel/cpu/mcheck/mce.c | 18 +++-----
arch/x86/kernel/head64.c | 57 ++++++++++++++++++++++--
arch/x86/kernel/head_64.S | 2 +-
arch/x86/kernel/setup.c | 5 +--
arch/x86/mm/dump_pagetables.c | 32 +++++++++-----
arch/x86/mm/init_64.c | 2 +-
arch/x86/mm/kasan_init_64.c | 2 +-
arch/x86/mm/kaslr.c | 23 ++++------
arch/x86/platform/efi/efi_64.c | 4 +-
include/asm-generic/5level-fixup.h | 1 +
include/asm-generic/pgtable-nop4d.h | 9 ++--
include/linux/kasan.h | 2 +-
mm/kasan/kasan_init.c | 2 +-
mm/zsmalloc.c | 13 +++---
25 files changed, 208 insertions(+), 108 deletions(-)

--
2.15.1



2018-02-14 11:18:20

by Kirill A. Shutemov

Subject: [PATCH 4/9] x86: Introduce pgtable_l5_enabled

The new flag indicates which paging mode we are in.
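
Later patches in the series build on the flag by turning compile-time #ifdef
choices into expressions on it; for example, patch 5 ends up with:

#define LDT_PGD_ENTRY	(pgtable_l5_enabled ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4)
#define LDT_BASE_ADDR	(LDT_PGD_ENTRY << PGDIR_SHIFT)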

Signed-off-by: Kirill A. Shutemov <[email protected]>
---
arch/x86/boot/compressed/kaslr.c | 4 ++++
arch/x86/include/asm/pgtable_32_types.h | 2 ++
arch/x86/include/asm/pgtable_64_types.h | 6 ++++++
arch/x86/kernel/head64.c | 5 +++++
4 files changed, 17 insertions(+)

diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 8199a6187251..bd69e1830fbe 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -46,6 +46,10 @@
#define STATIC
#include <linux/decompress/mm.h>

+#ifdef CONFIG_X86_5LEVEL
+unsigned int pgtable_l5_enabled __ro_after_init = 1;
+#endif
+
extern unsigned long get_cmd_line_ptr(void);

/* Simplified build-specific string for starting entropy. */
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index 0777e18a1d23..e3225e83db7d 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -15,6 +15,8 @@
# include <asm/pgtable-2level_types.h>
#endif

+#define pgtable_l5_enabled 0
+
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))

diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index a0db91ab63b8..5e2d724f8f47 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -20,6 +20,12 @@ typedef unsigned long pgprotval_t;

typedef struct { pteval_t pte; } pte_t;

+#ifdef CONFIG_X86_5LEVEL
+extern unsigned int pgtable_l5_enabled;
+#else
+#define pgtable_l5_enabled 0
+#endif
+
#endif /* !__ASSEMBLY__ */

#define SHARED_KERNEL_PMD 0
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index bf5c9ba63ba1..17d00d1886de 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -39,6 +39,11 @@ extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
static unsigned int __initdata next_early_pgt;
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);

+#ifdef CONFIG_X86_5LEVEL
+unsigned int pgtable_l5_enabled __ro_after_init = 1;
+EXPORT_SYMBOL(pgtable_l5_enabled);
+#endif
+
#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE;
EXPORT_SYMBOL(page_offset_base);
--
2.15.1


2018-02-14 11:18:51

by Kirill A. Shutemov

Subject: [PATCH 2/9] mm/zsmalloc: Prepare to variable MAX_PHYSMEM_BITS

With boot-time switching between paging modes we will have a variable
MAX_PHYSMEM_BITS.

Let's use the maximum value MAX_PHYSMEM_BITS can take in the
CONFIG_X86_5LEVEL=y configuration to define the zsmalloc data structures.

The patch introduces MAX_POSSIBLE_PHYSMEM_BITS to cover that case.
It also handles the PAE special case well.
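
A quick worked example of what the new macro is sized for on x86-64 with
CONFIG_X86_5LEVEL=y:

/*
 * MAX_POSSIBLE_PHYSMEM_BITS = 52, PAGE_SHIFT = 12
 *     _PFN_BITS = 52 - 12 = 40
 * i.e. the zsmalloc handle encoding is laid out for the largest possible
 * physical address space even if the kernel later boots in 4-level mode.
 */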

Signed-off-by: Kirill A. Shutemov <[email protected]>
Acked-by: Minchan Kim <[email protected]>
Reviewed-by: Nitin Gupta <[email protected]>
Cc: Sergey Senozhatsky <[email protected]>
---
arch/x86/include/asm/pgtable-3level_types.h | 1 +
arch/x86/include/asm/pgtable_64_types.h | 2 ++
mm/zsmalloc.c | 13 +++++++------
3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h
index 876b4c77d983..6a59a6d0cc50 100644
--- a/arch/x86/include/asm/pgtable-3level_types.h
+++ b/arch/x86/include/asm/pgtable-3level_types.h
@@ -44,5 +44,6 @@ typedef union {
*/
#define PTRS_PER_PTE 512

+#define MAX_POSSIBLE_PHYSMEM_BITS 36

#endif /* _ASM_X86_PGTABLE_3LEVEL_DEFS_H */
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 6b8f73dcbc2c..7168de7d34eb 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -40,6 +40,8 @@ typedef struct { pteval_t pte; } pte_t;
#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
#define P4D_MASK (~(P4D_SIZE - 1))

+#define MAX_POSSIBLE_PHYSMEM_BITS 52
+
#else /* CONFIG_X86_5LEVEL */

/*
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index c3013505c305..b7f61cd1c709 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -84,18 +84,19 @@
* This is made more complicated by various memory models and PAE.
*/

-#ifndef MAX_PHYSMEM_BITS
-#ifdef CONFIG_HIGHMEM64G
-#define MAX_PHYSMEM_BITS 36
-#else /* !CONFIG_HIGHMEM64G */
+#ifndef MAX_POSSIBLE_PHYSMEM_BITS
+#ifdef MAX_PHYSMEM_BITS
+#define MAX_POSSIBLE_PHYSMEM_BITS MAX_PHYSMEM_BITS
+#else
/*
* If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just
* be PAGE_SHIFT
*/
-#define MAX_PHYSMEM_BITS BITS_PER_LONG
+#define MAX_POSSIBLE_PHYSMEM_BITS BITS_PER_LONG
#endif
#endif
-#define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT)
+
+#define _PFN_BITS (MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT)

/*
* Memory for allocating for handle keeps object position by
--
2.15.1


2018-02-14 11:18:53

by Kirill A. Shutemov

Subject: [PATCH 8/9] x86/mm: Make __VIRTUAL_MASK_SHIFT dynamic

For boot-time switching between paging modes, we need to be able to
adjust virtual mask shifts.
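
As a minimal sketch (a C equivalent of the entry-code change below, not part
of the patch), the canonicality test relies on sign-extending from the top
implemented virtual bit:

#include <linux/types.h>
#include <asm/page_64_types.h>	/* __VIRTUAL_MASK_SHIFT */

/* Sketch only: a canonical address survives the shift round trip unchanged. */
static inline bool example_addr_is_canonical(unsigned long addr)
{
	int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);	/* 64-48 or 64-57 */

	return (unsigned long)((long)(addr << shift) >> shift) == addr;
}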

The change doesn't affect the kernel image size much:

text data bss dec hex filename
8628892 4734340 1368064 14731296 e0c820 vmlinux.before
8628966 4734340 1368064 14731370 e0c86a vmlinux.after

Signed-off-by: Kirill A. Shutemov <[email protected]>
---
arch/x86/entry/entry_64.S | 12 ++++++++++++
arch/x86/include/asm/page_64_types.h | 2 +-
arch/x86/mm/dump_pagetables.c | 12 ++++++++++--
arch/x86/mm/kaslr.c | 4 +++-
4 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index cd216c9431e1..1608b13a0b36 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -260,8 +260,20 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
* Change top bits to match most significant bit (47th or 56th bit
* depending on paging mode) in the address.
*/
+#ifdef CONFIG_X86_5LEVEL
+ testl $1, pgtable_l5_enabled(%rip)
+ jz 1f
+ shl $(64 - 57), %rcx
+ sar $(64 - 57), %rcx
+ jmp 2f
+1:
+ shl $(64 - 48), %rcx
+ sar $(64 - 48), %rcx
+2:
+#else
shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
+#endif

/* If this changed %rcx, it was not canonical */
cmpq %rcx, %r11
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index d54a3d5b5b3b..fa7dc7cd8c19 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -56,7 +56,7 @@
#define __PHYSICAL_MASK_SHIFT 52

#ifdef CONFIG_X86_5LEVEL
-#define __VIRTUAL_MASK_SHIFT 56
+#define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled ? 56 : 47)
#else
#define __VIRTUAL_MASK_SHIFT 47
#endif
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 420058b05d39..9efee6f464ab 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -85,8 +85,12 @@ static struct addr_marker address_markers[] = {
[VMALLOC_START_NR] = { 0UL, "vmalloc() Area" },
[VMEMMAP_START_NR] = { 0UL, "Vmemmap" },
#ifdef CONFIG_KASAN
- [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
- [KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" },
+ /*
+ * These fields get initialized with the (dynamic)
+ * KASAN_SHADOW_{START,END} values in pt_dump_init().
+ */
+ [KASAN_SHADOW_START_NR] = { 0UL, "KASAN shadow" },
+ [KASAN_SHADOW_END_NR] = { 0UL, "KASAN shadow end" },
#endif
#ifdef CONFIG_MODIFY_LDT_SYSCALL
[LDT_NR] = { 0UL, "LDT remap" },
@@ -571,6 +575,10 @@ static int __init pt_dump_init(void)
#ifdef CONFIG_MODIFY_LDT_SYSCALL
address_markers[LDT_NR].start_address = LDT_BASE_ADDR;
#endif
+#ifdef CONFIG_KASAN
+ address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START;
+ address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END;
+#endif
#endif
#ifdef CONFIG_X86_32
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 515b98a8ccee..d079878c6cbc 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -52,7 +52,7 @@ static __initdata struct kaslr_memory_region {
unsigned long *base;
unsigned long size_tb;
} kaslr_regions[] = {
- { &page_offset_base, 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT) /* Maximum */ },
+ { &page_offset_base, 0 },
{ &vmalloc_base, VMALLOC_SIZE_TB },
{ &vmemmap_base, 1 },
};
@@ -93,6 +93,8 @@ void __init kernel_randomize_memory(void)
if (!kaslr_memory_enabled())
return;

+ kaslr_regions[0].size_tb = 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT);
+
/*
* Update Physical memory mapping to available and
* add padding if needed (especially for memory hotplug support).
--
2.15.1


2018-02-14 11:18:57

by Kirill A. Shutemov

Subject: [PATCH 9/9] x86/mm: Adjust virtual address space layout in early boot

We need to adjust the virtual address space layout to support switching
between paging modes.

The adjustment happens in __startup_64().

We also have to change the KASLR code, which doesn't expect a variable
VMALLOC_SIZE_TB.
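
For reference, these are the two layouts the kernel now has to choose between
at boot (values taken from the constants introduced below):

/*
 *                      4-level                 5-level
 * __PAGE_OFFSET_BASE   0xffff880000000000      0xff10000000000000
 * __VMALLOC_BASE       0xffffc90000000000      0xffa0000000000000
 * VMALLOC_SIZE_TB      32                      12800
 * __VMEMMAP_BASE       0xffffea0000000000      0xffd4000000000000
 */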

Signed-off-by: Kirill A. Shutemov <[email protected]>
---
arch/x86/boot/compressed/kaslr.c | 14 ++++++++--
arch/x86/include/asm/page_64_types.h | 9 ++----
arch/x86/include/asm/pgtable_64_types.h | 25 +++++++++--------
arch/x86/kernel/head64.c | 49 +++++++++++++++++++++++++++------
arch/x86/kernel/head_64.S | 2 +-
arch/x86/mm/dump_pagetables.c | 3 ++
arch/x86/mm/kaslr.c | 11 ++++----
7 files changed, 77 insertions(+), 36 deletions(-)

diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index b18e8f9512de..66e42a098d70 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -47,9 +47,9 @@
#include <linux/decompress/mm.h>

#ifdef CONFIG_X86_5LEVEL
-unsigned int pgtable_l5_enabled __ro_after_init = 1;
-unsigned int pgdir_shift __ro_after_init = 48;
-unsigned int ptrs_per_p4d __ro_after_init = 512;
+unsigned int pgtable_l5_enabled __ro_after_init;
+unsigned int pgdir_shift __ro_after_init = 39;
+unsigned int ptrs_per_p4d __ro_after_init = 1;
#endif

extern unsigned long get_cmd_line_ptr(void);
@@ -729,6 +729,14 @@ void choose_random_location(unsigned long input,
return;
}

+#ifdef CONFIG_X86_5LEVEL
+ if (__read_cr4() & X86_CR4_LA57) {
+ pgtable_l5_enabled = 1;
+ pgdir_shift = 48;
+ ptrs_per_p4d = 512;
+ }
+#endif
+
boot_params->hdr.loadflags |= KASLR_FLAG;

/* Prepare to add new identity pagetables on demand. */
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index fa7dc7cd8c19..2c5a966dc222 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -37,16 +37,13 @@
* hypervisor to fit. Choosing 16 slots here is arbitrary, but it's
* what Xen requires.
*/
-#ifdef CONFIG_X86_5LEVEL
-#define __PAGE_OFFSET_BASE _AC(0xff10000000000000, UL)
-#else
-#define __PAGE_OFFSET_BASE _AC(0xffff880000000000, UL)
-#endif
+#define __PAGE_OFFSET_BASE_L5 _AC(0xff10000000000000, UL)
+#define __PAGE_OFFSET_BASE_L4 _AC(0xffff880000000000, UL)

#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
#define __PAGE_OFFSET page_offset_base
#else
-#define __PAGE_OFFSET __PAGE_OFFSET_BASE
+#define __PAGE_OFFSET __PAGE_OFFSET_BASE_L4
#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */

#define __START_KERNEL_map _AC(0xffffffff80000000, UL)
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 59d971c85de5..68909a68e5b9 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -102,25 +102,26 @@ extern unsigned int ptrs_per_p4d;
#define LDT_PGD_ENTRY (pgtable_l5_enabled ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4)
#define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)

-#ifdef CONFIG_X86_5LEVEL
-# define VMALLOC_SIZE_TB _AC(12800, UL)
-# define __VMALLOC_BASE _AC(0xffa0000000000000, UL)
-# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
-#else
-# define VMALLOC_SIZE_TB _AC(32, UL)
-# define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
-# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
-#endif
+#define __VMALLOC_BASE_L4 0xffffc90000000000
+#define __VMALLOC_BASE_L5 0xffa0000000000000
+
+#define VMALLOC_SIZE_TB_L4 32UL
+#define VMALLOC_SIZE_TB_L5 12800UL
+
+#define __VMEMMAP_BASE_L4 0xffffea0000000000
+#define __VMEMMAP_BASE_L5 0xffd4000000000000

#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
# define VMALLOC_START vmalloc_base
+# define VMALLOC_SIZE_TB (pgtable_l5_enabled ? VMALLOC_SIZE_TB_L5 : VMALLOC_SIZE_TB_L4)
# define VMEMMAP_START vmemmap_base
#else
-# define VMALLOC_START __VMALLOC_BASE
-# define VMEMMAP_START __VMEMMAP_BASE
+# define VMALLOC_START __VMALLOC_BASE_L4
+# define VMALLOC_SIZE_TB VMALLOC_SIZE_TB_L4
+# define VMEMMAP_START __VMEMMAP_BASE_L4
#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */

-#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
+#define VMALLOC_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1)

#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
/* The module sections ends with the start of the fixmap */
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 98b0ff49b220..795e762f3c66 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -40,20 +40,20 @@ static unsigned int __initdata next_early_pgt;
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);

#ifdef CONFIG_X86_5LEVEL
-unsigned int pgtable_l5_enabled __ro_after_init = 1;
+unsigned int pgtable_l5_enabled __ro_after_init;
EXPORT_SYMBOL(pgtable_l5_enabled);
-unsigned int pgdir_shift __ro_after_init = 48;
+unsigned int pgdir_shift __ro_after_init = 39;
EXPORT_SYMBOL(pgdir_shift);
-unsigned int ptrs_per_p4d __ro_after_init = 512;
+unsigned int ptrs_per_p4d __ro_after_init = 1;
EXPORT_SYMBOL(ptrs_per_p4d);
#endif

#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
-unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE;
+unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE_L4;
EXPORT_SYMBOL(page_offset_base);
-unsigned long vmalloc_base __ro_after_init = __VMALLOC_BASE;
+unsigned long vmalloc_base __ro_after_init = __VMALLOC_BASE_L4;
EXPORT_SYMBOL(vmalloc_base);
-unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE;
+unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE_L4;
EXPORT_SYMBOL(vmemmap_base);
#endif

@@ -64,10 +64,40 @@ static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
return ptr - (void *)_text + (void *)physaddr;
}

+static unsigned long __head *fixup_long(void *ptr, unsigned long physaddr)
+{
+ return fixup_pointer(ptr, physaddr);
+}
+
+#ifdef CONFIG_X86_5LEVEL
+static unsigned int __head *fixup_int(void *ptr, unsigned long physaddr)
+{
+ return fixup_pointer(ptr, physaddr);
+}
+
+static void __head check_la57_support(unsigned long physaddr)
+{
+ if (native_cpuid_eax(0) < 7)
+ return;
+
+ if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
+ return;
+
+ *fixup_int(&pgtable_l5_enabled, physaddr) = 1;
+ *fixup_int(&pgdir_shift, physaddr) = 48;
+ *fixup_int(&ptrs_per_p4d, physaddr) = 512;
+ *fixup_long(&page_offset_base, physaddr) = __PAGE_OFFSET_BASE_L5;
+ *fixup_long(&vmalloc_base, physaddr) = __VMALLOC_BASE_L5;
+ *fixup_long(&vmemmap_base, physaddr) = __VMEMMAP_BASE_L5;
+}
+#else
+static void __head check_la57_support(unsigned long physaddr) {}
+#endif
+
unsigned long __head __startup_64(unsigned long physaddr,
struct boot_params *bp)
{
- unsigned long load_delta, *p;
+ unsigned long load_delta;
unsigned long pgtable_flags;
pgdval_t *pgd;
p4dval_t *p4d;
@@ -76,6 +106,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
int i;
unsigned int *next_pgt_ptr;

+ check_la57_support(physaddr);
+
/* Is the address too large? */
if (physaddr >> MAX_PHYSMEM_BITS)
for (;;);
@@ -172,8 +204,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
* Fixup phys_base - remove the memory encryption mask to obtain
* the true physical address.
*/
- p = fixup_pointer(&phys_base, physaddr);
- *p += load_delta - sme_get_me_mask();
+ *fixup_long(&phys_base, physaddr) += load_delta - sme_get_me_mask();

/* Encrypt the kernel and related (if SME is active) */
sme_encrypt_kernel(bp);
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 04a625f0fcda..d3f8b43d541a 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -41,7 +41,7 @@
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))

#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
-PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
+PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE_L4)
PGD_START_KERNEL = pgd_index(__START_KERNEL_map)
#endif
L3_START_KERNEL = pud_index(__START_KERNEL_map)
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 9efee6f464ab..a32f0621d664 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -579,6 +579,9 @@ static int __init pt_dump_init(void)
address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START;
address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END;
#endif
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+ address_markers[LDT_NR].start_address = LDT_BASE_ADDR;
+#endif
#endif
#ifdef CONFIG_X86_32
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index d079878c6cbc..641169d38184 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -34,13 +34,10 @@
#define TB_SHIFT 40

/*
- * Virtual address start and end range for randomization.
- *
* The end address could depend on more configuration options to make the
* highest amount of space for randomization available, but that's too hard
* to keep straight and caused issues already.
*/
-static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE;

/*
@@ -53,7 +50,7 @@ static __initdata struct kaslr_memory_region {
unsigned long size_tb;
} kaslr_regions[] = {
{ &page_offset_base, 0 },
- { &vmalloc_base, VMALLOC_SIZE_TB },
+ { &vmalloc_base, 0 },
{ &vmemmap_base, 1 },
};

@@ -76,11 +73,14 @@ static inline bool kaslr_memory_enabled(void)
void __init kernel_randomize_memory(void)
{
size_t i;
- unsigned long vaddr = vaddr_start;
+ unsigned long vaddr_start, vaddr;
unsigned long rand, memory_tb;
struct rnd_state rand_state;
unsigned long remain_entropy;

+ vaddr_start = pgtable_l5_enabled ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4;
+ vaddr = vaddr_start;
+
/*
* These BUILD_BUG_ON checks ensure the memory layout is consistent
* with the vaddr_start/vaddr_end variables. These checks are very
@@ -94,6 +94,7 @@ void __init kernel_randomize_memory(void)
return;

kaslr_regions[0].size_tb = 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT);
+ kaslr_regions[1].size_tb = VMALLOC_SIZE_TB;

/*
* Update Physical memory mapping to available and
--
2.15.1


2018-02-14 11:19:15

by Kirill A. Shutemov

Subject: [PATCH 6/9] x86/mm: Make PGDIR_SHIFT and PTRS_PER_P4D variable

For boot-time switching between 4- and 5-level paging we need to be able
to fold the p4d page table level at runtime. This requires a variable
PGDIR_SHIFT and PTRS_PER_P4D.
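
A minimal sketch of why a variable shift is sufficient (not part of the
patch): the usual top-level index computation then covers both modes without
any #ifdef:

#include <asm/pgtable_64_types.h>	/* PGDIR_SHIFT, PTRS_PER_PGD */

/* Sketch only: PGDIR_SHIFT is 48 with 5-level paging and 39 with 4-level
 * paging (p4d folded), but the expression stays the same. */
static inline unsigned long example_pgd_index(unsigned long address)
{
	return (address >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1);
}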

The change doesn't affect the kernel image size much:

text data bss dec hex filename
8628091 4734304 1368064 14730459 e0c4db vmlinux.before
8628393 4734340 1368064 14730797 e0c62d vmlinux.after

Signed-off-by: Kirill A. Shutemov <[email protected]>
---
arch/x86/boot/compressed/kaslr.c | 2 ++
arch/x86/include/asm/pgtable_32.h | 2 ++
arch/x86/include/asm/pgtable_64_types.h | 19 ++++++++++++-------
arch/x86/kernel/cpu/mcheck/mce.c | 18 ++++++------------
arch/x86/kernel/head64.c | 6 +++++-
arch/x86/mm/dump_pagetables.c | 12 +++++-------
arch/x86/mm/init_64.c | 2 +-
arch/x86/mm/kasan_init_64.c | 2 +-
arch/x86/platform/efi/efi_64.c | 4 ++--
include/asm-generic/5level-fixup.h | 1 +
include/asm-generic/pgtable-nop4d.h | 9 +++++----
include/linux/kasan.h | 2 +-
mm/kasan/kasan_init.c | 2 +-
13 files changed, 44 insertions(+), 37 deletions(-)

diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index bd69e1830fbe..b18e8f9512de 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -48,6 +48,8 @@

#ifdef CONFIG_X86_5LEVEL
unsigned int pgtable_l5_enabled __ro_after_init = 1;
+unsigned int pgdir_shift __ro_after_init = 48;
+unsigned int ptrs_per_p4d __ro_after_init = 512;
#endif

extern unsigned long get_cmd_line_ptr(void);
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index e55466760ff8..b838c51d8c78 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -33,6 +33,8 @@ static inline void pgtable_cache_init(void) { }
static inline void check_pgt_cache(void) { }
void paging_init(void);

+static inline int pgd_large(pgd_t pgd) { return 0; }
+
/*
* Define this if things work differently on an i386 and an i486:
* it will (on an i486) warn about kernel memory accesses that are
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 903e4d054bcb..0c48d80e11d4 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -26,6 +26,9 @@ extern unsigned int pgtable_l5_enabled;
#define pgtable_l5_enabled 0
#endif

+extern unsigned int pgdir_shift;
+extern unsigned int ptrs_per_p4d;
+
#endif /* !__ASSEMBLY__ */

#define SHARED_KERNEL_PMD 0
@@ -35,16 +38,17 @@ extern unsigned int pgtable_l5_enabled;
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
*/
-#define PGDIR_SHIFT 48
+#define PGDIR_SHIFT pgdir_shift
#define PTRS_PER_PGD 512

/*
* 4th level page in 5-level paging case
*/
-#define P4D_SHIFT 39
-#define PTRS_PER_P4D 512
-#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
-#define P4D_MASK (~(P4D_SIZE - 1))
+#define P4D_SHIFT 39
+#define MAX_PTRS_PER_P4D 512
+#define PTRS_PER_P4D ptrs_per_p4d
+#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
+#define P4D_MASK (~(P4D_SIZE - 1))

#define MAX_POSSIBLE_PHYSMEM_BITS 52

@@ -53,8 +57,9 @@ extern unsigned int pgtable_l5_enabled;
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
*/
-#define PGDIR_SHIFT 39
-#define PTRS_PER_PGD 512
+#define PGDIR_SHIFT 39
+#define PTRS_PER_PGD 512
+#define MAX_PTRS_PER_P4D 1

#endif /* CONFIG_X86_5LEVEL */

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 8ff94d1e2dce..3d8c573a9a27 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1088,19 +1088,7 @@ static void mce_unmap_kpfn(unsigned long pfn)
* a legal address.
*/

-/*
- * Build time check to see if we have a spare virtual bit. Don't want
- * to leave this until run time because most developers don't have a
- * system that can exercise this code path. This will only become a
- * problem if/when we move beyond 5-level page tables.
- *
- * Hard code "9" here because cpp doesn't grok ilog2(PTRS_PER_PGD)
- */
-#if PGDIR_SHIFT + 9 < 63
decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
-#else
-#error "no unused virtual bit available"
-#endif

if (set_memory_np(decoy_addr, 1))
pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
@@ -2333,6 +2321,12 @@ static __init int mcheck_init_device(void)
{
int err;

+ /*
+ * Check if we have a spare virtual bit. This will only become
+ * a problem if/when we move beyond 5-level page tables.
+ */
+ MAYBE_BUILD_BUG_ON(__VIRTUAL_MASK_SHIFT >= 63);
+
if (!mce_available(&boot_cpu_data)) {
err = -EIO;
goto err_out;
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 17d00d1886de..98b0ff49b220 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -42,6 +42,10 @@ pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
#ifdef CONFIG_X86_5LEVEL
unsigned int pgtable_l5_enabled __ro_after_init = 1;
EXPORT_SYMBOL(pgtable_l5_enabled);
+unsigned int pgdir_shift __ro_after_init = 48;
+EXPORT_SYMBOL(pgdir_shift);
+unsigned int ptrs_per_p4d __ro_after_init = 512;
+EXPORT_SYMBOL(ptrs_per_p4d);
#endif

#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
@@ -336,7 +340,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
BUILD_BUG_ON((__START_KERNEL_map & ~PMD_MASK) != 0);
BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0);
BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
- BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
+ MAYBE_BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
(__START_KERNEL & PGDIR_MASK)));
BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);

diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index a89f2dbc3531..420058b05d39 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -428,14 +428,15 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
#define p4d_none(a) pud_none(__pud(p4d_val(a)))
#endif

-#if PTRS_PER_P4D > 1
-
static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, unsigned long P)
{
int i;
p4d_t *start, *p4d_start;
pgprotval_t prot;

+ if (PTRS_PER_P4D == 1)
+ return walk_pud_level(m, st, __p4d(pgd_val(addr)), P);
+
p4d_start = start = (p4d_t *)pgd_page_vaddr(addr);

for (i = 0; i < PTRS_PER_P4D; i++) {
@@ -455,11 +456,8 @@ static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
}
}

-#else
-#define walk_p4d_level(m,s,a,p) walk_pud_level(m,s,__p4d(pgd_val(a)),p)
-#define pgd_large(a) p4d_large(__p4d(pgd_val(a)))
-#define pgd_none(a) p4d_none(__p4d(pgd_val(a)))
-#endif
+#define pgd_large(a) (pgtable_l5_enabled ? pgd_large(a) : p4d_large(__p4d(pgd_val(a))))
+#define pgd_none(a) (pgtable_l5_enabled ? pgd_none(a) : p4d_none(__p4d(pgd_val(a))))

static inline bool is_hypervisor_range(int idx)
{
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 8b72923f1d35..c90cfb18405f 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -143,7 +143,7 @@ void sync_global_pgds(unsigned long start, unsigned long end)
* With folded p4d, pgd_none() is always false, we need to
* handle synchonization on p4d level.
*/
- BUILD_BUG_ON(pgd_none(*pgd_ref));
+ MAYBE_BUILD_BUG_ON(pgd_none(*pgd_ref));
p4d_ref = p4d_offset(pgd_ref, addr);

if (p4d_none(*p4d_ref))
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index af6f2f9c6a26..12ec90f62457 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -19,7 +19,7 @@

extern struct range pfn_mapped[E820_MAX_ENTRIES];

-static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
+static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);

static __init void *early_alloc(size_t size, int nid, bool panic)
{
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 780460aa5ea5..d52aaa7dc088 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -257,8 +257,8 @@ void efi_sync_low_kernel_mappings(void)
* only span a single PGD entry and that the entry also maps
* other important kernel regions.
*/
- BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END));
- BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) !=
+ MAYBE_BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END));
+ MAYBE_BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) !=
(EFI_VA_END & PGDIR_MASK));

pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET);
diff --git a/include/asm-generic/5level-fixup.h b/include/asm-generic/5level-fixup.h
index dfbd9d990637..9c2e0708eb82 100644
--- a/include/asm-generic/5level-fixup.h
+++ b/include/asm-generic/5level-fixup.h
@@ -8,6 +8,7 @@
#define P4D_SHIFT PGDIR_SHIFT
#define P4D_SIZE PGDIR_SIZE
#define P4D_MASK PGDIR_MASK
+#define MAX_PTRS_PER_P4D 1
#define PTRS_PER_P4D 1

#define p4d_t pgd_t
diff --git a/include/asm-generic/pgtable-nop4d.h b/include/asm-generic/pgtable-nop4d.h
index 8f22f55de17a..1a29b2a0282b 100644
--- a/include/asm-generic/pgtable-nop4d.h
+++ b/include/asm-generic/pgtable-nop4d.h
@@ -8,10 +8,11 @@

typedef struct { pgd_t pgd; } p4d_t;

-#define P4D_SHIFT PGDIR_SHIFT
-#define PTRS_PER_P4D 1
-#define P4D_SIZE (1UL << P4D_SHIFT)
-#define P4D_MASK (~(P4D_SIZE-1))
+#define P4D_SHIFT PGDIR_SHIFT
+#define MAX_PTRS_PER_P4D 1
+#define PTRS_PER_P4D 1
+#define P4D_SIZE (1UL << P4D_SHIFT)
+#define P4D_MASK (~(P4D_SIZE-1))

/*
* The "pgd_xxx()" functions here are trivial for a folded two-level
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index adc13474a53b..d6459bd1376d 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -18,7 +18,7 @@ extern unsigned char kasan_zero_page[PAGE_SIZE];
extern pte_t kasan_zero_pte[PTRS_PER_PTE];
extern pmd_t kasan_zero_pmd[PTRS_PER_PMD];
extern pud_t kasan_zero_pud[PTRS_PER_PUD];
-extern p4d_t kasan_zero_p4d[PTRS_PER_P4D];
+extern p4d_t kasan_zero_p4d[MAX_PTRS_PER_P4D];

void kasan_populate_zero_shadow(const void *shadow_start,
const void *shadow_end);
diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
index 554e4c0f23a2..f436246ccc79 100644
--- a/mm/kasan/kasan_init.c
+++ b/mm/kasan/kasan_init.c
@@ -31,7 +31,7 @@
unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss;

#if CONFIG_PGTABLE_LEVELS > 4
-p4d_t kasan_zero_p4d[PTRS_PER_P4D] __page_aligned_bss;
+p4d_t kasan_zero_p4d[MAX_PTRS_PER_P4D] __page_aligned_bss;
#endif
#if CONFIG_PGTABLE_LEVELS > 3
pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss;
--
2.15.1


2018-02-14 11:19:22

by Kirill A. Shutemov

Subject: [PATCH 7/9] x86/mm: Make MAX_PHYSADDR_BITS and MAX_PHYSMEM_BITS dynamic

For boot-time switching between paging modes, we need to be able to
adjust the size of the physical address space at runtime.

As part of making the physical address space size variable, we have to make
X86_5LEVEL depend on SPARSEMEM_VMEMMAP: the !SPARSEMEM_VMEMMAP
configuration doesn't build with a variable MAX_PHYSMEM_BITS.

For !SPARSEMEM_VMEMMAP, SECTIONS_WIDTH depends on MAX_PHYSMEM_BITS:

SECTIONS_WIDTH
SECTIONS_SHIFT
MAX_PHYSMEM_BITS

SECTIONS_WIDTH is used at the preprocessor stage, so it doesn't work if it's
dynamic. See include/linux/page-flags-layout.h.
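
A simplified sketch of the problem (not the actual page-flags-layout.h code):

/* Once MAX_PHYSMEM_BITS becomes a runtime expression ... */
#define MAX_PHYSMEM_BITS	(pgtable_l5_enabled ? 52 : 46)
#define SECTIONS_SHIFT		(MAX_PHYSMEM_BITS - SECTION_SIZE_BITS)

/* ... a preprocessor conditional along these lines can no longer be evaluated: */
#if SECTIONS_SHIFT > BITS_PER_LONG - NR_PAGEFLAGS
# error "not enough room for page flags"
#endif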

Effect on the kernel image size:

text data bss dec hex filename
8628393 4734340 1368064 14730797 e0c62d vmlinux.before
8628892 4734340 1368064 14731296 e0c820 vmlinux.after

Signed-off-by: Kirill A. Shutemov <[email protected]>
---
arch/x86/Kconfig | 1 +
arch/x86/include/asm/pgtable_64_types.h | 2 +-
arch/x86/include/asm/sparsemem.h | 9 ++-------
arch/x86/kernel/setup.c | 5 ++---
4 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f925274ddf79..0d780a3ee924 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1472,6 +1472,7 @@ config X86_PAE
config X86_5LEVEL
bool "Enable 5-level page tables support"
select DYNAMIC_MEMORY_LAYOUT
+ select SPARSEMEM_VMEMMAP
depends on X86_64
---help---
5-level paging enables access to larger address space:
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 0c48d80e11d4..59d971c85de5 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -95,7 +95,7 @@ extern unsigned int ptrs_per_p4d;
* range must not overlap with anything except the KASAN shadow area, which
* is correct as KASAN disables KASLR.
*/
-#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
+#define MAXMEM (1UL << MAX_PHYSMEM_BITS)

#define LDT_PGD_ENTRY_L4 -3UL
#define LDT_PGD_ENTRY_L5 -112UL
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h
index 4fc1e9d3c43e..4617a2bf123c 100644
--- a/arch/x86/include/asm/sparsemem.h
+++ b/arch/x86/include/asm/sparsemem.h
@@ -27,13 +27,8 @@
# endif
#else /* CONFIG_X86_32 */
# define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */
-# ifdef CONFIG_X86_5LEVEL
-# define MAX_PHYSADDR_BITS 52
-# define MAX_PHYSMEM_BITS 52
-# else
-# define MAX_PHYSADDR_BITS 44
-# define MAX_PHYSMEM_BITS 46
-# endif
+# define MAX_PHYSADDR_BITS (pgtable_l5_enabled ? 52 : 44)
+# define MAX_PHYSMEM_BITS (pgtable_l5_enabled ? 52 : 46)
#endif

#endif /* CONFIG_SPARSEMEM */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 1ae67e982af7..399d0f7fa8f1 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -189,9 +189,7 @@ struct ist_info ist_info;
#endif

#else
-struct cpuinfo_x86 boot_cpu_data __read_mostly = {
- .x86_phys_bits = MAX_PHYSMEM_BITS,
-};
+struct cpuinfo_x86 boot_cpu_data __read_mostly;
EXPORT_SYMBOL(boot_cpu_data);
#endif

@@ -851,6 +849,7 @@ void __init setup_arch(char **cmdline_p)
__flush_tlb_all();
#else
printk(KERN_INFO "Command line: %s\n", boot_command_line);
+ boot_cpu_data.x86_phys_bits = MAX_PHYSMEM_BITS;
#endif

/*
--
2.15.1


2018-02-14 11:20:06

by Kirill A. Shutemov

Subject: [PATCH 3/9] x86/mm: Make virtual memory layout movable for CONFIG_X86_5LEVEL

We need to be able to adjust the virtual memory layout at runtime so that we
can switch between 4- and 5-level paging at boot time.

KASLR already has movable __VMALLOC_BASE, __VMEMMAP_BASE and __PAGE_OFFSET.
Let's re-use it.
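
A minimal sketch of what "movable" means in practice (not part of the patch):
the direct-map conversion just picks up whatever base the boot code or KASLR
chose.

#include <asm/page_64.h>	/* page_offset_base */

/* Sketch only: equivalent to __va(paddr) once __PAGE_OFFSET resolves to the
 * page_offset_base variable under CONFIG_DYNAMIC_MEMORY_LAYOUT. */
static inline void *example_phys_to_virt(unsigned long paddr)
{
	return (void *)(paddr + page_offset_base);
}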

Signed-off-by: Kirill A. Shutemov <[email protected]>
---
arch/x86/Kconfig | 8 ++++++++
arch/x86/include/asm/kaslr.h | 4 ----
arch/x86/include/asm/page_64.h | 4 ++++
arch/x86/include/asm/page_64_types.h | 4 ++--
arch/x86/include/asm/pgtable_64_types.h | 4 ++--
arch/x86/kernel/head64.c | 9 +++++++++
arch/x86/mm/kaslr.c | 8 --------
7 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a528c14d45a5..f925274ddf79 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1471,6 +1471,7 @@ config X86_PAE

config X86_5LEVEL
bool "Enable 5-level page tables support"
+ select DYNAMIC_MEMORY_LAYOUT
depends on X86_64
---help---
5-level paging enables access to larger address space:
@@ -2184,10 +2185,17 @@ config PHYSICAL_ALIGN

Don't change this unless you know what you are doing.

+config DYNAMIC_MEMORY_LAYOUT
+ bool
+ ---help---
+ This option makes base addresses of vmalloc and vmemmap as well as
+ __PAGE_OFFSET movable during boot.
+
config RANDOMIZE_MEMORY
bool "Randomize the kernel memory sections"
depends on X86_64
depends on RANDOMIZE_BASE
+ select DYNAMIC_MEMORY_LAYOUT
default RANDOMIZE_BASE
---help---
Randomizes the base virtual address of kernel memory sections
diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h
index 460991e3b529..db7ba2feb947 100644
--- a/arch/x86/include/asm/kaslr.h
+++ b/arch/x86/include/asm/kaslr.h
@@ -5,10 +5,6 @@
unsigned long kaslr_get_random_long(const char *purpose);

#ifdef CONFIG_RANDOMIZE_MEMORY
-extern unsigned long page_offset_base;
-extern unsigned long vmalloc_base;
-extern unsigned long vmemmap_base;
-
void kernel_randomize_memory(void);
#else
static inline void kernel_randomize_memory(void) { }
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 9ca8dae9c716..939b1cff4a7b 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -11,6 +11,10 @@
extern unsigned long max_pfn;
extern unsigned long phys_base;

+extern unsigned long page_offset_base;
+extern unsigned long vmalloc_base;
+extern unsigned long vmemmap_base;
+
static inline unsigned long __phys_addr_nodebug(unsigned long x)
{
unsigned long y = x - __START_KERNEL_map;
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index f68e6526891d..d54a3d5b5b3b 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -43,11 +43,11 @@
#define __PAGE_OFFSET_BASE _AC(0xffff880000000000, UL)
#endif

-#ifdef CONFIG_RANDOMIZE_MEMORY
+#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
#define __PAGE_OFFSET page_offset_base
#else
#define __PAGE_OFFSET __PAGE_OFFSET_BASE
-#endif /* CONFIG_RANDOMIZE_MEMORY */
+#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */

#define __START_KERNEL_map _AC(0xffffffff80000000, UL)

diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 7168de7d34eb..a0db91ab63b8 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -100,13 +100,13 @@ typedef struct { pteval_t pte; } pte_t;
# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
#endif

-#ifdef CONFIG_RANDOMIZE_MEMORY
+#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
# define VMALLOC_START vmalloc_base
# define VMEMMAP_START vmemmap_base
#else
# define VMALLOC_START __VMALLOC_BASE
# define VMEMMAP_START __VMEMMAP_BASE
-#endif /* CONFIG_RANDOMIZE_MEMORY */
+#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */

#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 7ba5d819ebe3..bf5c9ba63ba1 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -39,6 +39,15 @@ extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
static unsigned int __initdata next_early_pgt;
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);

+#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
+unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE;
+EXPORT_SYMBOL(page_offset_base);
+unsigned long vmalloc_base __ro_after_init = __VMALLOC_BASE;
+EXPORT_SYMBOL(vmalloc_base);
+unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE;
+EXPORT_SYMBOL(vmemmap_base);
+#endif
+
#define __head __section(.head.text)

static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index aedebd2ebf1e..515b98a8ccee 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -43,14 +43,6 @@
static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE;

-/* Default values */
-unsigned long page_offset_base = __PAGE_OFFSET_BASE;
-EXPORT_SYMBOL(page_offset_base);
-unsigned long vmalloc_base = __VMALLOC_BASE;
-EXPORT_SYMBOL(vmalloc_base);
-unsigned long vmemmap_base = __VMEMMAP_BASE;
-EXPORT_SYMBOL(vmemmap_base);
-
/*
* Memory regions randomized by KASLR (except modules that use a separate logic
* earlier during boot). The list is ordered based on virtual addresses. This
--
2.15.1


2018-02-14 11:20:14

by Kirill A. Shutemov

Subject: [PATCH 5/9] x86/mm: Make LDT_BASE_ADDR dynamic

LDT_BASE_ADDR has a different value in the 4- and 5-level paging
configurations.

We need to make it dynamic in preparation for boot-time switching
between paging modes.
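
A worked example of why a single compile-time constant no longer fits
(derived from the definitions below):

/*
 * LDT_BASE_ADDR = LDT_PGD_ENTRY << PGDIR_SHIFT
 *	4-level:  -3UL   << 39 = 0xfffffe8000000000
 *	5-level:  -112UL << 48 = 0xff90000000000000
 */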

Signed-off-by: Kirill A. Shutemov <[email protected]>
---
arch/x86/include/asm/pgtable_64_types.h | 9 +++++----
arch/x86/mm/dump_pagetables.c | 5 ++++-
2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 5e2d724f8f47..903e4d054bcb 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -92,18 +92,19 @@ extern unsigned int pgtable_l5_enabled;
*/
#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)

+#define LDT_PGD_ENTRY_L4 -3UL
+#define LDT_PGD_ENTRY_L5 -112UL
+#define LDT_PGD_ENTRY (pgtable_l5_enabled ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4)
+#define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
+
#ifdef CONFIG_X86_5LEVEL
# define VMALLOC_SIZE_TB _AC(12800, UL)
# define __VMALLOC_BASE _AC(0xffa0000000000000, UL)
# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
-# define LDT_PGD_ENTRY _AC(-112, UL)
-# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
#else
# define VMALLOC_SIZE_TB _AC(32, UL)
# define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
-# define LDT_PGD_ENTRY _AC(-3, UL)
-# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
#endif

#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 2a4849e92831..a89f2dbc3531 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -89,7 +89,7 @@ static struct addr_marker address_markers[] = {
[KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" },
#endif
#ifdef CONFIG_MODIFY_LDT_SYSCALL
- [LDT_NR] = { LDT_BASE_ADDR, "LDT remap" },
+ [LDT_NR] = { 0UL, "LDT remap" },
#endif
[CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
#ifdef CONFIG_X86_ESPFIX64
@@ -570,6 +570,9 @@ static int __init pt_dump_init(void)
address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET;
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START;
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+ address_markers[LDT_NR].start_address = LDT_BASE_ADDR;
+#endif
#endif
#ifdef CONFIG_X86_32
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
--
2.15.1


2018-02-14 11:20:55

by Kirill A. Shutemov

Subject: [PATCH 1/9] x86/mm/64: Make __PHYSICAL_MASK_SHIFT always 52

__PHYSICAL_MASK_SHIFT is used to define the mask that helps to extract
physical address from a page table entry.

Although the real physical address space available may differ between
machines, it's safe to use 52 as __PHYSICAL_MASK_SHIFT: unused bits
above log2(MAXPHYADDR) up to bit 51 are reserved and must be 0.
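
A short worked example of what the constant controls (illustrative; macro
names as in the x86 headers):

/*
 * __PHYSICAL_MASK    = (1ULL << __PHYSICAL_MASK_SHIFT) - 1
 *                    = (1ULL << 52) - 1              = 0x000fffffffffffff
 * PHYSICAL_PAGE_MASK = PAGE_MASK & __PHYSICAL_MASK   = 0x000ffffffffff000
 *
 * Bits above the machine's real MAXPHYADDR are reserved-zero in page table
 * entries, so masking with the wider value is safe on smaller machines too.
 */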

Signed-off-by: Kirill A. Shutemov <[email protected]>
---
arch/x86/include/asm/page_64_types.h | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index e1407312c412..f68e6526891d 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -52,11 +52,12 @@
#define __START_KERNEL_map _AC(0xffffffff80000000, UL)

/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
-#ifdef CONFIG_X86_5LEVEL
+
#define __PHYSICAL_MASK_SHIFT 52
+
+#ifdef CONFIG_X86_5LEVEL
#define __VIRTUAL_MASK_SHIFT 56
#else
-#define __PHYSICAL_MASK_SHIFT 46
#define __VIRTUAL_MASK_SHIFT 47
#endif

--
2.15.1


2018-02-14 12:11:56

by Ingo Molnar

Subject: Re: [PATCH 9/9] x86/mm: Adjust virtual address space layout in early boot


* Kirill A. Shutemov <[email protected]> wrote:

> We need to adjust virtual address space to support switching between
> paging modes.
>
> The adjustment happens in __startup_64().
>
> We also have to change KASLR code that doesn't expect variable
> VMALLOC_SIZE_TB.
>
> Signed-off-by: Kirill A. Shutemov <[email protected]>
> ---
> arch/x86/boot/compressed/kaslr.c | 14 ++++++++--
> arch/x86/include/asm/page_64_types.h | 9 ++----
> arch/x86/include/asm/pgtable_64_types.h | 25 +++++++++--------
> arch/x86/kernel/head64.c | 49 +++++++++++++++++++++++++++------
> arch/x86/kernel/head_64.S | 2 +-
> arch/x86/mm/dump_pagetables.c | 3 ++
> arch/x86/mm/kaslr.c | 11 ++++----
> 7 files changed, 77 insertions(+), 36 deletions(-)

This is too large and risky - would it be possible to split this up into multiple,
smaller patches?

Thanks,

Ingo

2018-02-14 12:21:36

by Kirill A. Shutemov

Subject: Re: [PATCH 9/9] x86/mm: Adjust virtual address space layout in early boot

On Wed, Feb 14, 2018 at 12:10:49PM +0000, Ingo Molnar wrote:
>
> * Kirill A. Shutemov <[email protected]> wrote:
>
> > We need to adjust virtual address space to support switching between
> > paging modes.
> >
> > The adjustment happens in __startup_64().
> >
> > We also have to change KASLR code that doesn't expect variable
> > VMALLOC_SIZE_TB.
> >
> > Signed-off-by: Kirill A. Shutemov <[email protected]>
> > ---
> > arch/x86/boot/compressed/kaslr.c | 14 ++++++++--
> > arch/x86/include/asm/page_64_types.h | 9 ++----
> > arch/x86/include/asm/pgtable_64_types.h | 25 +++++++++--------
> > arch/x86/kernel/head64.c | 49 +++++++++++++++++++++++++++------
> > arch/x86/kernel/head_64.S | 2 +-
> > arch/x86/mm/dump_pagetables.c | 3 ++
> > arch/x86/mm/kaslr.c | 11 ++++----
> > 7 files changed, 77 insertions(+), 36 deletions(-)
>
> This is too large and risky - would it be possible to split this up into multiple,
> smaller patches?

Let me check what I can do here.

If you are fine with the previous patches, please apply them. I will send the
split-up of this patch separately.

--
Kirill A. Shutemov

2018-02-14 12:27:53

by Ingo Molnar

Subject: Re: [PATCH 9/9] x86/mm: Adjust virtual address space layout in early boot


* Kirill A. Shutemov <[email protected]> wrote:

> On Wed, Feb 14, 2018 at 12:10:49PM +0000, Ingo Molnar wrote:
> >
> > * Kirill A. Shutemov <[email protected]> wrote:
> >
> > > We need to adjust virtual address space to support switching between
> > > paging modes.
> > >
> > > The adjustment happens in __startup_64().
> > >
> > > We also have to change KASLR code that doesn't expect variable
> > > VMALLOC_SIZE_TB.
> > >
> > > Signed-off-by: Kirill A. Shutemov <[email protected]>
> > > ---
> > > arch/x86/boot/compressed/kaslr.c | 14 ++++++++--
> > > arch/x86/include/asm/page_64_types.h | 9 ++----
> > > arch/x86/include/asm/pgtable_64_types.h | 25 +++++++++--------
> > > arch/x86/kernel/head64.c | 49 +++++++++++++++++++++++++++------
> > > arch/x86/kernel/head_64.S | 2 +-
> > > arch/x86/mm/dump_pagetables.c | 3 ++
> > > arch/x86/mm/kaslr.c | 11 ++++----
> > > 7 files changed, 77 insertions(+), 36 deletions(-)
> >
> > This is too large and risky - would it be possible to split this up into multiple,
> > smaller patches?
>
> Let me check what I can do here.
>
> If you are fine with previous patches please apply. I will send split up
> of this patch separately.

Yes, the first 8 patches are looking good and I have already applied them locally,
will push them out after testing.

Thanks,

Ingo

Subject: [tip:x86/mm] x86/mm/64: Make __PHYSICAL_MASK_SHIFT always 52

Commit-ID: b83ce5ee91471d19c403ff91227204fb37c95fb2
Gitweb: https://git.kernel.org/tip/b83ce5ee91471d19c403ff91227204fb37c95fb2
Author: Kirill A. Shutemov <[email protected]>
AuthorDate: Wed, 14 Feb 2018 14:16:48 +0300
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 14 Feb 2018 13:11:13 +0100

x86/mm/64: Make __PHYSICAL_MASK_SHIFT always 52

__PHYSICAL_MASK_SHIFT is used to define the mask that helps to extract
physical address from a page table entry.

Although the real physical address space available may differ between
machines, it's safe to use 52 as __PHYSICAL_MASK_SHIFT: unused bits
above log2(MAXPHYADDR) up to bit 51 are reserved and must be 0.

Signed-off-by: Kirill A. Shutemov <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/include/asm/page_64_types.h | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index e140731..f68e652 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -52,11 +52,12 @@
#define __START_KERNEL_map _AC(0xffffffff80000000, UL)

/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
-#ifdef CONFIG_X86_5LEVEL
+
#define __PHYSICAL_MASK_SHIFT 52
+
+#ifdef CONFIG_X86_5LEVEL
#define __VIRTUAL_MASK_SHIFT 56
#else
-#define __PHYSICAL_MASK_SHIFT 46
#define __VIRTUAL_MASK_SHIFT 47
#endif


Subject: [tip:x86/mm] x86/mm: Make virtual memory layout dynamic for CONFIG_X86_5LEVEL=y

Commit-ID: eedb92abb9bb03ef21442614a6f5867eaac6e77f
Gitweb: https://git.kernel.org/tip/eedb92abb9bb03ef21442614a6f5867eaac6e77f
Author: Kirill A. Shutemov <[email protected]>
AuthorDate: Wed, 14 Feb 2018 14:16:50 +0300
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 14 Feb 2018 13:11:13 +0100

x86/mm: Make virtual memory layout dynamic for CONFIG_X86_5LEVEL=y

We need to be able to adjust the virtual memory layout at runtime so that we
can switch between 4- and 5-level paging at boot time.

KASLR already has movable __VMALLOC_BASE, __VMEMMAP_BASE and __PAGE_OFFSET.
Let's re-use it.

Signed-off-by: Kirill A. Shutemov <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/Kconfig | 8 ++++++++
arch/x86/include/asm/kaslr.h | 4 ----
arch/x86/include/asm/page_64.h | 4 ++++
arch/x86/include/asm/page_64_types.h | 4 ++--
arch/x86/include/asm/pgtable_64_types.h | 4 ++--
arch/x86/kernel/head64.c | 9 +++++++++
arch/x86/mm/kaslr.c | 8 --------
7 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 63bf349..9225648 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1430,6 +1430,7 @@ config X86_PAE

config X86_5LEVEL
bool "Enable 5-level page tables support"
+ select DYNAMIC_MEMORY_LAYOUT
depends on X86_64
---help---
5-level paging enables access to larger address space:
@@ -2143,10 +2144,17 @@ config PHYSICAL_ALIGN

Don't change this unless you know what you are doing.

+config DYNAMIC_MEMORY_LAYOUT
+ bool
+ ---help---
+ This option makes base addresses of vmalloc and vmemmap as well as
+ __PAGE_OFFSET movable during boot.
+
config RANDOMIZE_MEMORY
bool "Randomize the kernel memory sections"
depends on X86_64
depends on RANDOMIZE_BASE
+ select DYNAMIC_MEMORY_LAYOUT
default RANDOMIZE_BASE
---help---
Randomizes the base virtual address of kernel memory sections
diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h
index 460991e..db7ba2f 100644
--- a/arch/x86/include/asm/kaslr.h
+++ b/arch/x86/include/asm/kaslr.h
@@ -5,10 +5,6 @@
unsigned long kaslr_get_random_long(const char *purpose);

#ifdef CONFIG_RANDOMIZE_MEMORY
-extern unsigned long page_offset_base;
-extern unsigned long vmalloc_base;
-extern unsigned long vmemmap_base;
-
void kernel_randomize_memory(void);
#else
static inline void kernel_randomize_memory(void) { }
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 4baa6bc..09637865 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -11,6 +11,10 @@
extern unsigned long max_pfn;
extern unsigned long phys_base;

+extern unsigned long page_offset_base;
+extern unsigned long vmalloc_base;
+extern unsigned long vmemmap_base;
+
static inline unsigned long __phys_addr_nodebug(unsigned long x)
{
unsigned long y = x - __START_KERNEL_map;
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index f68e652..d54a3d5 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -43,11 +43,11 @@
#define __PAGE_OFFSET_BASE _AC(0xffff880000000000, UL)
#endif

-#ifdef CONFIG_RANDOMIZE_MEMORY
+#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
#define __PAGE_OFFSET page_offset_base
#else
#define __PAGE_OFFSET __PAGE_OFFSET_BASE
-#endif /* CONFIG_RANDOMIZE_MEMORY */
+#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */

#define __START_KERNEL_map _AC(0xffffffff80000000, UL)

diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 7168de7..a0db91a 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -100,13 +100,13 @@ typedef struct { pteval_t pte; } pte_t;
# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
#endif

-#ifdef CONFIG_RANDOMIZE_MEMORY
+#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
# define VMALLOC_START vmalloc_base
# define VMEMMAP_START vmemmap_base
#else
# define VMALLOC_START __VMALLOC_BASE
# define VMEMMAP_START __VMEMMAP_BASE
-#endif /* CONFIG_RANDOMIZE_MEMORY */
+#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */

#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 7ba5d81..bf5c9ba 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -39,6 +39,15 @@ extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
static unsigned int __initdata next_early_pgt;
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);

+#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
+unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE;
+EXPORT_SYMBOL(page_offset_base);
+unsigned long vmalloc_base __ro_after_init = __VMALLOC_BASE;
+EXPORT_SYMBOL(vmalloc_base);
+unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE;
+EXPORT_SYMBOL(vmemmap_base);
+#endif
+
#define __head __section(.head.text)

static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index aedebd2..515b98a 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -43,14 +43,6 @@
static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE;

-/* Default values */
-unsigned long page_offset_base = __PAGE_OFFSET_BASE;
-EXPORT_SYMBOL(page_offset_base);
-unsigned long vmalloc_base = __VMALLOC_BASE;
-EXPORT_SYMBOL(vmalloc_base);
-unsigned long vmemmap_base = __VMEMMAP_BASE;
-EXPORT_SYMBOL(vmemmap_base);
-
/*
* Memory regions randomized by KASLR (except modules that use a separate logic
* earlier during boot). The list is ordered based on virtual addresses. This

Subject: [tip:x86/mm] mm/zsmalloc: Prepare to variable MAX_PHYSMEM_BITS

Commit-ID: 02390b87a9459937cdb299e6b34ff33992512ec7
Gitweb: https://git.kernel.org/tip/02390b87a9459937cdb299e6b34ff33992512ec7
Author: Kirill A. Shutemov <[email protected]>
AuthorDate: Wed, 14 Feb 2018 14:16:49 +0300
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 14 Feb 2018 13:11:13 +0100

mm/zsmalloc: Prepare to variable MAX_PHYSMEM_BITS

With boot-time switching between paging modes we will have a variable
MAX_PHYSMEM_BITS.

Let's use the maximum value MAX_PHYSMEM_BITS can take in the
CONFIG_X86_5LEVEL=y configuration to define the zsmalloc data structures.

The patch introduces MAX_POSSIBLE_PHYSMEM_BITS to cover that case.
It also handles the PAE special case well.

Signed-off-by: Kirill A. Shutemov <[email protected]>
Reviewed-by: Nitin Gupta <[email protected]>
Acked-by: Minchan Kim <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Sergey Senozhatsky <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/include/asm/pgtable-3level_types.h | 1 +
arch/x86/include/asm/pgtable_64_types.h | 2 ++
mm/zsmalloc.c | 13 +++++++------
3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h
index 876b4c7..6a59a6d 100644
--- a/arch/x86/include/asm/pgtable-3level_types.h
+++ b/arch/x86/include/asm/pgtable-3level_types.h
@@ -44,5 +44,6 @@ typedef union {
*/
#define PTRS_PER_PTE 512

+#define MAX_POSSIBLE_PHYSMEM_BITS 36

#endif /* _ASM_X86_PGTABLE_3LEVEL_DEFS_H */
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 6b8f73d..7168de7 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -40,6 +40,8 @@ typedef struct { pteval_t pte; } pte_t;
#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
#define P4D_MASK (~(P4D_SIZE - 1))

+#define MAX_POSSIBLE_PHYSMEM_BITS 52
+
#else /* CONFIG_X86_5LEVEL */

/*
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index c301350..b7f61cd 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -84,18 +84,19 @@
* This is made more complicated by various memory models and PAE.
*/

-#ifndef MAX_PHYSMEM_BITS
-#ifdef CONFIG_HIGHMEM64G
-#define MAX_PHYSMEM_BITS 36
-#else /* !CONFIG_HIGHMEM64G */
+#ifndef MAX_POSSIBLE_PHYSMEM_BITS
+#ifdef MAX_PHYSMEM_BITS
+#define MAX_POSSIBLE_PHYSMEM_BITS MAX_PHYSMEM_BITS
+#else
/*
* If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just
* be PAGE_SHIFT
*/
-#define MAX_PHYSMEM_BITS BITS_PER_LONG
+#define MAX_POSSIBLE_PHYSMEM_BITS BITS_PER_LONG
#endif
#endif
-#define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT)
+
+#define _PFN_BITS (MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT)

/*
* Memory for allocating for handle keeps object position by
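
A minimal user-space sketch (not the kernel code) of how the new fallback
chain resolves: if the architecture provides MAX_POSSIBLE_PHYSMEM_BITS it is
used directly, otherwise a constant MAX_PHYSMEM_BITS is reused, and
BITS_PER_LONG is the last resort. The concrete numbers below assume a 64-bit
build with 4 KiB pages and the x86-64 value of 52 from the hunk above.

#include <stdio.h>

#define PAGE_SHIFT	12
#define BITS_PER_LONG	64

/* x86-64 now provides this in pgtable_64_types.h; pretend we got it. */
#define MAX_POSSIBLE_PHYSMEM_BITS 52

#ifndef MAX_POSSIBLE_PHYSMEM_BITS
# ifdef MAX_PHYSMEM_BITS
#  define MAX_POSSIBLE_PHYSMEM_BITS MAX_PHYSMEM_BITS	/* constant-only archs */
# else
#  define MAX_POSSIBLE_PHYSMEM_BITS BITS_PER_LONG	/* last-resort default */
# endif
#endif

/* Same derivation as the patched zsmalloc: PFN bits encoded in a handle. */
#define _PFN_BITS (MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT)

int main(void)
{
	printf("_PFN_BITS = %d\n", _PFN_BITS);	/* 40 with the x86-64 value */
	return 0;
}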

Subject: [tip:x86/mm] x86/mm: Introduce 'pgtable_l5_enabled'

Commit-ID: e626e6bb0dfaca41487241d49ce0ae827716101a
Gitweb: https://git.kernel.org/tip/e626e6bb0dfaca41487241d49ce0ae827716101a
Author: Kirill A. Shutemov <[email protected]>
AuthorDate: Wed, 14 Feb 2018 14:16:51 +0300
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 14 Feb 2018 13:11:14 +0100

x86/mm: Introduce 'pgtable_l5_enabled'

The new flag indicates which paging mode we are in.

Signed-off-by: Kirill A. Shutemov <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/boot/compressed/kaslr.c | 4 ++++
arch/x86/include/asm/pgtable_32_types.h | 2 ++
arch/x86/include/asm/pgtable_64_types.h | 6 ++++++
arch/x86/kernel/head64.c | 5 +++++
4 files changed, 17 insertions(+)

diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 8199a61..bd69e18 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -46,6 +46,10 @@
#define STATIC
#include <linux/decompress/mm.h>

+#ifdef CONFIG_X86_5LEVEL
+unsigned int pgtable_l5_enabled __ro_after_init = 1;
+#endif
+
extern unsigned long get_cmd_line_ptr(void);

/* Simplified build-specific string for starting entropy. */
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index 0777e18..e3225e8 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -15,6 +15,8 @@
# include <asm/pgtable-2level_types.h>
#endif

+#define pgtable_l5_enabled 0
+
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))

diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index a0db91a..5e2d724 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -20,6 +20,12 @@ typedef unsigned long pgprotval_t;

typedef struct { pteval_t pte; } pte_t;

+#ifdef CONFIG_X86_5LEVEL
+extern unsigned int pgtable_l5_enabled;
+#else
+#define pgtable_l5_enabled 0
+#endif
+
#endif /* !__ASSEMBLY__ */

#define SHARED_KERNEL_PMD 0
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index bf5c9ba..17d00d1 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -39,6 +39,11 @@ extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
static unsigned int __initdata next_early_pgt;
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);

+#ifdef CONFIG_X86_5LEVEL
+unsigned int pgtable_l5_enabled __ro_after_init = 1;
+EXPORT_SYMBOL(pgtable_l5_enabled);
+#endif
+
#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE;
EXPORT_SYMBOL(page_offset_base);
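
A hedged sketch of the intended usage pattern: on CONFIG_X86_5LEVEL kernels
pgtable_l5_enabled is a real variable read at run time, while everywhere else
it is the literal 0, so the compiler drops the 5-level branch entirely. The
helper below is purely illustrative and is not a function from the patch.

#ifdef CONFIG_X86_5LEVEL
extern unsigned int pgtable_l5_enabled;	/* set in head64.c / kaslr.c */
#else
#define pgtable_l5_enabled 0		/* branch is compiled away */
#endif

/* Hypothetical helper: pick the number of virtual address bits at run time. */
static inline unsigned int example_va_bits(void)
{
	return pgtable_l5_enabled ? 57 : 48;
}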

Subject: [tip:x86/mm] x86/mm: Make LDT_BASE_ADDR dynamic

Commit-ID: 5c7919bb1994f8dc7fed219a5db09e6bb9d473a5
Gitweb: https://git.kernel.org/tip/5c7919bb1994f8dc7fed219a5db09e6bb9d473a5
Author: Kirill A. Shutemov <[email protected]>
AuthorDate: Wed, 14 Feb 2018 14:16:52 +0300
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 14 Feb 2018 13:11:14 +0100

x86/mm: Make LDT_BASE_ADDR dynamic

LDT_BASE_ADDR has a different value in 4- and 5-level paging
configurations.

We need to make it dynamic in preparation for boot-time switching
between paging modes.

Signed-off-by: Kirill A. Shutemov <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/include/asm/pgtable_64_types.h | 9 +++++----
arch/x86/mm/dump_pagetables.c | 5 ++++-
2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 5e2d724..903e4d05 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -92,18 +92,19 @@ extern unsigned int pgtable_l5_enabled;
*/
#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)

+#define LDT_PGD_ENTRY_L4 -3UL
+#define LDT_PGD_ENTRY_L5 -112UL
+#define LDT_PGD_ENTRY (pgtable_l5_enabled ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4)
+#define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
+
#ifdef CONFIG_X86_5LEVEL
# define VMALLOC_SIZE_TB _AC(12800, UL)
# define __VMALLOC_BASE _AC(0xffa0000000000000, UL)
# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
-# define LDT_PGD_ENTRY _AC(-112, UL)
-# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
#else
# define VMALLOC_SIZE_TB _AC(32, UL)
# define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
-# define LDT_PGD_ENTRY _AC(-3, UL)
-# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
#endif

#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 2a4849e..a89f2db 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -89,7 +89,7 @@ static struct addr_marker address_markers[] = {
[KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" },
#endif
#ifdef CONFIG_MODIFY_LDT_SYSCALL
- [LDT_NR] = { LDT_BASE_ADDR, "LDT remap" },
+ [LDT_NR] = { 0UL, "LDT remap" },
#endif
[CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
#ifdef CONFIG_X86_ESPFIX64
@@ -570,6 +570,9 @@ static int __init pt_dump_init(void)
address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET;
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START;
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+ address_markers[LDT_NR].start_address = LDT_BASE_ADDR;
+#endif
#endif
#ifdef CONFIG_X86_32
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
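
With the new definition LDT_BASE_ADDR depends on both the PGD entry and
PGDIR_SHIFT, and both are mode-dependent. A small user-space sketch
(assumption: plain C on an LP64 machine, not kernel headers) showing the two
resulting addresses:

#include <stdio.h>

int main(void)
{
	/* assumes 64-bit unsigned long, as on x86-64 */
	unsigned long l4 = -3UL << 39;	/* 4-level: PGDIR_SHIFT == 39 */
	unsigned long l5 = -112UL << 48;	/* 5-level: PGDIR_SHIFT == 48 */

	printf("4-level LDT base: 0x%016lx\n", l4);	/* 0xfffffe8000000000 */
	printf("5-level LDT base: 0x%016lx\n", l5);	/* 0xff90000000000000 */
	return 0;
}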

Subject: [tip:x86/mm] x86/mm: Make MAX_PHYSADDR_BITS and MAX_PHYSMEM_BITS dynamic

Commit-ID: 162434e7f58b21f0b6c9cc5fb02222cd7d9064cc
Gitweb: https://git.kernel.org/tip/162434e7f58b21f0b6c9cc5fb02222cd7d9064cc
Author: Kirill A. Shutemov <[email protected]>
AuthorDate: Wed, 14 Feb 2018 14:16:54 +0300
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 14 Feb 2018 13:11:15 +0100

x86/mm: Make MAX_PHYSADDR_BITS and MAX_PHYSMEM_BITS dynamic

For boot-time switching between paging modes, we need to be able to
adjust the size of the physical address space at runtime.

As part of making the physical address space size variable, we have to
make X86_5LEVEL dependent on SPARSEMEM_VMEMMAP: the !SPARSEMEM_VMEMMAP
configuration doesn't build with a variable MAX_PHYSMEM_BITS.

For !SPARSEMEM_VMEMMAP SECTIONS_WIDTH depends on MAX_PHYSMEM_BITS:

SECTIONS_WIDTH
SECTIONS_SHIFT
MAX_PHYSMEM_BITS

Since SECTIONS_WIDTH is used at the preprocessor stage, it doesn't work
if it's dynamic. See include/linux/page-flags-layout.h.

Effect on kernel image size:

text data bss dec hex filename
8628393 4734340 1368064 14730797 e0c62d vmlinux.before
8628892 4734340 1368064 14731296 e0c820 vmlinux.after

Signed-off-by: Kirill A. Shutemov <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/Kconfig | 1 +
arch/x86/include/asm/pgtable_64_types.h | 2 +-
arch/x86/include/asm/sparsemem.h | 9 ++-------
arch/x86/kernel/setup.c | 5 ++---
4 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9225648..fcc3f88 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1431,6 +1431,7 @@ config X86_PAE
config X86_5LEVEL
bool "Enable 5-level page tables support"
select DYNAMIC_MEMORY_LAYOUT
+ select SPARSEMEM_VMEMMAP
depends on X86_64
---help---
5-level paging enables access to larger address space:
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 0c48d80..59d971c 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -95,7 +95,7 @@ extern unsigned int ptrs_per_p4d;
* range must not overlap with anything except the KASAN shadow area, which
* is correct as KASAN disables KASLR.
*/
-#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
+#define MAXMEM (1UL << MAX_PHYSMEM_BITS)

#define LDT_PGD_ENTRY_L4 -3UL
#define LDT_PGD_ENTRY_L5 -112UL
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h
index 4fc1e9d..4617a2b 100644
--- a/arch/x86/include/asm/sparsemem.h
+++ b/arch/x86/include/asm/sparsemem.h
@@ -27,13 +27,8 @@
# endif
#else /* CONFIG_X86_32 */
# define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */
-# ifdef CONFIG_X86_5LEVEL
-# define MAX_PHYSADDR_BITS 52
-# define MAX_PHYSMEM_BITS 52
-# else
-# define MAX_PHYSADDR_BITS 44
-# define MAX_PHYSMEM_BITS 46
-# endif
+# define MAX_PHYSADDR_BITS (pgtable_l5_enabled ? 52 : 44)
+# define MAX_PHYSMEM_BITS (pgtable_l5_enabled ? 52 : 46)
#endif

#endif /* CONFIG_SPARSEMEM */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 1ae67e9..399d0f7 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -189,9 +189,7 @@ struct ist_info ist_info;
#endif

#else
-struct cpuinfo_x86 boot_cpu_data __read_mostly = {
- .x86_phys_bits = MAX_PHYSMEM_BITS,
-};
+struct cpuinfo_x86 boot_cpu_data __read_mostly;
EXPORT_SYMBOL(boot_cpu_data);
#endif

@@ -851,6 +849,7 @@ void __init setup_arch(char **cmdline_p)
__flush_tlb_all();
#else
printk(KERN_INFO "Command line: %s\n", boot_command_line);
+ boot_cpu_data.x86_phys_bits = MAX_PHYSMEM_BITS;
#endif

/*
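
A simplified, hedged illustration (not the kernel's exact code) of why a
runtime MAX_PHYSMEM_BITS breaks preprocessor users such as SECTIONS_WIDTH:
the preprocessor knows nothing about C variables, so an identifier like
pgtable_l5_enabled is treated as 0 in #if expressions, and places that need a
true integer constant expression (array sizes, for instance) stop building.

extern unsigned int pgtable_l5_enabled;

#define SECTION_SIZE_BITS 27
#define MAX_PHYSMEM_BITS  (pgtable_l5_enabled ? 52 : 46)	/* runtime value */
#define SECTIONS_SHIFT    (MAX_PHYSMEM_BITS - SECTION_SIZE_BITS)

/*
 * In this #if the undefined identifier pgtable_l5_enabled is replaced by 0,
 * so SECTIONS_SHIFT is evaluated as if 4-level paging were always in use;
 * the answer is silently wrong for the 5-level case.
 */
#if SECTIONS_SHIFT > 20
# error "section index would not fit into page->flags"
#endif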

Subject: [tip:x86/mm] x86/mm: Make __VIRTUAL_MASK_SHIFT dynamic

Commit-ID: 09e61a779e7f171c50325e6d7108a593afb2e5d4
Gitweb: https://git.kernel.org/tip/09e61a779e7f171c50325e6d7108a593afb2e5d4
Author: Kirill A. Shutemov <[email protected]>
AuthorDate: Wed, 14 Feb 2018 14:16:55 +0300
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 14 Feb 2018 13:11:15 +0100

x86/mm: Make __VIRTUAL_MASK_SHIFT dynamic

For boot-time switching between paging modes, we need to be able to
adjust virtual mask shifts.

The change doesn't affect the kernel image size much:

text data bss dec hex filename
8628892 4734340 1368064 14731296 e0c820 vmlinux.before
8628966 4734340 1368064 14731370 e0c86a vmlinux.after

Signed-off-by: Kirill A. Shutemov <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/entry/entry_64.S | 12 ++++++++++++
arch/x86/include/asm/page_64_types.h | 2 +-
arch/x86/mm/dump_pagetables.c | 12 ++++++++++--
arch/x86/mm/kaslr.c | 4 +++-
4 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 30c8c53..2c06348 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -274,8 +274,20 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
* Change top bits to match most significant bit (47th or 56th bit
* depending on paging mode) in the address.
*/
+#ifdef CONFIG_X86_5LEVEL
+ testl $1, pgtable_l5_enabled(%rip)
+ jz 1f
+ shl $(64 - 57), %rcx
+ sar $(64 - 57), %rcx
+ jmp 2f
+1:
+ shl $(64 - 48), %rcx
+ sar $(64 - 48), %rcx
+2:
+#else
shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
+#endif

/* If this changed %rcx, it was not canonical */
cmpq %rcx, %r11
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index d54a3d5..fa7dc7c 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -56,7 +56,7 @@
#define __PHYSICAL_MASK_SHIFT 52

#ifdef CONFIG_X86_5LEVEL
-#define __VIRTUAL_MASK_SHIFT 56
+#define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled ? 56 : 47)
#else
#define __VIRTUAL_MASK_SHIFT 47
#endif
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 420058b..9efee6f 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -85,8 +85,12 @@ static struct addr_marker address_markers[] = {
[VMALLOC_START_NR] = { 0UL, "vmalloc() Area" },
[VMEMMAP_START_NR] = { 0UL, "Vmemmap" },
#ifdef CONFIG_KASAN
- [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
- [KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" },
+ /*
+ * These fields get initialized with the (dynamic)
+ * KASAN_SHADOW_{START,END} values in pt_dump_init().
+ */
+ [KASAN_SHADOW_START_NR] = { 0UL, "KASAN shadow" },
+ [KASAN_SHADOW_END_NR] = { 0UL, "KASAN shadow end" },
#endif
#ifdef CONFIG_MODIFY_LDT_SYSCALL
[LDT_NR] = { 0UL, "LDT remap" },
@@ -571,6 +575,10 @@ static int __init pt_dump_init(void)
#ifdef CONFIG_MODIFY_LDT_SYSCALL
address_markers[LDT_NR].start_address = LDT_BASE_ADDR;
#endif
+#ifdef CONFIG_KASAN
+ address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START;
+ address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END;
+#endif
#endif
#ifdef CONFIG_X86_32
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 515b98a..d079878 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -52,7 +52,7 @@ static __initdata struct kaslr_memory_region {
unsigned long *base;
unsigned long size_tb;
} kaslr_regions[] = {
- { &page_offset_base, 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT) /* Maximum */ },
+ { &page_offset_base, 0 },
{ &vmalloc_base, VMALLOC_SIZE_TB },
{ &vmemmap_base, 1 },
};
@@ -93,6 +93,8 @@ void __init kernel_randomize_memory(void)
if (!kaslr_memory_enabled())
return;

+ kaslr_regions[0].size_tb = 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT);
+
/*
* Update Physical memory mapping to available and
* add padding if needed (especially for memory hotplug support).
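
The entry-code hunk implements the canonical-address check by sign-extending
from bit 47 or bit 56 and comparing the result with the original value. A
user-space C sketch of the same trick (the shift counts mirror the assembly;
nothing here is kernel API):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Sign-extend from the top implemented virtual-address bit and compare,
 * like the shl/sar pair in entry_64.S. Relies on an arithmetic right
 * shift for signed values, which is what gcc/clang emit (and what sar does).
 */
static bool is_canonical(uint64_t addr, unsigned int va_bits /* 48 or 57 */)
{
	int64_t ext = (int64_t)(addr << (64 - va_bits)) >> (64 - va_bits);

	return (uint64_t)ext == addr;
}

int main(void)
{
	printf("%d\n", is_canonical(0x00007fffffffffffULL, 48));	/* 1 */
	printf("%d\n", is_canonical(0x0000800000000000ULL, 48));	/* 0 */
	printf("%d\n", is_canonical(0x0000800000000000ULL, 57));	/* 1 */
	return 0;
}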

Subject: [tip:x86/mm] x86/mm: Make PGDIR_SHIFT and PTRS_PER_P4D variable

Commit-ID: c65e774fb3f6af212641538694b9778ff9ab4300
Gitweb: https://git.kernel.org/tip/c65e774fb3f6af212641538694b9778ff9ab4300
Author: Kirill A. Shutemov <[email protected]>
AuthorDate: Wed, 14 Feb 2018 14:16:53 +0300
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 14 Feb 2018 13:11:14 +0100

x86/mm: Make PGDIR_SHIFT and PTRS_PER_P4D variable

For boot-time switching between 4- and 5-level paging we need to be able
to fold the p4d page table level at runtime. This requires variable
PGDIR_SHIFT and PTRS_PER_P4D.

The change doesn't affect the kernel image size much:

text data bss dec hex filename
8628091 4734304 1368064 14730459 e0c4db vmlinux.before
8628393 4734340 1368064 14730797 e0c62d vmlinux.after

Signed-off-by: Kirill A. Shutemov <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/boot/compressed/kaslr.c | 2 ++
arch/x86/include/asm/pgtable_32.h | 2 ++
arch/x86/include/asm/pgtable_64_types.h | 19 ++++++++++++-------
arch/x86/kernel/cpu/mcheck/mce.c | 18 ++++++------------
arch/x86/kernel/head64.c | 6 +++++-
arch/x86/mm/dump_pagetables.c | 12 +++++-------
arch/x86/mm/init_64.c | 2 +-
arch/x86/mm/kasan_init_64.c | 2 +-
arch/x86/platform/efi/efi_64.c | 4 ++--
include/asm-generic/5level-fixup.h | 1 +
include/asm-generic/pgtable-nop4d.h | 9 +++++----
include/linux/kasan.h | 2 +-
mm/kasan/kasan_init.c | 2 +-
13 files changed, 44 insertions(+), 37 deletions(-)

diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index bd69e18..b18e8f9 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -48,6 +48,8 @@

#ifdef CONFIG_X86_5LEVEL
unsigned int pgtable_l5_enabled __ro_after_init = 1;
+unsigned int pgdir_shift __ro_after_init = 48;
+unsigned int ptrs_per_p4d __ro_after_init = 512;
#endif

extern unsigned long get_cmd_line_ptr(void);
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index e67c062..d829360 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -33,6 +33,8 @@ static inline void pgtable_cache_init(void) { }
static inline void check_pgt_cache(void) { }
void paging_init(void);

+static inline int pgd_large(pgd_t pgd) { return 0; }
+
/*
* Define this if things work differently on an i386 and an i486:
* it will (on an i486) warn about kernel memory accesses that are
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 903e4d05..0c48d80 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -26,6 +26,9 @@ extern unsigned int pgtable_l5_enabled;
#define pgtable_l5_enabled 0
#endif

+extern unsigned int pgdir_shift;
+extern unsigned int ptrs_per_p4d;
+
#endif /* !__ASSEMBLY__ */

#define SHARED_KERNEL_PMD 0
@@ -35,16 +38,17 @@ extern unsigned int pgtable_l5_enabled;
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
*/
-#define PGDIR_SHIFT 48
+#define PGDIR_SHIFT pgdir_shift
#define PTRS_PER_PGD 512

/*
* 4th level page in 5-level paging case
*/
-#define P4D_SHIFT 39
-#define PTRS_PER_P4D 512
-#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
-#define P4D_MASK (~(P4D_SIZE - 1))
+#define P4D_SHIFT 39
+#define MAX_PTRS_PER_P4D 512
+#define PTRS_PER_P4D ptrs_per_p4d
+#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
+#define P4D_MASK (~(P4D_SIZE - 1))

#define MAX_POSSIBLE_PHYSMEM_BITS 52

@@ -53,8 +57,9 @@ extern unsigned int pgtable_l5_enabled;
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
*/
-#define PGDIR_SHIFT 39
-#define PTRS_PER_PGD 512
+#define PGDIR_SHIFT 39
+#define PTRS_PER_PGD 512
+#define MAX_PTRS_PER_P4D 1

#endif /* CONFIG_X86_5LEVEL */

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3a8e88a..cbb3af7 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1082,19 +1082,7 @@ void arch_unmap_kpfn(unsigned long pfn)
* a legal address.
*/

-/*
- * Build time check to see if we have a spare virtual bit. Don't want
- * to leave this until run time because most developers don't have a
- * system that can exercise this code path. This will only become a
- * problem if/when we move beyond 5-level page tables.
- *
- * Hard code "9" here because cpp doesn't grok ilog2(PTRS_PER_PGD)
- */
-#if PGDIR_SHIFT + 9 < 63
decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
-#else
-#error "no unused virtual bit available"
-#endif

if (set_memory_np(decoy_addr, 1))
pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
@@ -2328,6 +2316,12 @@ static __init int mcheck_init_device(void)
{
int err;

+ /*
+ * Check if we have a spare virtual bit. This will only become
+ * a problem if/when we move beyond 5-level page tables.
+ */
+ MAYBE_BUILD_BUG_ON(__VIRTUAL_MASK_SHIFT >= 63);
+
if (!mce_available(&boot_cpu_data)) {
err = -EIO;
goto err_out;
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 17d00d1..98b0ff4 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -42,6 +42,10 @@ pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
#ifdef CONFIG_X86_5LEVEL
unsigned int pgtable_l5_enabled __ro_after_init = 1;
EXPORT_SYMBOL(pgtable_l5_enabled);
+unsigned int pgdir_shift __ro_after_init = 48;
+EXPORT_SYMBOL(pgdir_shift);
+unsigned int ptrs_per_p4d __ro_after_init = 512;
+EXPORT_SYMBOL(ptrs_per_p4d);
#endif

#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
@@ -336,7 +340,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
BUILD_BUG_ON((__START_KERNEL_map & ~PMD_MASK) != 0);
BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0);
BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
- BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
+ MAYBE_BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
(__START_KERNEL & PGDIR_MASK)));
BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);

diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index a89f2db..420058b 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -428,14 +428,15 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
#define p4d_none(a) pud_none(__pud(p4d_val(a)))
#endif

-#if PTRS_PER_P4D > 1
-
static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, unsigned long P)
{
int i;
p4d_t *start, *p4d_start;
pgprotval_t prot;

+ if (PTRS_PER_P4D == 1)
+ return walk_pud_level(m, st, __p4d(pgd_val(addr)), P);
+
p4d_start = start = (p4d_t *)pgd_page_vaddr(addr);

for (i = 0; i < PTRS_PER_P4D; i++) {
@@ -455,11 +456,8 @@ static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
}
}

-#else
-#define walk_p4d_level(m,s,a,p) walk_pud_level(m,s,__p4d(pgd_val(a)),p)
-#define pgd_large(a) p4d_large(__p4d(pgd_val(a)))
-#define pgd_none(a) p4d_none(__p4d(pgd_val(a)))
-#endif
+#define pgd_large(a) (pgtable_l5_enabled ? pgd_large(a) : p4d_large(__p4d(pgd_val(a))))
+#define pgd_none(a) (pgtable_l5_enabled ? pgd_none(a) : p4d_none(__p4d(pgd_val(a))))

static inline bool is_hypervisor_range(int idx)
{
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 1ab42c8..6a4b20b 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -143,7 +143,7 @@ void sync_global_pgds(unsigned long start, unsigned long end)
* With folded p4d, pgd_none() is always false, we need to
* handle synchonization on p4d level.
*/
- BUILD_BUG_ON(pgd_none(*pgd_ref));
+ MAYBE_BUILD_BUG_ON(pgd_none(*pgd_ref));
p4d_ref = p4d_offset(pgd_ref, addr);

if (p4d_none(*p4d_ref))
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index af6f2f9..12ec90f 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -19,7 +19,7 @@

extern struct range pfn_mapped[E820_MAX_ENTRIES];

-static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
+static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);

static __init void *early_alloc(size_t size, int nid, bool panic)
{
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 780460a..d52aaa7 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -257,8 +257,8 @@ void efi_sync_low_kernel_mappings(void)
* only span a single PGD entry and that the entry also maps
* other important kernel regions.
*/
- BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END));
- BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) !=
+ MAYBE_BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END));
+ MAYBE_BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) !=
(EFI_VA_END & PGDIR_MASK));

pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET);
diff --git a/include/asm-generic/5level-fixup.h b/include/asm-generic/5level-fixup.h
index dfbd9d9..9c2e070 100644
--- a/include/asm-generic/5level-fixup.h
+++ b/include/asm-generic/5level-fixup.h
@@ -8,6 +8,7 @@
#define P4D_SHIFT PGDIR_SHIFT
#define P4D_SIZE PGDIR_SIZE
#define P4D_MASK PGDIR_MASK
+#define MAX_PTRS_PER_P4D 1
#define PTRS_PER_P4D 1

#define p4d_t pgd_t
diff --git a/include/asm-generic/pgtable-nop4d.h b/include/asm-generic/pgtable-nop4d.h
index 8f22f55..1a29b2a 100644
--- a/include/asm-generic/pgtable-nop4d.h
+++ b/include/asm-generic/pgtable-nop4d.h
@@ -8,10 +8,11 @@

typedef struct { pgd_t pgd; } p4d_t;

-#define P4D_SHIFT PGDIR_SHIFT
-#define PTRS_PER_P4D 1
-#define P4D_SIZE (1UL << P4D_SHIFT)
-#define P4D_MASK (~(P4D_SIZE-1))
+#define P4D_SHIFT PGDIR_SHIFT
+#define MAX_PTRS_PER_P4D 1
+#define PTRS_PER_P4D 1
+#define P4D_SIZE (1UL << P4D_SHIFT)
+#define P4D_MASK (~(P4D_SIZE-1))

/*
* The "pgd_xxx()" functions here are trivial for a folded two-level
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index adc1347..d6459bd 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -18,7 +18,7 @@ extern unsigned char kasan_zero_page[PAGE_SIZE];
extern pte_t kasan_zero_pte[PTRS_PER_PTE];
extern pmd_t kasan_zero_pmd[PTRS_PER_PMD];
extern pud_t kasan_zero_pud[PTRS_PER_PUD];
-extern p4d_t kasan_zero_p4d[PTRS_PER_P4D];
+extern p4d_t kasan_zero_p4d[MAX_PTRS_PER_P4D];

void kasan_populate_zero_shadow(const void *shadow_start,
const void *shadow_end);
diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
index 554e4c0..f436246 100644
--- a/mm/kasan/kasan_init.c
+++ b/mm/kasan/kasan_init.c
@@ -31,7 +31,7 @@
unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss;

#if CONFIG_PGTABLE_LEVELS > 4
-p4d_t kasan_zero_p4d[PTRS_PER_P4D] __page_aligned_bss;
+p4d_t kasan_zero_p4d[MAX_PTRS_PER_P4D] __page_aligned_bss;
#endif
#if CONFIG_PGTABLE_LEVELS > 3
pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss;
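
With PGDIR_SHIFT and PTRS_PER_P4D turned into variables, the pgd/p4d split of
a virtual address is decided at run time; when the p4d level is folded,
PTRS_PER_P4D is 1 and the p4d index is always 0. A standalone sketch of the
index arithmetic (illustrative values only, not kernel helpers):

#include <stdio.h>

int main(void)
{
	unsigned long addr = 0xffffc90000001000UL;	/* some kernel VA */
	int l5;

	for (l5 = 0; l5 <= 1; l5++) {
		unsigned int pgdir_shift  = l5 ? 48 : 39;	/* PGDIR_SHIFT */
		unsigned int ptrs_per_p4d = l5 ? 512 : 1;	/* PTRS_PER_P4D */
		unsigned long pgd_index = (addr >> pgdir_shift) & 511;
		unsigned long p4d_index = (addr >> 39) & (ptrs_per_p4d - 1);

		printf("%d-level: pgd index %lu, p4d index %lu\n",
		       l5 ? 5 : 4, pgd_index, p4d_index);
	}
	return 0;
}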

2018-02-14 13:34:08

by Kirill A. Shutemov

[permalink] [raw]
Subject: Re: [PATCH 5/9] x86/mm: Make LDT_BASE_ADDR dynamic

On Wed, Feb 14, 2018 at 11:16:52AM +0000, Kirill A. Shutemov wrote:
> LDT_BASE_ADDR has different value in 4- and 5-level paging
> configurations.
>
> We need to make it dynamic in preparation for boot-time switching
> between paging modes.
>
> Signed-off-by: Kirill A. Shutemov <[email protected]>

I've just realized that the patch that I'm splitting has a hunk that
belongs here.

Could you fold it in?

Sorry for this.

diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 9efee6f464ab..a32f0621d664 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -579,6 +579,9 @@ static int __init pt_dump_init(void)
address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START;
address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END;
#endif
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+ address_markers[LDT_NR].start_address = LDT_BASE_ADDR;
+#endif
#endif
#ifdef CONFIG_X86_32
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
--
Kirill A. Shutemov

2018-02-14 17:56:50

by Kirill A. Shutemov

[permalink] [raw]
Subject: Re: [PATCH 8/9] x86/mm: Make __VIRTUAL_MASK_SHIFT dynamic

On Wed, Feb 14, 2018 at 05:22:58PM +0000, Andy Lutomirski wrote:
> On Wed, Feb 14, 2018 at 11:16 AM, Kirill A. Shutemov
> <[email protected]> wrote:
> > For boot-time switching between paging modes, we need to be able to
> > adjust virtual mask shifts.
> >
> > The change doesn't affect the kernel image size much:
> >
> > text data bss dec hex filename
> > 8628892 4734340 1368064 14731296 e0c820 vmlinux.before
> > 8628966 4734340 1368064 14731370 e0c86a vmlinux.after
> >
> > Signed-off-by: Kirill A. Shutemov <[email protected]>
> > ---
> > arch/x86/entry/entry_64.S | 12 ++++++++++++
> > arch/x86/include/asm/page_64_types.h | 2 +-
> > arch/x86/mm/dump_pagetables.c | 12 ++++++++++--
> > arch/x86/mm/kaslr.c | 4 +++-
> > 4 files changed, 26 insertions(+), 4 deletions(-)
> >
> > diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> > index cd216c9431e1..1608b13a0b36 100644
> > --- a/arch/x86/entry/entry_64.S
> > +++ b/arch/x86/entry/entry_64.S
> > @@ -260,8 +260,20 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
> > * Change top bits to match most significant bit (47th or 56th bit
> > * depending on paging mode) in the address.
> > */
> > +#ifdef CONFIG_X86_5LEVEL
> > + testl $1, pgtable_l5_enabled(%rip)
> > + jz 1f
> > + shl $(64 - 57), %rcx
> > + sar $(64 - 57), %rcx
> > + jmp 2f
> > +1:
> > + shl $(64 - 48), %rcx
> > + sar $(64 - 48), %rcx
> > +2:
> > +#else
> > shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
> > sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
> > +#endif
>
> Eww.
>
> Can't this be ALTERNATIVE "shl ... sar ...", "shl ... sar ...",
> X86_FEATURE_5LEVEL or similar?

Optimization comes in a separate patch:

https://git.kernel.org/pub/scm/linux/kernel/git/kas/linux.git/commit/?h=la57/boot-switching/wip&id=015fa3576a7f2b8bd271096bb3a12b06cdc845af

--
Kirill A. Shutemov

2018-02-14 20:11:20

by Andy Lutomirski

[permalink] [raw]
Subject: Re: [PATCH 8/9] x86/mm: Make __VIRTUAL_MASK_SHIFT dynamic

On Wed, Feb 14, 2018 at 5:55 PM, Kirill A. Shutemov
<[email protected]> wrote:
> On Wed, Feb 14, 2018 at 05:22:58PM +0000, Andy Lutomirski wrote:
>> On Wed, Feb 14, 2018 at 11:16 AM, Kirill A. Shutemov
>> <[email protected]> wrote:
>> > For boot-time switching between paging modes, we need to be able to
>> > adjust virtual mask shifts.
>> >
>> > The change doesn't affect the kernel image size much:
>> >
>> > text data bss dec hex filename
>> > 8628892 4734340 1368064 14731296 e0c820 vmlinux.before
>> > 8628966 4734340 1368064 14731370 e0c86a vmlinux.after
>> >
>> > Signed-off-by: Kirill A. Shutemov <[email protected]>
>> > ---
>> > arch/x86/entry/entry_64.S | 12 ++++++++++++
>> > arch/x86/include/asm/page_64_types.h | 2 +-
>> > arch/x86/mm/dump_pagetables.c | 12 ++++++++++--
>> > arch/x86/mm/kaslr.c | 4 +++-
>> > 4 files changed, 26 insertions(+), 4 deletions(-)
>> >
>> > diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
>> > index cd216c9431e1..1608b13a0b36 100644
>> > --- a/arch/x86/entry/entry_64.S
>> > +++ b/arch/x86/entry/entry_64.S
>> > @@ -260,8 +260,20 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
>> > * Change top bits to match most significant bit (47th or 56th bit
>> > * depending on paging mode) in the address.
>> > */
>> > +#ifdef CONFIG_X86_5LEVEL
>> > + testl $1, pgtable_l5_enabled(%rip)
>> > + jz 1f
>> > + shl $(64 - 57), %rcx
>> > + sar $(64 - 57), %rcx
>> > + jmp 2f
>> > +1:
>> > + shl $(64 - 48), %rcx
>> > + sar $(64 - 48), %rcx
>> > +2:
>> > +#else
>> > shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
>> > sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
>> > +#endif
>>
>> Eww.
>>
>> Can't this be ALTERNATIVE "shl ... sar ...", "shl ... sar ...",
>> X86_FEATURE_5LEVEL or similar?
>
> Optimization comes in a separate patch:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/kas/linux.git/commit/?h=la57/boot-switching/wip&id=015fa3576a7f2b8bd271096bb3a12b06cdc845af
>

Nice!

2018-02-14 20:11:22

by Andy Lutomirski

[permalink] [raw]
Subject: Re: [PATCH 8/9] x86/mm: Make __VIRTUAL_MASK_SHIFT dynamic

On Wed, Feb 14, 2018 at 11:16 AM, Kirill A. Shutemov
<[email protected]> wrote:
> For boot-time switching between paging modes, we need to be able to
> adjust virtual mask shifts.
>
> The change doesn't affect the kernel image size much:
>
> text data bss dec hex filename
> 8628892 4734340 1368064 14731296 e0c820 vmlinux.before
> 8628966 4734340 1368064 14731370 e0c86a vmlinux.after
>
> Signed-off-by: Kirill A. Shutemov <[email protected]>
> ---
> arch/x86/entry/entry_64.S | 12 ++++++++++++
> arch/x86/include/asm/page_64_types.h | 2 +-
> arch/x86/mm/dump_pagetables.c | 12 ++++++++++--
> arch/x86/mm/kaslr.c | 4 +++-
> 4 files changed, 26 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index cd216c9431e1..1608b13a0b36 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -260,8 +260,20 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
> * Change top bits to match most significant bit (47th or 56th bit
> * depending on paging mode) in the address.
> */
> +#ifdef CONFIG_X86_5LEVEL
> + testl $1, pgtable_l5_enabled(%rip)
> + jz 1f
> + shl $(64 - 57), %rcx
> + sar $(64 - 57), %rcx
> + jmp 2f
> +1:
> + shl $(64 - 48), %rcx
> + sar $(64 - 48), %rcx
> +2:
> +#else
> shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
> sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
> +#endif

Eww.

Can't this be ALTERNATIVE "shl ... sar ...", "shl ... sar ...",
X86_FEATURE_5LEVEL or similar?

--Andy