2008-02-01 09:53:42

by Andi Kleen

Subject: [PATCH] [0/12] GBPAGES patchkit for 2.6.25 v3


This patchkit implements support for the 1GB pages of AMD Fam10h CPUs
in the kernel direct mapping.
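
For scale: one GB page covers as much address space as 512 2MB pages or
262144 4K pages. A minimal user-space sketch of the granularities
involved, using the standard x86-64 4-level paging shifts (illustration
only, not part of the patchkit):

/* Page sizes at each x86-64 paging level */
#include <stdio.h>

#define PAGE_SHIFT 12   /* PTE level: 4K pages */
#define PMD_SHIFT  21   /* PMD level: 2MB pages */
#define PUD_SHIFT  30   /* PUD level: 1GB pages (new on AMD Fam10h) */

int main(void)
{
	printf("PTE maps %lu KB\n", (1UL << PAGE_SHIFT) >> 10);
	printf("PMD maps %lu MB\n", (1UL << PMD_SHIFT) >> 20);
	printf("PUD maps %lu GB\n", (1UL << PUD_SHIFT) >> 30);
	return 0;
}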

Changes to previous versions:
- Incorporated feedback from Thomas Gleixner
- Switch direct mapping setup over to set_pte() / pfn_pte()
- Split up patches some more

It now includes one not strictly required cleanup patch (the last one)

I believe this patchkit is ready for the 2.6.25 merge.

-Andi


2008-02-01 09:53:29

by Andi Kleen

Subject: [PATCH] [1/12] GBPAGES: Add feature macros for the gbpages cpuid bit


Signed-off-by: Andi Kleen <[email protected]>

---
include/asm-x86/cpufeature.h | 2 ++
1 file changed, 2 insertions(+)

Index: linux/include/asm-x86/cpufeature.h
===================================================================
--- linux.orig/include/asm-x86/cpufeature.h
+++ linux/include/asm-x86/cpufeature.h
@@ -49,6 +49,7 @@
#define X86_FEATURE_MP (1*32+19) /* MP Capable. */
#define X86_FEATURE_NX (1*32+20) /* Execute Disable */
#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
+#define X86_FEATURE_GBPAGES (1*32+26) /* GB pages */
#define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */
#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */
#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */
@@ -175,6 +176,7 @@
#define cpu_has_pebs boot_cpu_has(X86_FEATURE_PEBS)
#define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH)
#define cpu_has_bts boot_cpu_has(X86_FEATURE_BTS)
+#define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES)

#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
# define cpu_has_invlpg 1
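
Word 1 of the kernel's capability array corresponds to CPUID leaf
0x80000001 EDX, so bit 26 here is the hardware's 1GB-pages bit. A
user-space sketch probing the same bit (assumes GCC's cpuid.h helper;
illustration only, not part of the patch):

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* Extended feature leaf 0x80000001; returns 0 if unsupported */
	if (!__get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx))
		return 1;
	printf("1GB pages %ssupported\n", (edx & (1u << 26)) ? "" : "not ");
	return 0;
}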

2008-02-01 09:53:55

by Andi Kleen

Subject: [PATCH] [2/12] GBPAGES: Rename LARGE_PAGE_SIZE to PMD_PAGE_SIZE


Fix up all users.

Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86/boot/compressed/head_64.S | 8 ++++----
arch/x86/kernel/head_64.S | 4 ++--
arch/x86/kernel/pci-gart_64.c | 2 +-
arch/x86/mm/init_64.c | 6 +++---
arch/x86/mm/pageattr.c | 2 +-
include/asm-x86/page.h | 4 ++--
6 files changed, 13 insertions(+), 13 deletions(-)

Index: linux/arch/x86/boot/compressed/head_64.S
===================================================================
--- linux.orig/arch/x86/boot/compressed/head_64.S
+++ linux/arch/x86/boot/compressed/head_64.S
@@ -80,8 +80,8 @@ startup_32:

#ifdef CONFIG_RELOCATABLE
movl %ebp, %ebx
- addl $(LARGE_PAGE_SIZE -1), %ebx
- andl $LARGE_PAGE_MASK, %ebx
+ addl $(PMD_PAGE_SIZE -1), %ebx
+ andl $PMD_PAGE_MASK, %ebx
#else
movl $CONFIG_PHYSICAL_START, %ebx
#endif
@@ -220,8 +220,8 @@ ENTRY(startup_64)
/* Start with the delta to where the kernel will run at. */
#ifdef CONFIG_RELOCATABLE
leaq startup_32(%rip) /* - $startup_32 */, %rbp
- addq $(LARGE_PAGE_SIZE - 1), %rbp
- andq $LARGE_PAGE_MASK, %rbp
+ addq $(PMD_PAGE_SIZE - 1), %rbp
+ andq $PMD_PAGE_MASK, %rbp
movq %rbp, %rbx
#else
movq $CONFIG_PHYSICAL_START, %rbp
Index: linux/arch/x86/kernel/pci-gart_64.c
===================================================================
--- linux.orig/arch/x86/kernel/pci-gart_64.c
+++ linux/arch/x86/kernel/pci-gart_64.c
@@ -501,7 +501,7 @@ static __init unsigned long check_iommu_
}

a = aper + iommu_size;
- iommu_size -= round_up(a, LARGE_PAGE_SIZE) - a;
+ iommu_size -= round_up(a, PMD_PAGE_SIZE) - a;

if (iommu_size < 64*1024*1024) {
printk(KERN_WARNING
Index: linux/arch/x86/kernel/head_64.S
===================================================================
--- linux.orig/arch/x86/kernel/head_64.S
+++ linux/arch/x86/kernel/head_64.S
@@ -63,7 +63,7 @@ startup_64:

/* Is the address not 2M aligned? */
movq %rbp, %rax
- andl $~LARGE_PAGE_MASK, %eax
+ andl $~PMD_PAGE_MASK, %eax
testl %eax, %eax
jnz bad_address

@@ -88,7 +88,7 @@ startup_64:

/* Add an Identity mapping if I am above 1G */
leaq _text(%rip), %rdi
- andq $LARGE_PAGE_MASK, %rdi
+ andq $PMD_PAGE_MASK, %rdi

movq %rdi, %rax
shrq $PUD_SHIFT, %rax
Index: linux/arch/x86/mm/init_64.c
===================================================================
--- linux.orig/arch/x86/mm/init_64.c
+++ linux/arch/x86/mm/init_64.c
@@ -449,10 +449,10 @@ void __init clear_kernel_mapping(unsigne
{
unsigned long end = address + size;

- BUG_ON(address & ~LARGE_PAGE_MASK);
- BUG_ON(size & ~LARGE_PAGE_MASK);
+ BUG_ON(address & ~PMD_PAGE_MASK);
+ BUG_ON(size & ~PMD_PAGE_MASK);

- for (; address < end; address += LARGE_PAGE_SIZE) {
+ for (; address < end; address += PMD_PAGE_SIZE) {
pgd_t *pgd = pgd_offset_k(address);
pud_t *pud;
pmd_t *pmd;
Index: linux/include/asm-x86/page.h
===================================================================
--- linux.orig/include/asm-x86/page.h
+++ linux/include/asm-x86/page.h
@@ -13,8 +13,8 @@
#define PHYSICAL_PAGE_MASK (PAGE_MASK & __PHYSICAL_MASK)
#define PTE_MASK (_AT(long, PHYSICAL_PAGE_MASK))

-#define LARGE_PAGE_SIZE (_AC(1,UL) << PMD_SHIFT)
-#define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1))
+#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT)
+#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1))

#define HPAGE_SHIFT PMD_SHIFT
#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)
Index: linux/arch/x86/mm/pageattr.c
===================================================================
--- linux.orig/arch/x86/mm/pageattr.c
+++ linux/arch/x86/mm/pageattr.c
@@ -219,7 +219,7 @@ static int split_large_page(pte_t *kpte,
}

address = __pa(address);
- addr = address & LARGE_PAGE_MASK;
+ addr = address & PMD_PAGE_MASK;
pbase = (pte_t *)page_address(base);
#ifdef CONFIG_X86_32
paravirt_alloc_pt(&init_mm, page_to_pfn(base));

2008-02-01 09:54:18

by Andi Kleen

Subject: [PATCH] [3/12] GBPAGES: Add PUD_PAGE_SIZE


Signed-off-by: Andi Kleen <[email protected]>

---
include/asm-x86/page_64.h | 3 +++
1 file changed, 3 insertions(+)

Index: linux/include/asm-x86/page_64.h
===================================================================
--- linux.orig/include/asm-x86/page_64.h
+++ linux/include/asm-x86/page_64.h
@@ -23,6 +23,9 @@
#define MCE_STACK 5
#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */

+#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT)
+#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
+
#define __PAGE_OFFSET _AC(0xffff810000000000, UL)

#define __PHYSICAL_START CONFIG_PHYSICAL_START

2008-02-01 09:54:33

by Andi Kleen

Subject: [PATCH] [4/12] Add pgtable accessor functions for GB pages


Signed-off-by: Andi Kleen <[email protected]>

---
include/asm-x86/pgtable_32.h | 2 ++
include/asm-x86/pgtable_64.h | 6 ++++++
2 files changed, 8 insertions(+)

Index: linux/include/asm-x86/pgtable_64.h
===================================================================
--- linux.orig/include/asm-x86/pgtable_64.h
+++ linux/include/asm-x86/pgtable_64.h
@@ -199,6 +199,12 @@ static inline unsigned long pmd_bad(pmd_
#define pud_offset(pgd, address) ((pud_t *) pgd_page_vaddr(*(pgd)) + pud_index(address))
#define pud_present(pud) (pud_val(pud) & _PAGE_PRESENT)

+static inline int pud_large(pud_t pte)
+{
+ return (pud_val(pte) & (_PAGE_PSE|_PAGE_PRESENT)) ==
+ (_PAGE_PSE|_PAGE_PRESENT);
+}
+
/* PMD - Level 2 access */
#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
Index: linux/include/asm-x86/pgtable_32.h
===================================================================
--- linux.orig/include/asm-x86/pgtable_32.h
+++ linux/include/asm-x86/pgtable_32.h
@@ -148,6 +148,8 @@ static inline void clone_pgd_range(pgd_t
*/
#define pgd_offset_k(address) pgd_offset(&init_mm, address)

+static inline int pud_large(pud_t pud) { return 0; }
+
/*
* the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD]
*

2008-02-01 09:54:48

by Andi Kleen

Subject: [PATCH] [5/12] GBPAGES: Support gbpages in pagetable dump


Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86/mm/fault.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

Index: linux/arch/x86/mm/fault.c
===================================================================
--- linux.orig/arch/x86/mm/fault.c
+++ linux/arch/x86/mm/fault.c
@@ -240,7 +240,8 @@ void dump_pagetable(unsigned long addres
pud = pud_offset(pgd, address);
if (bad_address(pud)) goto bad;
printk("PUD %lx ", pud_val(*pud));
- if (!pud_present(*pud)) goto ret;
+ if (!pud_present(*pud) || pud_large(*pud))
+ goto ret;

pmd = pmd_offset(pud, address);
if (bad_address(pmd)) goto bad;

2008-02-01 09:55:14

by Andi Kleen

Subject: [PATCH] [6/12] GBPAGES: Add gbpages support to lookup_address


Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86/mm/pageattr.c | 5 +++++
1 file changed, 5 insertions(+)

Index: linux/arch/x86/mm/pageattr.c
===================================================================
--- linux.orig/arch/x86/mm/pageattr.c
+++ linux/arch/x86/mm/pageattr.c
@@ -155,6 +155,11 @@ pte_t *lookup_address(unsigned long addr
pud = pud_offset(pgd, address);
if (pud_none(*pud))
return NULL;
+
+ *level = PG_LEVEL_1G;
+ if (pud_large(*pud))
+ return (pte_t *)pud;
+
pmd = pmd_offset(pud, address);
if (pmd_none(*pmd))
return NULL;

2008-02-01 09:55:39

by Andi Kleen

Subject: [PATCH] [7/12] GBPAGES: Add an option to disable direct mapping gbpages and a global variable


Signed-off-by: Andi Kleen <[email protected]>

---
Documentation/x86_64/boot-options.txt | 5 +++++
arch/x86/mm/init_64.c | 16 ++++++++++++++++
include/asm-x86/pgtable_64.h | 7 +++++++
3 files changed, 28 insertions(+)

Index: linux/arch/x86/mm/init_64.c
===================================================================
--- linux.orig/arch/x86/mm/init_64.c
+++ linux/arch/x86/mm/init_64.c
@@ -53,6 +53,22 @@ static unsigned long dma_reserve __initd

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);

+enum gbopt direct_gbpages __meminitdata = GBP_DEFAULT;
+
+static int __init parse_direct_gbpages(char *arg)
+{
+ if (!strcmp(arg, "off")) {
+ direct_gbpages = GBP_OFF;
+ return 0;
+ }
+ if (!strcmp(arg, "on")) {
+ direct_gbpages = GBP_ON;
+ return 0;
+ }
+ return -1;
+}
+early_param("direct_gbpages", parse_direct_gbpages);
+
/*
* NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
* physical space so we can cache the place of the first one and move
Index: linux/include/asm-x86/pgtable_64.h
===================================================================
--- linux.orig/include/asm-x86/pgtable_64.h
+++ linux/include/asm-x86/pgtable_64.h
@@ -239,6 +239,13 @@ static inline int pud_large(pud_t pte)

#define update_mmu_cache(vma,address,pte) do { } while (0)

+enum gbopt {
+ GBP_OFF = -1,
+ GBP_DEFAULT,
+ GBP_ON
+};
+extern enum gbopt direct_gbpages;
+
/* Encode and de-code a swap entry */
#define __swp_type(x) (((x).val >> 1) & 0x3f)
#define __swp_offset(x) ((x).val >> 8)
Index: linux/Documentation/x86_64/boot-options.txt
===================================================================
--- linux.orig/Documentation/x86_64/boot-options.txt
+++ linux/Documentation/x86_64/boot-options.txt
@@ -307,3 +307,8 @@ Debugging
stuck (default)

Miscellaneous
+
+ direct_gbpages=off|on
+ off: Do not use GB pages for kernel direct mapping.
+ on: Use GB pages if hardware supports it, but some heuristic
+ would turn them off.

2008-02-01 09:55:56

by Andi Kleen

Subject: [PATCH] [9/12] GBPAGES: Switch pci-gart over to using cpa instead of clear_kernel_mapping


pci-gart needs to unmap the IOMMU aperture to prevent cache corruption.

Switch this over to using cpa instead of clear_kernel_mapping().

The drawback right now is that cpa will split everything down to 4K
pages, which costs one 4K page of PTEs for each 2MB of aperture: 128KB
for a typical 64MB aperture.
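
The arithmetic behind that estimate, as a runnable sketch (one 4K page
of PTEs is needed for every 2MB mapping that gets split):

#include <stdio.h>

int main(void)
{
	unsigned long aperture  = 64UL << 20;      /* 64MB aperture */
	unsigned long pte_pages = aperture >> 21;  /* one PTE page per 2MB */
	unsigned long overhead  = pte_pages << 12; /* each PTE page is 4K */

	printf("overhead: %luKB\n", overhead >> 10);   /* prints 128KB */
	return 0;
}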

Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86/kernel/pci-gart_64.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

Index: linux/arch/x86/kernel/pci-gart_64.c
===================================================================
--- linux.orig/arch/x86/kernel/pci-gart_64.c
+++ linux/arch/x86/kernel/pci-gart_64.c
@@ -731,7 +731,8 @@ void __init gart_iommu_init(void)
* the backing memory. The GART address is only used by PCI
* devices.
*/
- clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size);
+ set_memory_np((unsigned long)__va(iommu_bus_base),
+ iommu_size >> PAGE_SHIFT);

/*
* Try to workaround a bug (thanks to BenH)

2008-02-01 09:56:27

by Andi Kleen

Subject: [PATCH] [8/12] GBPAGES: Implement gbpages support in change_page_attr()


Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86/mm/pageattr.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)

Index: linux/arch/x86/mm/pageattr.c
===================================================================
--- linux.orig/arch/x86/mm/pageattr.c
+++ linux/arch/x86/mm/pageattr.c
@@ -203,6 +203,7 @@ static int split_large_page(pte_t *kpte,
pte_t *pbase, *tmp;
struct page *base;
unsigned int i, level;
+ unsigned long ps;

#ifdef CONFIG_DEBUG_PAGEALLOC
gfp_flags = __GFP_HIGH | __GFP_NOFAIL | __GFP_NOWARN;
@@ -225,13 +226,23 @@ static int split_large_page(pte_t *kpte,

address = __pa(address);
addr = address & PMD_PAGE_MASK;
+
+ ps = PAGE_SIZE;
+#ifdef CONFIG_X86_64
+ if (level == PG_LEVEL_1G) {
+ ps = PMD_PAGE_SIZE;
+ pgprot_val(ref_prot) |= _PAGE_PSE;
+ addr &= PUD_PAGE_MASK;
+ }
+#endif
+
pbase = (pte_t *)page_address(base);
#ifdef CONFIG_X86_32
paravirt_alloc_pt(&init_mm, page_to_pfn(base));
#endif

pgprot_val(ref_prot) &= ~_PAGE_NX;
- for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
+ for (i = 0; i < PTRS_PER_PTE; i++, addr += ps)
set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));

/*

2008-02-01 09:56:45

by Andi Kleen

Subject: [PATCH] [10/12] GBPAGES: Remove now unused clear_kernel_mapping


Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86/mm/init_64.c | 43 -------------------------------------------
include/asm-x86/pgtable_64.h | 1 -
2 files changed, 44 deletions(-)

Index: linux/arch/x86/mm/init_64.c
===================================================================
--- linux.orig/arch/x86/mm/init_64.c
+++ linux/arch/x86/mm/init_64.c
@@ -456,49 +456,6 @@ void __init paging_init(void)
#endif

/*
- * Unmap a kernel mapping if it exists. This is useful to avoid
- * prefetches from the CPU leading to inconsistent cache lines.
- * address and size must be aligned to 2MB boundaries.
- * Does nothing when the mapping doesn't exist.
- */
-void __init clear_kernel_mapping(unsigned long address, unsigned long size)
-{
- unsigned long end = address + size;
-
- BUG_ON(address & ~PMD_PAGE_MASK);
- BUG_ON(size & ~PMD_PAGE_MASK);
-
- for (; address < end; address += PMD_PAGE_SIZE) {
- pgd_t *pgd = pgd_offset_k(address);
- pud_t *pud;
- pmd_t *pmd;
-
- if (pgd_none(*pgd))
- continue;
-
- pud = pud_offset(pgd, address);
- if (pud_none(*pud))
- continue;
-
- pmd = pmd_offset(pud, address);
- if (!pmd || pmd_none(*pmd))
- continue;
-
- if (!(pmd_val(*pmd) & _PAGE_PSE)) {
- /*
- * Could handle this, but it should not happen
- * currently:
- */
- printk(KERN_ERR "clear_kernel_mapping: "
- "mapping has been split. will leak memory\n");
- pmd_ERROR(*pmd);
- }
- set_pmd(pmd, __pmd(0));
- }
- __flush_tlb_all();
-}
-
-/*
* Memory hotplug specific functions
*/
void online_page(struct page *page)
Index: linux/include/asm-x86/pgtable_64.h
===================================================================
--- linux.orig/include/asm-x86/pgtable_64.h
+++ linux/include/asm-x86/pgtable_64.h
@@ -21,7 +21,6 @@ extern pgd_t init_level4_pgt[];
#define swapper_pg_dir init_level4_pgt

extern void paging_init(void);
-extern void clear_kernel_mapping(unsigned long addr, unsigned long size);

#endif /* !__ASSEMBLY__ */

2008-02-01 09:57:16

by Andi Kleen

Subject: [PATCH] [11/12] GBPAGES: Do kernel direct mapping at boot using GB pages


The AMD Fam10h CPUs support a new gigabyte page table entry for
mapping 1GB at a time. Use this for the kernel direct mapping.

Only done for 64-bit because i386 does not support GB page tables.

This only applies to the data portion of the direct mapping; the
kernel text mapping stays with 2MB pages because the AMD Fam10h
microarchitecture does not support GB ITLBs and AMD recommends
against using GB mappings for code.

GB pages are disabled when DEBUG_PAGEALLOC is enabled, because
DEBUG_PAGEALLOC causes recursion in cpa() and with gbpages
the potential maximum recursion depth is much greater.

Can be disabled with direct_gbpages=off
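
A rough sketch of the page-table space this saves, mirroring the
find_early_table_space() computation changed below (numbers assume a
hypothetical 16GB of RAM; illustration only, not part of the patch):

#include <stdio.h>

#define ROUND_UP(x, a) (((x) + (a) - 1) / (a) * (a))

int main(void)
{
	unsigned long end  = 16UL << 30;                 /* 16GB RAM */
	unsigned long puds = (end + (1UL << 30) - 1) >> 30;
	unsigned long pmds = (end + (1UL << 21) - 1) >> 21;

	/* 8-byte entries, rounded up to whole 4K pages */
	unsigned long pud_tables = ROUND_UP(puds * 8, 4096);
	unsigned long pmd_tables = ROUND_UP(pmds * 8, 4096);

	printf("2MB mappings: %lu KB of tables\n",
	       (pud_tables + pmd_tables) >> 10);
	printf("1GB mappings: %lu KB of tables\n", pud_tables >> 10);
	return 0;
}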

Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86/mm/init_64.c | 35 ++++++++++++++++++++++++++++++-----
1 file changed, 30 insertions(+), 5 deletions(-)

Index: linux/arch/x86/mm/init_64.c
===================================================================
--- linux.orig/arch/x86/mm/init_64.c
+++ linux/arch/x86/mm/init_64.c
@@ -339,7 +339,14 @@ phys_pud_init(pud_t *pud_page, unsigned
}

if (pud_val(*pud)) {
- phys_pmd_update(pud, addr, end);
+ if (!pud_large(*pud))
+ phys_pmd_update(pud, addr, end);
+ continue;
+ }
+
+ if (direct_gbpages == GBP_ON) {
+ set_pte((pte_t *)pud,
+ pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
continue;
}

@@ -360,9 +367,11 @@ static void __init find_early_table_spac
unsigned long puds, pmds, tables, start;

puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
- pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
- tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
- round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
+ tables = round_up(puds * sizeof(pud_t), PAGE_SIZE);
+ if (direct_gbpages == GBP_ON) {
+ pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
+ tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
+ }

/*
* RED-PEN putting page tables only on node 0 could
@@ -389,6 +398,20 @@ static void __init find_early_table_spac
(table_start << PAGE_SHIFT) + tables);
}

+static void __init init_gbpages(void)
+{
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ /* debug pagealloc causes too much recursion with gbpages */
+ if (direct_gbpages != GBP_DEFAULT)
+ return;
+#endif
+ if (direct_gbpages != GBP_OFF && cpu_has_gbpages) {
+ printk(KERN_INFO "Using GB pages for direct mapping\n");
+ direct_gbpages = GBP_ON;
+ } else
+ direct_gbpages = GBP_OFF;
+}
+
/*
* Setup the direct mapping of the physical memory at PAGE_OFFSET.
* This runs before bootmem is initialized and gets pages directly from
@@ -407,8 +430,10 @@ void __init_refok init_memory_mapping(un
* memory mapped. Unfortunately this is done currently before the
* nodes are discovered.
*/
- if (!after_bootmem)
+ if (!after_bootmem) {
+ init_gbpages();
find_early_table_space(end);
+ }

start = (unsigned long)__va(start);
end = (unsigned long)__va(end);

2008-02-01 09:57:38

by Andi Kleen

Subject: [PATCH] [12/12] GBPAGES: Switch direct mapping setup over to set_pte


[Actually not needed for gbpages, but an independent, related cleanup]

Use set_pte() for setting up the 2MB pages in the direct mapping similar
to what the earlier GBPAGES patches did for the 1GB PUDs.

Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86/mm/init_64.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)

Index: linux/arch/x86/mm/init_64.c
===================================================================
--- linux.orig/arch/x86/mm/init_64.c
+++ linux/arch/x86/mm/init_64.c
@@ -289,7 +289,6 @@ phys_pmd_init(pmd_t *pmd_page, unsigned
int i = pmd_index(address);

for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
- unsigned long entry;
pmd_t *pmd = pmd_page + pmd_index(address);

if (address >= end) {
@@ -303,9 +302,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned
if (pmd_val(*pmd))
continue;

- entry = __PAGE_KERNEL_LARGE|_PAGE_GLOBAL|address;
- entry &= __supported_pte_mask;
- set_pmd(pmd, __pmd(entry));
+ set_pte((pte_t *)pmd,
+ pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
}
}

2008-02-01 12:25:59

by Thomas Gleixner

Subject: Re: [PATCH] [0/12] GBPAGES patchkit for 2.6.25 v3

On Fri, 1 Feb 2008, Andi Kleen wrote:

>
> This patchkit implements support for the 1GB pages of AMD Fam10h CPUs
> in the kernel direct mapping.
>
> Changes to previous versions:
> - Incorporated feedback from Thomas Gleixner
> - Switch direct mapping setup over to set_pte() / pfn_pte()
> - Split up patches some more
>
> It now includes one not strictly required cleanup patch (the last one)

I picked them up. Will go through and integrate with other
changes in this area.

Thanks,
tglx

2008-02-01 17:38:17

by Jeremy Fitzhardinge

Subject: Re: [PATCH] [12/12] GBPAGES: Switch direct mapping setup over to set_pte

Andi Kleen wrote:
> [Actually not needed for gbpages, but an independent, related cleanup]
>
> Use set_pte() for setting up the 2MB pages in the direct mapping similar
> to what the earlier GBPAGES patches did for the 1GB PUDs.
>
> Signed-off-by: Andi Kleen <[email protected]>
>
> ---
> arch/x86/mm/init_64.c | 6 ++----
> 1 file changed, 2 insertions(+), 4 deletions(-)
>
> Index: linux/arch/x86/mm/init_64.c
> ===================================================================
> --- linux.orig/arch/x86/mm/init_64.c
> +++ linux/arch/x86/mm/init_64.c
> @@ -289,7 +289,6 @@ phys_pmd_init(pmd_t *pmd_page, unsigned
> int i = pmd_index(address);
>
> for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
> - unsigned long entry;
> pmd_t *pmd = pmd_page + pmd_index(address);
>
> if (address >= end) {
> @@ -303,9 +302,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned
> if (pmd_val(*pmd))
> continue;
>
> - entry = __PAGE_KERNEL_LARGE|_PAGE_GLOBAL|address;
> - entry &= __supported_pte_mask;
> - set_pmd(pmd, __pmd(entry));
> + set_pte((pte_t *)pmd,
> + pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
>

Why? 64-bit Xen will need this to be set_pmd if it's an update to L2 of
the table.

J

2008-02-01 17:58:08

by Andi Kleen

Subject: Re: [PATCH] [12/12] GBPAGES: Switch direct mapping setup over to set_pte


> Why? 64-bit Xen will need this to be set_pmd if it's an update to L2 of
> the table.

Then change_page_attr() and hugepages already do not work, because they both
do exactly that.

And I didn't want to duplicate this manual code for the GBpages case, so I
changed it everywhere to the standard way.

-Andi


> J

2008-02-01 19:17:06

by Jeremy Fitzhardinge

Subject: Re: [PATCH] [12/12] GBPAGES: Switch direct mapping setup over to set_pte

Andi Kleen wrote:
>> Why? 64-bit Xen will need this to be set_pmd if it's an update to L2 of
>> the table.
>>
>
> Then change_page_attr() and hugepages already do not work, because they both
> do exactly that.
>
> And I didn't want to duplicate this manual code for the GBpages case, so I
> changed it everywhere to the standard way.

It's a bit moot because Xen doesn't support any kind of large page yet,
but there has been some work in that area. The main problem with using
set_pte is that Xen supports trap'n'emulate for pte-level accesses, but
not for upper levels.

Looks like you're right about the rest of cpa; may as well make it all
consistent for now, and we can fix it later when the need arises.

J

2008-02-05 22:43:10

by Randy Dunlap

Subject: Re: [PATCH] [7/12] GBPAGES: Add an option to disable direct mapping gbpages and a global variable

On Fri, 1 Feb 2008 10:53:20 +0100 (CET) Andi Kleen wrote:

>
> Signed-off-by: Andi Kleen <[email protected]>
>
> ---
> Documentation/x86_64/boot-options.txt | 5 +++++
> arch/x86/mm/init_64.c | 16 ++++++++++++++++
> include/asm-x86/pgtable_64.h | 7 +++++++
> 3 files changed, 28 insertions(+)
>
> Index: linux/arch/x86/mm/init_64.c
> ===================================================================
> --- linux.orig/arch/x86/mm/init_64.c
> +++ linux/arch/x86/mm/init_64.c
> @@ -53,6 +53,22 @@ static unsigned long dma_reserve __initd
>
> DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
>
> +enum gbopt direct_gbpages __meminitdata = GBP_DEFAULT;
> +
> +static int __init parse_direct_gbpages(char *arg)
> +{
> + if (!strcmp(arg, "off")) {
> + direct_gbpages = GBP_OFF;
> + return 0;
> + }
> + if (!strcmp(arg, "on")) {
> + direct_gbpages = GBP_ON;
> + return 0;
> + }
> + return -1;
> +}
> +early_param("direct_gbpages", parse_direct_gbpages);
> +
> /*
> * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
> * physical space so we can cache the place of the first one and move
> Index: linux/include/asm-x86/pgtable_64.h
> ===================================================================
> --- linux.orig/include/asm-x86/pgtable_64.h
> +++ linux/include/asm-x86/pgtable_64.h
> @@ -239,6 +239,13 @@ static inline int pud_large(pud_t pte)
>
> #define update_mmu_cache(vma,address,pte) do { } while (0)
>
> +enum gbopt {
> + GBP_OFF = -1,
> + GBP_DEFAULT,
> + GBP_ON
> +};
> +extern enum gbopt direct_gbpages;
> +
> /* Encode and de-code a swap entry */
> #define __swp_type(x) (((x).val >> 1) & 0x3f)
> #define __swp_offset(x) ((x).val >> 8)
> Index: linux/Documentation/x86_64/boot-options.txt
> ===================================================================
> --- linux.orig/Documentation/x86_64/boot-options.txt
> +++ linux/Documentation/x86_64/boot-options.txt
> @@ -307,3 +307,8 @@ Debugging
> stuck (default)
>
> Miscellaneous
> +
> + direct_gbpages=off|on
> + off: Do not use GB pages for kernel direct mapping.
> + on: Use GB pages if hardware supports it, but some heuristic
> + would turn them off.

s/would/could/ ?

---
~Randy