2007-11-12 14:41:23

by Martin Schwidefsky

[permalink] [raw]
Subject: [patch 2/3] CONFIG_HIGHPTE vs. sub-page page tables.

From: Martin Schwidefsky <[email protected]>

Background: I've implemented 1K/2K page tables for s390. These sub-page
page tables are required to properly support the s390 virtualization
instruction with KVM. The SIE instruction requires that the page tables
have 256 page table entries (pte) followed by 256 page status table
entries (pgste). The pgstes are only required if the process is using
the SIE instruction. The pgstes are updated by the hardware and by the
hypervisor for a number of reasons, one of them being dirty and reference
bit tracking. To avoid wasting memory, the standard pte table allocation
should return 1K/2K (31/64 bit), or 2K/4K if the process is using SIE.

Problem: Page size on s390 is 4K, page table size is 1K or 2K. That
means the s390 version of pte_alloc_one cannot return a pointer to
a struct page. Trouble is that with the CONFIG_HIGHPTE feature on x86
pte_alloc_one cannot return a pointer to a pte either, since that would
require more than 32 bits for the return value of pte_alloc_one (and the
pte * would not be accessible since it's not kmapped).

Solution: The only solution I found to this dilemma is a new typedef:
a pgtable_t. For s390 pgtable_t will be a (pte *) - to be introduced
with a later patch. For everybody else it will be a (struct page *).
The additional problem with the initialization of the ptl lock and the
NR_PAGETABLE accounting is solved with a constructor, pgtable_page_ctor,
and a destructor, pgtable_page_dtor. The page table allocation and free
functions need to call these two whenever a page table page is allocated
or freed. pmd_populate will get a pgtable_t instead of a struct page
pointer. To get the pgtable_t back from a pmd entry that has been
installed with pmd_populate, a new function pmd_pgtable is added. It
replaces the pmd_page call in free_pte_range and apply_to_pte_range.

Cc: Benjamin Herrenschmidt <[email protected]>
Signed-off-by: Martin Schwidefsky <[email protected]>
---

arch/frv/mm/pgalloc.c | 8 +++++---
arch/powerpc/mm/pgtable_32.c | 14 ++++++++------
arch/ppc/mm/pgtable.c | 9 ++++++---
arch/s390/mm/pgtable.c | 2 ++
arch/sparc/mm/srmmu.c | 10 +++++++---
arch/sparc/mm/sun4c.c | 14 ++++++++++----
arch/um/kernel/mem.c | 4 +++-
arch/x86/mm/pgtable_32.c | 4 +++-
include/asm-alpha/page.h | 2 ++
include/asm-alpha/pgalloc.h | 22 ++++++++++++++--------
include/asm-arm/page.h | 2 ++
include/asm-arm/pgalloc.h | 9 ++++++---
include/asm-avr32/page.h | 1 +
include/asm-avr32/pgalloc.h | 16 ++++++++++++----
include/asm-cris/page.h | 1 +
include/asm-cris/pgalloc.h | 14 ++++++++++----
include/asm-frv/page.h | 1 +
include/asm-frv/pgalloc.h | 12 +++++++++---
include/asm-ia64/page.h | 2 ++
include/asm-ia64/pgalloc.h | 20 ++++++++++++++------
include/asm-m32r/page.h | 1 +
include/asm-m32r/pgalloc.h | 10 ++++++----
include/asm-m68k/motorola_pgalloc.h | 14 ++++++++------
include/asm-m68k/page.h | 1 +
include/asm-m68k/sun3_pgalloc.h | 17 ++++++++++++-----
include/asm-mips/page.h | 1 +
include/asm-mips/pgalloc.h | 5 +++--
include/asm-parisc/page.h | 1 +
include/asm-parisc/pgalloc.h | 11 +++++++++--
include/asm-powerpc/page.h | 2 ++
include/asm-powerpc/pgalloc-32.h | 6 ++++--
include/asm-powerpc/pgalloc-64.h | 26 +++++++++++++++++++-------
include/asm-ppc/pgalloc.h | 6 ++++--
include/asm-s390/page.h | 2 ++
include/asm-s390/pgalloc.h | 3 ++-
include/asm-s390/tlb.h | 2 +-
include/asm-sh/page.h | 2 ++
include/asm-sh/pgalloc.h | 27 ++++++++++++++++++++-------
include/asm-sh64/page.h | 2 ++
include/asm-sh64/pgalloc.h | 27 ++++++++++++++++++++-------
include/asm-sparc/page.h | 2 ++
include/asm-sparc/pgalloc.h | 5 +++--
include/asm-sparc64/page.h | 2 ++
include/asm-sparc64/pgalloc.h | 19 ++++++++++++++-----
include/asm-um/page.h | 2 ++
include/asm-um/pgalloc.h | 12 +++++++++---
include/asm-x86/page_32.h | 2 ++
include/asm-x86/page_64.h | 2 ++
include/asm-x86/pgalloc_32.h | 7 +++++--
include/asm-x86/pgalloc_64.h | 22 +++++++++++++++++-----
include/asm-xtensa/page.h | 1 +
include/asm-xtensa/pgalloc.h | 17 ++++++++++++-----
include/linux/mm.h | 14 +++++++++++++-
mm/memory.c | 32 +++++++++++++++-----------------
mm/vmalloc.c | 2 +-
55 files changed, 338 insertions(+), 136 deletions(-)

Index: linux-2.6/arch/frv/mm/pgalloc.c
===================================================================
--- linux-2.6.orig/arch/frv/mm/pgalloc.c
+++ linux-2.6/arch/frv/mm/pgalloc.c
@@ -28,7 +28,7 @@ pte_t *pte_alloc_one_kernel(struct mm_st
return pte;
}

-struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *page;

@@ -37,9 +37,11 @@ struct page *pte_alloc_one(struct mm_str
#else
page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
#endif
- if (page)
+ if (page) {
clear_highpage(page);
- flush_dcache_page(page);
+ pgtable_page_ctor(page);
+ flush_dcache_page(page);
+ }
return page;
}

Index: linux-2.6/arch/powerpc/mm/pgtable_32.c
===================================================================
--- linux-2.6.orig/arch/powerpc/mm/pgtable_32.c
+++ linux-2.6/arch/powerpc/mm/pgtable_32.c
@@ -107,19 +107,20 @@ __init_refok pte_t *pte_alloc_one_kernel
return pte;
}

-struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *ptepage;

#ifdef CONFIG_HIGHPTE
- gfp_t flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT;
+ gfp_t flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT | __GFP_ZERO;
#else
- gfp_t flags = GFP_KERNEL | __GFP_REPEAT;
+ gfp_t flags = GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO;
#endif

ptepage = alloc_pages(flags, 0);
- if (ptepage)
- clear_highpage(ptepage);
+ if (!ptepage)
+ return NULL;
+ pgtable_page_ctor(ptepage);
return ptepage;
}

@@ -131,11 +132,12 @@ void pte_free_kernel(struct mm_struct *m
free_page((unsigned long)pte);
}

-void pte_free(struct mm_struct *mm, struct page *ptepage)
+void pte_free(struct mm_struct *mm, pgtable_t ptepage)
{
#ifdef CONFIG_SMP
hash_page_sync();
#endif
+ pgtable_page_dtor(ptepage);
__free_page(ptepage);
}

Index: linux-2.6/arch/ppc/mm/pgtable.c
===================================================================
--- linux-2.6.orig/arch/ppc/mm/pgtable.c
+++ linux-2.6/arch/ppc/mm/pgtable.c
@@ -108,7 +108,7 @@ __init_refok pte_t *pte_alloc_one_kernel
return pte;
}

-struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *ptepage;

@@ -119,8 +119,10 @@ struct page *pte_alloc_one(struct mm_str
#endif

ptepage = alloc_pages(flags, 0);
- if (ptepage)
+ if (ptepage) {
clear_highpage(ptepage);
+ pgtable_page_ctor(ptepage);
+ }
return ptepage;
}

@@ -132,11 +134,12 @@ void pte_free_kernel(struct mm_struct *m
free_page((unsigned long)pte);
}

-void pte_free(struct mm_struct *mm, struct page *ptepage)
+void pte_free(struct mm_struct *mm, pgtable_t ptepage)
{
#ifdef CONFIG_SMP
hash_page_sync();
#endif
+ pgtable_page_dtor(ptepage);
__free_page(ptepage);
}

Index: linux-2.6/arch/s390/mm/pgtable.c
===================================================================
--- linux-2.6.orig/arch/s390/mm/pgtable.c
+++ linux-2.6/arch/s390/mm/pgtable.c
@@ -78,6 +78,7 @@ unsigned long *page_table_alloc(int noex
clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
page->index = (addr_t) table;
}
+ pgtable_page_ctor(page);
table = (unsigned long *) page_to_phys(page);
clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
return table;
@@ -87,6 +88,7 @@ void page_table_free(unsigned long *tabl
{
unsigned long *shadow = get_shadow_pte(table);

+ pgtable_page_dtor(virt_to_page(table));
if (shadow)
free_page((unsigned long) shadow);
free_page((unsigned long) table);
Index: linux-2.6/arch/sparc/mm/srmmu.c
===================================================================
--- linux-2.6.orig/arch/sparc/mm/srmmu.c
+++ linux-2.6/arch/sparc/mm/srmmu.c
@@ -490,14 +490,17 @@ srmmu_pte_alloc_one_kernel(struct mm_str
return (pte_t *)srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
}

-static struct page *
+static pgtable_t
srmmu_pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
unsigned long pte;
+ struct page *page;

if ((pte = (unsigned long)srmmu_pte_alloc_one_kernel(mm, address)) == 0)
return NULL;
- return pfn_to_page( __nocache_pa(pte) >> PAGE_SHIFT );
+ page = pfn_to_page( __nocache_pa(pte) >> PAGE_SHIFT );
+ pgtable_page_ctor(page);
+ return page;
}

static void srmmu_free_pte_fast(pte_t *pte)
@@ -505,10 +508,11 @@ static void srmmu_free_pte_fast(pte_t *p
srmmu_free_nocache((unsigned long)pte, PTE_SIZE);
}

-static void srmmu_pte_free(struct page *pte)
+static void srmmu_pte_free(pgtable_t pte)
{
unsigned long p;

+ pgtable_page_dtor(pte);
p = (unsigned long)page_address(pte); /* Cached address (for test) */
if (p == 0)
BUG();
Index: linux-2.6/arch/sparc/mm/sun4c.c
===================================================================
--- linux-2.6.orig/arch/sparc/mm/sun4c.c
+++ linux-2.6/arch/sparc/mm/sun4c.c
@@ -1947,12 +1947,17 @@ static pte_t *sun4c_pte_alloc_one_kernel
return pte;
}

-static struct page *sun4c_pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static pgtable_t sun4c_pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
- pte_t *pte = sun4c_pte_alloc_one_kernel(mm, address);
+ pte_t *pte;
+ struct page *page;
+
+ pte = sun4c_pte_alloc_one_kernel(mm, address);
if (pte == NULL)
return NULL;
- return virt_to_page(pte);
+ page = virt_to_page(pte);
+ pgtable_page_ctor(page);
+ return page;
}

static inline void sun4c_free_pte_fast(pte_t *pte)
@@ -1962,8 +1967,9 @@ static inline void sun4c_free_pte_fast(p
pgtable_cache_size++;
}

-static void sun4c_pte_free(struct page *pte)
+static void sun4c_pte_free(pgtable_t pte)
{
+ pgtable_page_dtor(pte);
sun4c_free_pte_fast(page_address(pte));
}

Index: linux-2.6/arch/um/kernel/mem.c
===================================================================
--- linux-2.6.orig/arch/um/kernel/mem.c
+++ linux-2.6/arch/um/kernel/mem.c
@@ -361,10 +361,12 @@ pte_t *pte_alloc_one_kernel(struct mm_st
return pte;
}

-struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *pte;

pte = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+ if (pte)
+ pgtable_page_ctor(pte);
return pte;
}
Index: linux-2.6/arch/x86/mm/pgtable_32.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/pgtable_32.c
+++ linux-2.6/arch/x86/mm/pgtable_32.c
@@ -183,7 +183,7 @@ pte_t *pte_alloc_one_kernel(struct mm_st
return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
}

-struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *pte;

@@ -192,6 +192,8 @@ struct page *pte_alloc_one(struct mm_str
#else
pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
#endif
+ if (pte)
+ pgtable_page_ctor(pte);
return pte;
}

Index: linux-2.6/include/asm-alpha/page.h
===================================================================
--- linux-2.6.orig/include/asm-alpha/page.h
+++ linux-2.6/include/asm-alpha/page.h
@@ -62,6 +62,8 @@ typedef unsigned long pgprot_t;

#endif /* STRICT_MM_TYPECHECKS */

+typedef struct page *pgtable_t;
+
#ifdef USE_48_BIT_KSEG
#define PAGE_OFFSET 0xffff800000000000UL
#else
Index: linux-2.6/include/asm-alpha/pgalloc.h
===================================================================
--- linux-2.6.orig/include/asm-alpha/pgalloc.h
+++ linux-2.6/include/asm-alpha/pgalloc.h
@@ -11,10 +11,11 @@
*/

static inline void
-pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
+pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte)
{
pmd_set(pmd, (pte_t *)(page_to_pa(pte) + PAGE_OFFSET));
}
+#define pmd_pgtable(pmd) pmd_page(pmd)

static inline void
pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
@@ -57,18 +58,23 @@ pte_free_kernel(struct mm_struct *mm, pt
free_page((unsigned long)pte);
}

-static inline struct page *
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline pgtable_t
+pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
- pte_t *pte = pte_alloc_one_kernel(mm, addr);
- if (pte)
- return virt_to_page(pte);
- return NULL;
+ pte_t *pte = pte_alloc_one_kernel(mm, address);
+ struct page *page;
+
+ if (!pte)
+ return NULL;
+ page = virt_to_page(pte);
+ pgtable_page_ctor(page);
+ return page;
}

static inline void
-pte_free(struct mm_struct *mm, struct page *page)
+pte_free(struct mm_struct *mm, pgtable_t page)
{
+ pgtable_page_dtor(page);
__free_page(page);
}

Index: linux-2.6/include/asm-arm/page.h
===================================================================
--- linux-2.6.orig/include/asm-arm/page.h
+++ linux-2.6/include/asm-arm/page.h
@@ -171,6 +171,8 @@ typedef unsigned long pgprot_t;

#endif /* STRICT_MM_TYPECHECKS */

+typedef struct page *pgtable_t;
+
#endif /* CONFIG_MMU */

#include <asm/memory.h>
Index: linux-2.6/include/asm-arm/pgalloc.h
===================================================================
--- linux-2.6.orig/include/asm-arm/pgalloc.h
+++ linux-2.6/include/asm-arm/pgalloc.h
@@ -66,7 +66,7 @@ pte_alloc_one_kernel(struct mm_struct *m
return pte;
}

-static inline struct page *
+static inline pgtable_t
pte_alloc_one(struct mm_struct *mm, unsigned long addr)
{
struct page *pte;
@@ -75,6 +75,7 @@ pte_alloc_one(struct mm_struct *mm, unsi
if (pte) {
void *page = page_address(pte);
clean_dcache_area(page, sizeof(pte_t) * PTRS_PER_PTE);
+ pgtable_page_ctor(pte);
}

return pte;
@@ -91,8 +92,9 @@ static inline void pte_free_kernel(struc
}
}

-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
+ pgtable_page_dtor(pte);
__free_page(pte);
}

@@ -123,10 +125,11 @@ pmd_populate_kernel(struct mm_struct *mm
}

static inline void
-pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep)
+pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep)
{
__pmd_populate(pmdp, page_to_pfn(ptep) << PAGE_SHIFT | _PAGE_USER_TABLE);
}
+#define pmd_pgtable(pmd) pmd_page(pmd)

#endif /* CONFIG_MMU */

Index: linux-2.6/include/asm-avr32/page.h
===================================================================
--- linux-2.6.orig/include/asm-avr32/page.h
+++ linux-2.6/include/asm-avr32/page.h
@@ -34,6 +34,7 @@ extern void copy_page(void *to, void *fr
typedef struct { unsigned long pte; } pte_t;
typedef struct { unsigned long pgd; } pgd_t;
typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct page *pgtable_t;

#define pte_val(x) ((x).pte)
#define pgd_val(x) ((x).pgd)
Index: linux-2.6/include/asm-avr32/pgalloc.h
===================================================================
--- linux-2.6.orig/include/asm-avr32/pgalloc.h
+++ linux-2.6/include/asm-avr32/pgalloc.h
@@ -17,10 +17,11 @@
set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)))

static __inline__ void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
- struct page *pte)
+ pgtable_t pte)
{
set_pmd(pmd, __pmd(_PAGE_TABLE + page_to_phys(pte)));
}
+#define pmd_pgtable(pmd) pmd_page(pmd)

/*
* Allocate and free page tables
@@ -51,7 +52,9 @@ static inline struct page *pte_alloc_one
struct page *pte;

pte = alloc_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
-
+ if (!page)
+ return NULL;
+ pgtable_page_ctor(page);
return pte;
}

@@ -60,12 +63,17 @@ static inline void pte_free_kernel(struc
free_page((unsigned long)pte);
}

-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
+ pgtable_page_dtor(pte);
__free_page(pte);
}

-#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+#define __pte_free_tlb(tlb,pte) \
+do { \
+ pgtable_page_dtor(pte); \
+ tlb_remove_page((tlb), pte); \
+} while (0)

#define check_pgt_cache() do { } while(0)

Index: linux-2.6/include/asm-cris/page.h
===================================================================
--- linux-2.6.orig/include/asm-cris/page.h
+++ linux-2.6/include/asm-cris/page.h
@@ -29,6 +29,7 @@
typedef struct { unsigned long pte; } pte_t;
typedef struct { unsigned long pgd; } pgd_t;
typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct page *pgtable_t;
#endif

#define pte_val(x) ((x).pte)
Index: linux-2.6/include/asm-cris/pgalloc.h
===================================================================
--- linux-2.6.orig/include/asm-cris/pgalloc.h
+++ linux-2.6/include/asm-cris/pgalloc.h
@@ -6,6 +6,7 @@

#define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, pte)
#define pmd_populate(mm, pmd, pte) pmd_set(pmd, page_address(pte))
+#define pmd_pgtable(pmd) pmd_page(pmd)

/*
* Allocate and free page tables.
@@ -27,10 +28,11 @@ static inline pte_t *pte_alloc_one_kerne
return pte;
}

-static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *pte;
pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+ pgtable_page_ctor(pte);
return pte;
}

@@ -39,13 +41,17 @@ static inline void pte_free_kernel(struc
free_page((unsigned long)pte);
}

-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
+ pgtable_page_dtor(pte);
__free_page(pte);
}

-
-#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+#define __pte_free_tlb(tlb,pte) \
+do { \
+ pgtable_page_dtor(pte); \
+ tlb_remove_page((tlb), pte); \
+} while (0)

#define check_pgt_cache() do { } while (0)

Index: linux-2.6/include/asm-frv/page.h
===================================================================
--- linux-2.6.orig/include/asm-frv/page.h
+++ linux-2.6/include/asm-frv/page.h
@@ -25,6 +25,7 @@ typedef struct { unsigned long ste[64];}
typedef struct { pmd_t pue[1]; } pud_t;
typedef struct { pud_t pge[1]; } pgd_t;
typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct page *pgtable_t;

#define pte_val(x) ((x).pte)
#define pmd_val(x) ((x).ste[0])
Index: linux-2.6/include/asm-frv/pgalloc.h
===================================================================
--- linux-2.6.orig/include/asm-frv/pgalloc.h
+++ linux-2.6/include/asm-frv/pgalloc.h
@@ -25,6 +25,7 @@
do { \
__set_pmd((PMD), page_to_pfn(PAGE) << PAGE_SHIFT | _PAGE_TABLE); \
} while(0)
+#define pmd_pgtable(pmd) pmd_page(pmd)

/*
* Allocate and free page tables.
@@ -35,19 +36,24 @@ extern void pgd_free(struct mm_struct *m

extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);

-extern struct page *pte_alloc_one(struct mm_struct *, unsigned long);
+extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);

static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
free_page((unsigned long)pte);
}

-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
+ pgtable_page_dtor(pte);
__free_page(pte);
}

-#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+#define __pte_free_tlb(tlb,pte) \
+do { \
+ pgtable_page_dtor(pte); \
+ tlb_remove_page((tlb),(pte)); \
+} while (0)

/*
* allocating and freeing a pmd is trivial: the 1-entry pmd is
Index: linux-2.6/include/asm-ia64/page.h
===================================================================
--- linux-2.6.orig/include/asm-ia64/page.h
+++ linux-2.6/include/asm-ia64/page.h
@@ -185,6 +185,7 @@ get_order (unsigned long size)
#endif
typedef struct { unsigned long pgd; } pgd_t;
typedef struct { unsigned long pgprot; } pgprot_t;
+ typedef struct page *pgtable_t;

# define pte_val(x) ((x).pte)
# define pmd_val(x) ((x).pmd)
@@ -206,6 +207,7 @@ get_order (unsigned long size)
typedef unsigned long pmd_t;
typedef unsigned long pgd_t;
typedef unsigned long pgprot_t;
+ typedef struct page *pgtable_t;
# endif

# define pte_val(x) (x)
Index: linux-2.6/include/asm-ia64/pgalloc.h
===================================================================
--- linux-2.6.orig/include/asm-ia64/pgalloc.h
+++ linux-2.6/include/asm-ia64/pgalloc.h
@@ -70,10 +70,11 @@ static inline void pmd_free(struct mm_st
#define __pmd_free_tlb(tlb, pmd) pmd_free((tlb)->mm, pmd)

static inline void
-pmd_populate(struct mm_struct *mm, pmd_t * pmd_entry, struct page *pte)
+pmd_populate(struct mm_struct *mm, pmd_t * pmd_entry, pgtable_t pte)
{
pmd_val(*pmd_entry) = page_to_phys(pte);
}
+#define pmd_pgtable(pmd) pmd_page(pmd)

static inline void
pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte)
@@ -81,11 +82,17 @@ pmd_populate_kernel(struct mm_struct *mm
pmd_val(*pmd_entry) = __pa(pte);
}

-static inline struct page *pte_alloc_one(struct mm_struct *mm,
- unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- void *pg = quicklist_alloc(0, GFP_KERNEL, NULL);
- return pg ? virt_to_page(pg) : NULL;
+ struct page *page;
+ void *pg;
+
+ pg = quicklist_alloc(0, GFP_KERNEL, NULL);
+ if (!pg)
+ return NULL;
+ page = virt_to_page(pg);
+ pgtable_page_ctor(page);
+ return page;
}

static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
@@ -94,8 +101,9 @@ static inline pte_t *pte_alloc_one_kerne
return quicklist_alloc(0, GFP_KERNEL, NULL);
}

-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
+ pgtable_page_dtor(pte);
quicklist_free_page(0, NULL, pte);
}

Index: linux-2.6/include/asm-m32r/page.h
===================================================================
--- linux-2.6.orig/include/asm-m32r/page.h
+++ linux-2.6/include/asm-m32r/page.h
@@ -28,6 +28,7 @@ typedef struct { unsigned long pgd; } pg
#define PTE_MASK PAGE_MASK

typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct page *pgtable_t;

#define pmd_val(x) ((x).pmd)
#define pgd_val(x) ((x).pgd)
Index: linux-2.6/include/asm-m32r/pgalloc.h
===================================================================
--- linux-2.6.orig/include/asm-m32r/pgalloc.h
+++ linux-2.6/include/asm-m32r/pgalloc.h
@@ -9,10 +9,11 @@
set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)))

static __inline__ void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
- struct page *pte)
+ pgtable_t pte)
{
set_pmd(pmd, __pmd(_PAGE_TABLE + page_to_phys(pte)));
}
+#define pmd_pgtable(pmd) pmd_page(pmd)

/*
* Allocate and free page tables.
@@ -37,12 +38,12 @@ static __inline__ pte_t *pte_alloc_one_k
return pte;
}

-static __inline__ struct page *pte_alloc_one(struct mm_struct *mm,
+static __inline__ pgtable_t pte_alloc_one(struct mm_struct *mm,
unsigned long address)
{
struct page *pte = alloc_page(GFP_KERNEL|__GFP_ZERO);

-
+ pgtable_page_ctor(pte);
return pte;
}

@@ -51,8 +52,9 @@ static __inline__ void pte_free_kernel(s
free_page((unsigned long)pte);
}

-static __inline__ void pte_free(struct mm_struct *mm, struct page *pte)
+static __inline__ void pte_free(struct mm_struct *mm, pgtable_t pte)
{
+ pgtable_page_dtor(pte);
__free_page(pte);
}

Index: linux-2.6/include/asm-m68k/motorola_pgalloc.h
===================================================================
--- linux-2.6.orig/include/asm-m68k/motorola_pgalloc.h
+++ linux-2.6/include/asm-m68k/motorola_pgalloc.h
@@ -7,7 +7,6 @@
extern pmd_t *get_pointer_table(void);
extern int free_pointer_table(pmd_t *);

-
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
pte_t *pte;
@@ -28,7 +27,7 @@ static inline void pte_free_kernel(struc
free_page((unsigned long) pte);
}

-static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *page = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
pte_t *pte;
@@ -43,19 +42,21 @@ static inline struct page *pte_alloc_one
nocache_page(pte);
}
kunmap(pte);
-
+ pgtable_page_ctor(page);
return page;
}

-static inline void pte_free(struct mm_struct *mm, struct page *page)
+static inline void pte_free(struct mm_struct *mm, pgtable_t page)
{
+ pgtable_page_dtor(page);
cache_page(kmap(page));
kunmap(page);
__free_page(page);
}

-static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *page)
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page)
{
+ pgtable_page_dtor(page);
cache_page(kmap(page));
kunmap(page);
__free_page(page);
@@ -94,10 +95,11 @@ static inline void pmd_populate_kernel(s
pmd_set(pmd, pte);
}

-static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *page)
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page)
{
pmd_set(pmd, page_address(page));
}
+#define pmd_pgtable(pmd) pmd_page(pmd)

static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
{
Index: linux-2.6/include/asm-m68k/page.h
===================================================================
--- linux-2.6.orig/include/asm-m68k/page.h
+++ linux-2.6/include/asm-m68k/page.h
@@ -91,6 +91,7 @@ typedef struct { unsigned long pte; } pt
typedef struct { unsigned long pmd[16]; } pmd_t;
typedef struct { unsigned long pgd; } pgd_t;
typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct page *pgtable_t;

#define pte_val(x) ((x).pte)
#define pmd_val(x) ((&x)->pmd[0])
Index: linux-2.6/include/asm-m68k/sun3_pgalloc.h
===================================================================
--- linux-2.6.orig/include/asm-m68k/sun3_pgalloc.h
+++ linux-2.6/include/asm-m68k/sun3_pgalloc.h
@@ -26,12 +26,17 @@ static inline void pte_free_kernel(struc
free_page((unsigned long) pte);
}

-static inline void pte_free(struct mm_struct *mm, struct page *page)
+static inline void pte_free(struct mm_struct *mm, pgtable_t page)
{
+ pgtable_page_dtor(page);
__free_page(page);
}

-#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+#define __pte_free_tlb(tlb,pte) \
+do { \
+ pgtable_page_dtor(pte); \
+ tlb_remove_page((tlb), pte); \
+} while (0)

static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
unsigned long address)
@@ -45,8 +50,8 @@ static inline pte_t *pte_alloc_one_kerne
return (pte_t *) (page);
}

-static inline struct page *pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+ unsigned long address)
{
struct page *page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);

@@ -54,6 +59,7 @@ static inline struct page *pte_alloc_one
return NULL;

clear_highpage(page);
+ pgtable_page_ctor(page);
return page;

}
@@ -63,10 +69,11 @@ static inline void pmd_populate_kernel(s
pmd_val(*pmd) = __pa((unsigned long)pte);
}

-static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *page)
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page)
{
pmd_val(*pmd) = __pa((unsigned long)page_address(page));
}
+#define pmd_pgtable(pmd) pmd_page(pmd)

/*
* allocating and freeing a pmd is trivial: the 1-entry pmd is
Index: linux-2.6/include/asm-mips/page.h
===================================================================
--- linux-2.6.orig/include/asm-mips/page.h
+++ linux-2.6/include/asm-mips/page.h
@@ -90,6 +90,7 @@ typedef struct { unsigned long pte; } pt
#define pte_val(x) ((x).pte)
#define __pte(x) ((pte_t) { (x) } )
#endif
+typedef struct page *pgtable_t;

/*
* For 3-level pagetables we defines these ourselves, for 2-level the
Index: linux-2.6/include/asm-mips/pgalloc.h
===================================================================
--- linux-2.6.orig/include/asm-mips/pgalloc.h
+++ linux-2.6/include/asm-mips/pgalloc.h
@@ -20,10 +20,11 @@ static inline void pmd_populate_kernel(s
}

static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
- struct page *pte)
+ pgtable_t pte)
{
set_pmd(pmd, __pmd((unsigned long)page_address(pte)));
}
+#define pmd_pgtable(pmd) pmd_page(pmd)

/*
* Initialize a new pmd table with invalid pointers.
@@ -90,7 +91,7 @@ static inline void pte_free_kernel(struc
free_pages((unsigned long)pte, PTE_ORDER);
}

-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
__free_pages(pte, PTE_ORDER);
}
Index: linux-2.6/include/asm-parisc/page.h
===================================================================
--- linux-2.6.orig/include/asm-parisc/page.h
+++ linux-2.6/include/asm-parisc/page.h
@@ -91,6 +91,7 @@ typedef unsigned long pgprot_t;

#endif /* STRICT_MM_TYPECHECKS */

+typedef struct page *pgtable_t;

typedef struct __physmem_range {
unsigned long start_pfn;
Index: linux-2.6/include/asm-parisc/pgalloc.h
===================================================================
--- linux-2.6.orig/include/asm-parisc/pgalloc.h
+++ linux-2.6/include/asm-parisc/pgalloc.h
@@ -115,11 +115,14 @@ pmd_populate_kernel(struct mm_struct *mm

#define pmd_populate(mm, pmd, pte_page) \
pmd_populate_kernel(mm, pmd, page_address(pte_page))
+#define pmd_pgtable(pmd) pmd_page(pmd)

-static inline struct page *
+static inline pgtable_t
pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *page = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+ if (page)
+ pgtable_page_ctor(page);
return page;
}

@@ -135,7 +138,11 @@ static inline void pte_free_kernel(struc
free_page((unsigned long)pte);
}

-#define pte_free(mm, page) pte_free_kernel(page_address(page))
+static inline void pte_free_kernel(struct mm_struct *mm, struct page *pte)
+{
+ pgtable_page_dtor(pte);
+ pte_free_kernel(page_address((pte));
+}

#define check_pgt_cache() do { } while (0)

Index: linux-2.6/include/asm-powerpc/page.h
===================================================================
--- linux-2.6.orig/include/asm-powerpc/page.h
+++ linux-2.6/include/asm-powerpc/page.h
@@ -190,6 +190,8 @@ extern int page_is_ram(unsigned long pfn

struct vm_area_struct;

+typedef struct page *pgtable_t;
+
#include <asm-generic/memory_model.h>
#endif /* __ASSEMBLY__ */

Index: linux-2.6/include/asm-powerpc/pgalloc-32.h
===================================================================
--- linux-2.6.orig/include/asm-powerpc/pgalloc-32.h
+++ linux-2.6/include/asm-powerpc/pgalloc-32.h
@@ -22,17 +22,19 @@ extern void pgd_free(struct mm_struct *m
(pmd_val(*(pmd)) = __pa(pte) | _PMD_PRESENT)
#define pmd_populate(mm, pmd, pte) \
(pmd_val(*(pmd)) = (page_to_pfn(pte) << PAGE_SHIFT) | _PMD_PRESENT)
+#define pmd_pgtable(pmd) pmd_page(pmd)
#else
#define pmd_populate_kernel(mm, pmd, pte) \
(pmd_val(*(pmd)) = (unsigned long)pte | _PMD_PRESENT)
#define pmd_populate(mm, pmd, pte) \
(pmd_val(*(pmd)) = (unsigned long)lowmem_page_address(pte) | _PMD_PRESENT)
+#define pmd_pgtable(pmd) pmd_page(pmd)
#endif

extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
-extern struct page *pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
extern void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
-extern void pte_free(struct mm_struct *mm, struct page *pte);
+extern void pte_free(struct mm_struct *mm, pgtable_t pte);

#define __pte_free_tlb(tlb, pte) pte_free((tlb)->mm, (pte))

Index: linux-2.6/include/asm-powerpc/pgalloc-64.h
===================================================================
--- linux-2.6.orig/include/asm-powerpc/pgalloc-64.h
+++ linux-2.6/include/asm-powerpc/pgalloc-64.h
@@ -53,6 +53,7 @@ static inline void pud_populate(struct m
#define pmd_populate(mm, pmd, pte_page) \
pmd_populate_kernel(mm, pmd, page_address(pte_page))
#define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, (unsigned long)(pte))
+#define pmd_pgtable(pmd) pmd_page(pmd)


#else /* CONFIG_PPC_64K_PAGES */
@@ -87,11 +88,18 @@ static inline pte_t *pte_alloc_one_kerne
return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
}

-static inline struct page *pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+ unsigned long address)
{
- pte_t *pte = pte_alloc_one_kernel(mm, address);
- return pte ? virt_to_page(pte) : NULL;
+ struct page *page;
+ pte_t *pte;
+
+ pte = pte_alloc_one_kernel(mm, address);
+ if (!pte)
+ return NULL;
+ page = virt_to_page(pte);
+ pgtable_page_ctor(page);
+ return page;
}

static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
@@ -99,8 +107,9 @@ static inline void pte_free_kernel(struc
free_page((unsigned long)pte);
}

-static inline void pte_free(struct mm_struct *mm, struct page *ptepage)
+static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
{
+ pgtable_page_dtor(ptepage);
__free_page(ptepage);
}

@@ -131,9 +140,12 @@ static inline void pgtable_free(pgtable_

extern void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf);

-#define __pte_free_tlb(tlb, ptepage) \
+#define __pte_free_tlb(tlb,ptepage) \
+do { \
+ pgtable_page_dtor(ptepage); \
pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \
- PTE_NONCACHE_NUM, PTE_TABLE_SIZE-1))
+ PTE_NONCACHE_NUM, PTE_TABLE_SIZE-1)); \
+} while (0)
#define __pmd_free_tlb(tlb, pmd) \
pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \
PMD_CACHE_NUM, PMD_TABLE_SIZE-1))
Index: linux-2.6/include/asm-ppc/pgalloc.h
===================================================================
--- linux-2.6.orig/include/asm-ppc/pgalloc.h
+++ linux-2.6/include/asm-ppc/pgalloc.h
@@ -23,17 +23,19 @@ extern void pgd_free(struct mm_struct *m
(pmd_val(*(pmd)) = __pa(pte) | _PMD_PRESENT)
#define pmd_populate(mm, pmd, pte) \
(pmd_val(*(pmd)) = (page_to_pfn(pte) << PAGE_SHIFT) | _PMD_PRESENT)
+#define pmd_pgtable(pmd) pmd_page(pmd)
#else
#define pmd_populate_kernel(mm, pmd, pte) \
(pmd_val(*(pmd)) = (unsigned long)pte | _PMD_PRESENT)
#define pmd_populate(mm, pmd, pte) \
(pmd_val(*(pmd)) = (unsigned long)lowmem_page_address(pte) | _PMD_PRESENT)
+#define pmd_pgtable(pmd) pmd_page(pmd)
#endif

extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
-extern struct page *pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
extern void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
-extern void pte_free(struct mm_struct *mm, struct page *pte);
+extern void pte_free(struct mm_struct *mm, pgtable_t pte);

#define __pte_free_tlb(tlb, pte) pte_free((tlb)->mm, (pte))

Index: linux-2.6/include/asm-s390/page.h
===================================================================
--- linux-2.6.orig/include/asm-s390/page.h
+++ linux-2.6/include/asm-s390/page.h
@@ -109,6 +109,8 @@ typedef struct { unsigned long pgd; } pg

#endif /* __s390x__ */

+typedef struct page *pgtable_t;
+
#define __pte(x) ((pte_t) { (x) } )
#define __pmd(x) ((pmd_t) { (x) } )
#define __pgd(x) ((pgd_t) { (x) } )
Index: linux-2.6/include/asm-s390/pgalloc.h
===================================================================
--- linux-2.6.orig/include/asm-s390/pgalloc.h
+++ linux-2.6/include/asm-s390/pgalloc.h
@@ -132,7 +132,7 @@ pmd_populate_kernel(struct mm_struct *mm
}

static inline void
-pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *page)
+pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page)
{
pte_t *pte = (pte_t *)page_to_phys(page);
pmd_t *shadow_pmd = get_shadow_table(pmd);
@@ -142,6 +142,7 @@ pmd_populate(struct mm_struct *mm, pmd_t
if (shadow_pmd && shadow_pte)
pmd_populate_kernel(mm, shadow_pmd, shadow_pte);
}
+#define pmd_pgtable(pmd) pmd_page(pmd)

/*
* page table entry allocation/free routines.
Index: linux-2.6/include/asm-s390/tlb.h
===================================================================
--- linux-2.6.orig/include/asm-s390/tlb.h
+++ linux-2.6/include/asm-s390/tlb.h
@@ -95,7 +95,7 @@ static inline void tlb_remove_page(struc
* pte_free_tlb frees a pte table and clears the CRSTE for the
* page table from the tlb.
*/
-static inline void pte_free_tlb(struct mmu_gather *tlb, struct page *page)
+static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t page)
{
if (!tlb->fullmm) {
tlb->array[tlb->nr_ptes++] = page;
Index: linux-2.6/include/asm-sh/page.h
===================================================================
--- linux-2.6.orig/include/asm-sh/page.h
+++ linux-2.6/include/asm-sh/page.h
@@ -105,6 +105,8 @@ typedef struct { unsigned long pgd; } pg
#define __pgd(x) ((pgd_t) { (x) } )
#define __pgprot(x) ((pgprot_t) { (x) } )

+typedef struct page *pgtable_t;
+
#endif /* !__ASSEMBLY__ */

/* to align the pointer to the (next) page boundary */
Index: linux-2.6/include/asm-sh/pgalloc.h
===================================================================
--- linux-2.6.orig/include/asm-sh/pgalloc.h
+++ linux-2.6/include/asm-sh/pgalloc.h
@@ -14,10 +14,11 @@ static inline void pmd_populate_kernel(s
}

static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
- struct page *pte)
+ pgtable_t pte)
{
set_pmd(pmd, __pmd((unsigned long)page_address(pte)));
}
+#define pmd_pgtable(pmd) pmd_page(pmd)

static inline void pgd_ctor(void *x)
{
@@ -47,11 +48,18 @@ static inline pte_t *pte_alloc_one_kerne
return quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL);
}

-static inline struct page *pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+ unsigned long address)
{
- void *pg = quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL);
- return pg ? virt_to_page(pg) : NULL;
+ struct page *page;
+ void *pg;
+
+ pg = quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL);
+ if (!pg)
+ return NULL;
+ page = virt_to_page(pg);
+ pgtable_page_ctor(page);
+ return page;
}

static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
@@ -59,12 +67,17 @@ static inline void pte_free_kernel(struc
quicklist_free(QUICK_PT, NULL, pte);
}

-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
+ pgtable_page_dtor(pte);
quicklist_free_page(QUICK_PT, NULL, pte);
}

-#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+#define __pte_free_tlb(tlb,pte) \
+do { \
+ pgtable_page_dtor(pte); \
+ tlb_remove_page((tlb), (pte)); \
+} while (0)

/*
* allocating and freeing a pmd is trivial: the 1-entry pmd is
Index: linux-2.6/include/asm-sh64/page.h
===================================================================
--- linux-2.6.orig/include/asm-sh64/page.h
+++ linux-2.6/include/asm-sh64/page.h
@@ -82,6 +82,8 @@ typedef struct { unsigned long pgprot; }
#define __pgd(x) ((pgd_t) { (x) } )
#define __pgprot(x) ((pgprot_t) { (x) } )

+typedef struct page *pgtable_t;
+
#endif /* !__ASSEMBLY__ */

/* to align the pointer to the (next) page boundary */
Index: linux-2.6/include/asm-sh64/pgalloc.h
===================================================================
--- linux-2.6.orig/include/asm-sh64/pgalloc.h
+++ linux-2.6/include/asm-sh64/pgalloc.h
@@ -51,11 +51,18 @@ static inline void pgd_free(struct mm_st
quicklist_free(0, NULL, pgd);
}

-static inline struct page *pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+ unsigned long address)
{
- void *pg = quicklist_alloc(0, GFP_KERNEL, NULL);
- return pg ? virt_to_page(pg) : NULL;
+ struct page *page;
+ void *pg;
+
+ pg = quicklist_alloc(0, GFP_KERNEL, NULL);
+ if (!pg)
+ return NULL;
+ page = virt_to_page(pg);
+ pgtable_page_ctor(page);
+ return page;
}

static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
@@ -63,8 +70,9 @@ static inline void pte_free_kernel(struc
quicklist_free(0, NULL, pte);
}

-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
+ pgtable_page_dtor(pte);
quicklist_free_page(0, NULL, pte);
}

@@ -74,7 +82,11 @@ static inline pte_t *pte_alloc_one_kerne
return quicklist_alloc(0, GFP_KERNEL, NULL);
}

-#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+#define __pte_free_tlb(tlb,pte) \
+do { \
+ pgtable_page_dtor(pte); \
+ tlb_remove_page((tlb),(pte)); \
+} while (0)

/*
* allocating and freeing a pmd is trivial: the 1-entry pmd is
@@ -112,10 +124,11 @@ static inline void pmd_free(struct mm_st
set_pmd(pmd, __pmd(_PAGE_TABLE + (unsigned long) (pte)))

static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
- struct page *pte)
+ pgtable_t pte)
{
set_pmd(pmd, __pmd(_PAGE_TABLE + (unsigned long) page_address (pte)));
}
+#define pmd_pgtable(pmd) pmd_page(pmd)

static inline void check_pgt_cache(void)
{
Index: linux-2.6/include/asm-sparc/page.h
===================================================================
--- linux-2.6.orig/include/asm-sparc/page.h
+++ linux-2.6/include/asm-sparc/page.h
@@ -123,6 +123,8 @@ typedef unsigned long iopgprot_t;

#endif

+typedef struct page *pgtable_t;
+
extern unsigned long sparc_unmapped_base;

BTFIXUPDEF_SETHI(sparc_unmapped_base)
Index: linux-2.6/include/asm-sparc/pgalloc.h
===================================================================
--- linux-2.6.orig/include/asm-sparc/pgalloc.h
+++ linux-2.6/include/asm-sparc/pgalloc.h
@@ -50,10 +50,11 @@ BTFIXUPDEF_CALL(void, free_pmd_fast, pmd

BTFIXUPDEF_CALL(void, pmd_populate, pmd_t *, struct page *)
#define pmd_populate(MM, PMD, PTE) BTFIXUP_CALL(pmd_populate)(PMD, PTE)
+#define pmd_pgtable(pmd) pmd_page(pmd)
BTFIXUPDEF_CALL(void, pmd_set, pmd_t *, pte_t *)
#define pmd_populate_kernel(MM, PMD, PTE) BTFIXUP_CALL(pmd_set)(PMD, PTE)

-BTFIXUPDEF_CALL(struct page *, pte_alloc_one, struct mm_struct *, unsigned long)
+BTFIXUPDEF_CALL(pgtable_t , pte_alloc_one, struct mm_struct *, unsigned long)
#define pte_alloc_one(mm, address) BTFIXUP_CALL(pte_alloc_one)(mm, address)
BTFIXUPDEF_CALL(pte_t *, pte_alloc_one_kernel, struct mm_struct *, unsigned long)
#define pte_alloc_one_kernel(mm, addr) BTFIXUP_CALL(pte_alloc_one_kernel)(mm, addr)
@@ -61,7 +62,7 @@ BTFIXUPDEF_CALL(pte_t *, pte_alloc_one_k
BTFIXUPDEF_CALL(void, free_pte_fast, pte_t *)
#define pte_free_kernel(mm,pte) BTFIXUP_CALL(free_pte_fast)(pte)

-BTFIXUPDEF_CALL(void, pte_free, struct page *)
+BTFIXUPDEF_CALL(void, pte_free, pgtable_t )
#define pte_free(mm,pte) BTFIXUP_CALL(pte_free)(pte)
#define __pte_free_tlb(tlb, pte) pte_free((tlb)->mm, pte)

Index: linux-2.6/include/asm-sparc64/page.h
===================================================================
--- linux-2.6.orig/include/asm-sparc64/page.h
+++ linux-2.6/include/asm-sparc64/page.h
@@ -104,6 +104,8 @@ typedef unsigned long pgprot_t;

#endif /* (STRICT_MM_TYPECHECKS) */

+typedef struct page *pgtable_t;
+
#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \
(_AC(0x0000000070000000,UL)) : \
(_AC(0xfffff80000000000,UL) + (1UL << 32UL)))
Index: linux-2.6/include/asm-sparc64/pgalloc.h
===================================================================
--- linux-2.6.orig/include/asm-sparc64/pgalloc.h
+++ linux-2.6/include/asm-sparc64/pgalloc.h
@@ -43,11 +43,18 @@ static inline pte_t *pte_alloc_one_kerne
return quicklist_alloc(0, GFP_KERNEL, NULL);
}

-static inline struct page *pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+ unsigned long address)
{
- void *pg = quicklist_alloc(0, GFP_KERNEL, NULL);
- return pg ? virt_to_page(pg) : NULL;
+ struct page *page;
+ void *pg;
+
+ pg = quicklist_alloc(0, GFP_KERNEL, NULL);
+ if (!pg)
+ return NULL;
+ page = virt_to_page(pg);
+ pgtable_page_ctor(page);
+ return page;
}

static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
@@ -55,8 +62,9 @@ static inline void pte_free_kernel(struc
quicklist_free(0, NULL, pte);
}

-static inline void pte_free(struct mm_struct *mm, struct page *ptepage)
+static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
{
+ pgtable_page_dtor(ptepage);
quicklist_free_page(0, NULL, ptepage);
}

@@ -64,6 +72,7 @@ static inline void pte_free(struct mm_st
#define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
#define pmd_populate(MM,PMD,PTE_PAGE) \
pmd_populate_kernel(MM,PMD,page_address(PTE_PAGE))
+#define pmd_pgtable(pmd) pmd_page(pmd)

static inline void check_pgt_cache(void)
{
Index: linux-2.6/include/asm-um/page.h
===================================================================
--- linux-2.6.orig/include/asm-um/page.h
+++ linux-2.6/include/asm-um/page.h
@@ -79,6 +79,8 @@ typedef unsigned long phys_t;

typedef struct { unsigned long pgprot; } pgprot_t;

+typedef struct page *pgtable_t;
+
#define pgd_val(x) ((x).pgd)
#define pgprot_val(x) ((x).pgprot)

Index: linux-2.6/include/asm-um/pgalloc.h
===================================================================
--- linux-2.6.orig/include/asm-um/pgalloc.h
+++ linux-2.6/include/asm-um/pgalloc.h
@@ -18,6 +18,7 @@
set_pmd(pmd, __pmd(_PAGE_TABLE + \
((unsigned long long)page_to_pfn(pte) << \
(unsigned long long) PAGE_SHIFT)))
+#define pmd_pgtable(pmd) pmd_page(pmd)

/*
* Allocate and free page tables.
@@ -26,19 +27,24 @@ extern pgd_t *pgd_alloc(struct mm_struct
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);

extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
-extern struct page *pte_alloc_one(struct mm_struct *, unsigned long);
+extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);

static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
free_page((unsigned long) pte);
}

-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
+ pgtable_page_dtor(pte);
__free_page(pte);
}

-#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+#define __pte_free_tlb(tlb,pte) \
+do { \
+ pgtable_page_dtor(pte); \
+ tlb_remove_page((tlb),(pte)); \
+} while (0)

#ifdef CONFIG_3_LEVEL_PGTABLES

Index: linux-2.6/include/asm-x86/page_32.h
===================================================================
--- linux-2.6.orig/include/asm-x86/page_32.h
+++ linux-2.6/include/asm-x86/page_32.h
@@ -115,6 +115,8 @@ static inline pte_t native_make_pte(unsi
#include <asm-generic/pgtable-nopmd.h>
#endif /* CONFIG_X86_PAE */

+typedef struct page *pgtable_t;
+
#define PTE_MASK PAGE_MASK

#ifdef CONFIG_HUGETLB_PAGE
Index: linux-2.6/include/asm-x86/page_64.h
===================================================================
--- linux-2.6.orig/include/asm-x86/page_64.h
+++ linux-2.6/include/asm-x86/page_64.h
@@ -61,6 +61,8 @@ typedef struct { unsigned long pgd; } pg

typedef struct { unsigned long pgprot; } pgprot_t;

+typedef struct page *pgtable_t;
+
extern unsigned long phys_base;

#define pte_val(x) ((x).pte)
Index: linux-2.6/include/asm-x86/pgalloc_32.h
===================================================================
--- linux-2.6.orig/include/asm-x86/pgalloc_32.h
+++ linux-2.6/include/asm-x86/pgalloc_32.h
@@ -28,6 +28,7 @@ do { \
((unsigned long long)page_to_pfn(pte) << \
(unsigned long long) PAGE_SHIFT))); \
} while (0)
+#define pmd_pgtable(pmd) pmd_page(pmd)

/*
* Allocate and free page tables.
@@ -36,21 +37,23 @@ extern pgd_t *pgd_alloc(struct mm_struct
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);

extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
-extern struct page *pte_alloc_one(struct mm_struct *, unsigned long);
+extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);

static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
free_page((unsigned long)pte);
}

-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
+ pgtable_page_dtor(pte);
__free_page(pte);
}


#define __pte_free_tlb(tlb,pte) \
do { \
+ pgtable_page_dtor(pte); \
paravirt_release_pt(page_to_pfn(pte)); \
tlb_remove_page((tlb),(pte)); \
} while (0)
Index: linux-2.6/include/asm-x86/pgalloc_64.h
===================================================================
--- linux-2.6.orig/include/asm-x86/pgalloc_64.h
+++ linux-2.6/include/asm-x86/pgalloc_64.h
@@ -12,6 +12,8 @@
#define pgd_populate(mm, pgd, pud) \
set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)))

+#define pmd_pgtable(pmd) pmd_page(pmd)
+
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
{
set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT)));
@@ -89,12 +91,17 @@ static inline pte_t *pte_alloc_one_kerne
return (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
}

-static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
- void *p = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+ struct page *page;
+ void *p;
+
+ p = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
if (!p)
return NULL;
- return virt_to_page(p);
+ page = virt_to_page(p);
+ pgtable_page_ctor(page);
+ return page;
}

/* Should really implement gc for free page table pages. This could be
@@ -106,12 +113,17 @@ static inline void pte_free_kernel(struc
free_page((unsigned long)pte);
}

-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
+ pgtable_page_dtor(pte);
__free_page(pte);
}

-#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+#define __pte_free_tlb(tlb,pte) \
+do { \
+ pgtable_page_dtor((pte)); \
+ tlb_remove_page((tlb), (pte)); \
+} while (0)

#define __pmd_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x))
#define __pud_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x))
Index: linux-2.6/include/asm-xtensa/page.h
===================================================================
--- linux-2.6.orig/include/asm-xtensa/page.h
+++ linux-2.6/include/asm-xtensa/page.h
@@ -98,6 +98,7 @@
typedef struct { unsigned long pte; } pte_t; /* page table entry */
typedef struct { unsigned long pgd; } pgd_t; /* PGD table entry */
typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct page *pgtable_t;

#define pte_val(x) ((x).pte)
#define pgd_val(x) ((x).pgd)
Index: linux-2.6/include/asm-xtensa/pgalloc.h
===================================================================
--- linux-2.6.orig/include/asm-xtensa/pgalloc.h
+++ linux-2.6/include/asm-xtensa/pgalloc.h
@@ -24,6 +24,7 @@
(pmd_val(*(pmdp)) = ((unsigned long)ptep))
#define pmd_populate(mm, pmdp, page) \
(pmd_val(*(pmdp)) = ((unsigned long)page_to_virt(page)))
+#define pmd_pgtable(pmd) pmd_page(pmd)

static inline pgd_t*
pgd_alloc(struct mm_struct *mm)
@@ -46,10 +47,14 @@ static inline pte_t *pte_alloc_one_kerne
return kmem_cache_alloc(pgtable_cache, GFP_KERNEL|__GFP_REPEAT);
}

-static inline struct page *pte_alloc_one(struct mm_struct *mm,
- unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+ unsigned long addr)
{
- return virt_to_page(pte_alloc_one_kernel(mm, addr));
+ struct page *page;
+
+ page = virt_to_page(pte_alloc_one_kernel(mm, addr));
+ pgtable_page_ctor(page);
+ return page;
}

static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
@@ -57,10 +62,11 @@ static inline void pte_free_kernel(struc
kmem_cache_free(pgtable_cache, pte);
}

-static inline void pte_free(struct mm_struct *mm, struct page *page)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
- kmem_cache_free(pgtable_cache, page_address(page));
+ pgtable_page_dtor(pte);
+ kmem_cache_free(pgtable_cache, page_address(pte));
}

#endif /* __KERNEL__ */
#endif /* _XTENSA_PGALLOC_H */
Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h
+++ linux-2.6/include/linux/mm.h
@@ -879,6 +879,18 @@ static inline pmd_t *pmd_alloc(struct mm
#define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;})
#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */

+static inline void pgtable_page_ctor(struct page *page)
+{
+ pte_lock_init(page);
+ inc_zone_page_state(page, NR_PAGETABLE);
+}
+
+static inline void pgtable_page_dtor(struct page *page)
+{
+ pte_lock_deinit(page);
+ dec_zone_page_state(page, NR_PAGETABLE);
+}
+
#define pte_offset_map_lock(mm, pmd, address, ptlp) \
({ \
spinlock_t *__ptl = pte_lockptr(mm, pmd); \
@@ -1122,7 +1134,7 @@ struct page *follow_page(struct vm_area_
#define FOLL_GET 0x04 /* do get_page on page */
#define FOLL_ANON 0x08 /* give ZERO_PAGE if no pgtable */

-typedef int (*pte_fn_t)(pte_t *pte, struct page *pmd_page, unsigned long addr,
+typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
void *data);
extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
unsigned long size, pte_fn_t fn, void *data);
Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c
+++ linux-2.6/mm/memory.c
@@ -123,11 +123,9 @@ void pmd_clear_bad(pmd_t *pmd)
*/
static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
{
- struct page *page = pmd_page(*pmd);
+ pgtable_t token = pmd_pgtable(*pmd);
pmd_clear(pmd);
- pte_lock_deinit(page);
- pte_free_tlb(tlb, page);
- dec_zone_page_state(page, NR_PAGETABLE);
+ pte_free_tlb(tlb, token);
tlb->mm->nr_ptes--;
}

@@ -298,21 +296,19 @@ void free_pgtables(struct mmu_gather **t

int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
{
- struct page *new = pte_alloc_one(mm, address);
+ pgtable_t new = pte_alloc_one(mm, address);
if (!new)
return -ENOMEM;

- pte_lock_init(new);
spin_lock(&mm->page_table_lock);
- if (pmd_present(*pmd)) { /* Another has populated it */
- pte_lock_deinit(new);
- pte_free(mm, new);
- } else {
+ if (!pmd_present(*pmd)) { /* Has another populated it ? */
mm->nr_ptes++;
- inc_zone_page_state(new, NR_PAGETABLE);
pmd_populate(mm, pmd, new);
+ new = NULL;
}
spin_unlock(&mm->page_table_lock);
+ if (new)
+ pte_free(mm, new);
return 0;
}

@@ -323,11 +319,13 @@ int __pte_alloc_kernel(pmd_t *pmd, unsig
return -ENOMEM;

spin_lock(&init_mm.page_table_lock);
- if (pmd_present(*pmd)) /* Another has populated it */
- pte_free_kernel(&init_mm, new);
- else
+ if (!pmd_present(*pmd)) { /* Has another populated it ? */
pmd_populate_kernel(&init_mm, pmd, new);
+ new = NULL;
+ }
spin_unlock(&init_mm.page_table_lock);
+ if (new)
+ pte_free_kernel(&init_mm, new);
return 0;
}

@@ -1377,7 +1375,7 @@ static int apply_to_pte_range(struct mm_
{
pte_t *pte;
int err;
- struct page *pmd_page;
+ pgtable_t token;
spinlock_t *uninitialized_var(ptl);

pte = (mm == &init_mm) ?
@@ -1388,10 +1386,10 @@ static int apply_to_pte_range(struct mm_

BUG_ON(pmd_huge(*pmd));

- pmd_page = pmd_page(*pmd);
+ token = pmd_pgtable(*pmd);

do {
- err = fn(pte, pmd_page, addr, data);
+ err = fn(pte, token, addr, data);
if (err)
break;
} while (pte++, addr += PAGE_SIZE, addr != end);
Index: linux-2.6/mm/vmalloc.c
===================================================================
--- linux-2.6.orig/mm/vmalloc.c
+++ linux-2.6/mm/vmalloc.c
@@ -829,7 +829,7 @@ void __attribute__((weak)) vmalloc_sync
}


-static int f(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
+static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
{
/* apply_to_page_range() does all the hard work. */
return 0;

--
blue skies,
Martin.

"Reality continues to ruin my life." - Calvin.


2008-01-02 20:44:50

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [patch 2/3] CONFIG_HIGHPTE vs. sub-page page tables.

On Mon, Nov 12, 2007 at 03:30:11PM +0100, [email protected] wrote:
> From: Martin Schwidefsky <[email protected]>
> Solution: The only solution I found to this dilemma is a new typedef:
> a pgtable_t. For s390 pgtable_t will be a (pte *) - to be introduced
> with a later patch. For everybody else it will be a (struct page *).
> The additional problem with the initialization of the ptl lock and the
> NR_PAGETABLE accounting is solved with a constructor pgtable_page_ctor
> and a destructor pgtable_page_dtor. The page table allocation and free
> functions need to call these two whenever a page table page is allocated
> or freed. pmd_populate will get a pgtable_t instead of a struct page
> pointer. To get the pgtable_t back from a pmd entry that has been
> installed with pmd_populate a new function pmd_pgtable is added. It
> replaces the pmd_page call in free_pte_range and apply_to_pte_range.

Can we please just nuke CONFIG_HIGHPTE? There's only been a small
amount of 32bit machines with so much memory that they'd need it
and they can happily stay on the currently supported enterprise
distro releases instead of dragging this cruft around forever.

2008-01-02 21:24:54

by Geert Uytterhoeven

[permalink] [raw]
Subject: Re: [patch 2/3] CONFIG_HIGHPTE vs. sub-page page tables.

On Wed, 2 Jan 2008, Christoph Hellwig wrote:
> On Mon, Nov 12, 2007 at 03:30:11PM +0100, [email protected] wrote:
> > From: Martin Schwidefsky <[email protected]>
> > Solution: The only solution I found to this dilemma is a new typedef:
> > a pgtable_t. For s390 pgtable_t will be a (pte *) - to be introduced
> > with a later patch. For everybody else it will be a (struct page *).
> > The additional problem with the initialization of the ptl lock and the
> > NR_PAGETABLE accounting is solved with a constructor pgtable_page_ctor
> > and a destructor pgtable_page_dtor. The page table allocation and free
> > functions need to call these two whenever a page table page is allocated
> > or freed. pmd_populate will get a pgtable_t instead of a struct page
> > pointer. To get the pgtable_t back from a pmd entry that has been
> > installed with pmd_populate a new function pmd_pgtable is added. It
> > replaces the pmd_page call in free_pte_range and apply_to_pte_range.
>
> Can we please just nuke CONFIG_HIGHPTE? There's only been a small
> amount of 32bit machines with so much memory that they'd need it
> and they can happily stay on the currently supported enterprise
> distro releases instead of dragging this cruft around forever.

And all MMU-equipped FR-V machines with more than 256 MiB of RAM,
according to arch/frv/Kconfig?

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- [email protected]

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds

2008-01-02 21:29:44

by Benjamin Herrenschmidt

[permalink] [raw]
Subject: Re: [patch 2/3] CONFIG_HIGHPTE vs. sub-page page tables.


> > Can we please just nuke CONFIG_HIGHPTE? There's only been a small
> > amount of 32bit machines with so much memory that they'd need it
> > and they can happily stay on the currently supported enterprise
> > distro releases instead of dragging this cruft around forever.
>
> And all MMU-equipped FR-V machines with more than 256 MiB of RAM,
> according to arch/frv/Kconfig?

ppc32 uses that too when highmem is on.

Ben.

2008-01-03 13:12:42

by Andi Kleen

[permalink] [raw]
Subject: Re: [patch 2/3] CONFIG_HIGHPTE vs. sub-page page tables.


> Can we please just nuke CONFIG_HIGHPTE? There's only been a small
> amount of 32bit machines

It's unfortunately a larger amount :/ And for unknown reasons a lot of
people still install 32bit kernels on new perfectly capable 64bit systems
even if they have a lot of memory.

I don't think removing CONFIG_HIGHPTE will be an option any time soon.

-Andi

2008-01-03 14:02:18

by Boaz Harrosh

[permalink] [raw]
Subject: Re: [patch 2/3] CONFIG_HIGHPTE vs. sub-page page tables.

On Thu, Jan 03 2008 at 15:12 +0200, Andi Kleen <[email protected]> wrote:
>> Can we please just nuke CONFIG_HIGHPTE? There's only been a small
>> amount of 32bit machines
>
> It's unfortunately a larger amount :/ And for unknown reasons a lot of
> people still install 32bit kernels on new perfectly capable 64bit systems
> even if they have a lot of memory.
>
Yes I've seen that too many times. When I comment about it, people say:
"What this 'core 2' supports AMD64 ??!!"

I think this is the distros fault.
1. Most call it AMD64 and not, i don't know what it should be called.

2. They make i386 the first and default installation rather than x64;
the latter is only further down or on a different link.

3. My Fedora x86_64 installation has a full blown 32bit installation inside.
In fact, every time I yum install this or that, I get installed by default
both the 64bit and 32bit libraries. I don't even know how to turn the 32bit
off. This means that Distros can supply one x86 installation for both
flavors and decide at setup time what to install. By this, follow the Kernel
in observing that it is the same-ARCH same-INSTALL.

4. Some binary modules like Flash, media-codecs acrobat-reader etc... think
(That snow ball effect here) that 32bit is much more common and only provide
that. So people think that for best compatibility they should stick with
32bit. But this is not true. They all work perfectly here in 64bit land.
Distros should both confirm that publicly and run extensive tests
of 32bit compatibility on 64bit machines, even for these binary-only bad
guys.

5. Distros should jump on the 64bit bandwagon, to set themselves apart from
Windows 32bit-ness. It has been proven more than once that a huge Linux 64bit
machine, with lots of CPUs and memory, can be the best Windows 32bit performer
in existence, whether under KVM/VMware or Wine. 64bit Vista can never get close,
not to speak of the "non-existent" 64bit XP.

Just my $0.02
Boaz

> I don't think removing CONFIG_HIGHPTE will be an option any time soon.
>
> -Andi
> --

2008-02-01 23:16:17

by Andrew Morton

[permalink] [raw]
Subject: Re: [patch 2/3] CONFIG_HIGHPTE vs. sub-page page tables.

On Mon, 12 Nov 2007 15:30:11 +0100
[email protected] wrote:

> From: Martin Schwidefsky <[email protected]>
>
> Background: I've implemented 1K/2K page tables for s390. These sub-page
> page tables are required to properly support the s390 virtualization
> instruction with KVM. The SIE instruction requires that the page tables
> have 256 page table entries (pte) followed by 256 page status table
> entries (pgste). The pgstes are only required if the process is using
> the SIE instruction. The pgstes are updated by the hardware and by the
> hypervisor for a number of reasons, one of them is dirty and reference
> bit tracking. To avoid wasting memory the standard pte table allocation
> should return 1K/2K (31/64 bit) and 2K/4K if the process is using SIE.
>
> Problem: Page size on s390 is 4K, page table size is 1K or 2K. That
> means the s390 version for pte_alloc_one cannot return a pointer to
> a struct page. Trouble is that with the CONFIG_HIGHPTE feature on x86
> pte_alloc_one cannot return a pointer to a pte either, since that would
> require more than 32 bit for the return value of pte_alloc_one (and the
> pte * would not be accessible since its not kmapped).
>
> Solution: The only solution I found to this dilemma is a new typedef:
> a pgtable_t. For s390 pgtable_t will be a (pte *) - to be introduced
> with a later patch. For everybody else it will be a (struct page *).
> The additional problem with the initialization of the ptl lock and the
> NR_PAGETABLE accounting is solved with a constructor pgtable_page_ctor
> and a destructor pgtable_page_dtor. The page table allocation and free
> functions need to call these two whenever a page table page is allocated
> or freed. pmd_populate will get a pgtable_t instead of a struct page
> pointer. To get the pgtable_t back from a pmd entry that has been
> installed with pmd_populate a new function pmd_pgtable is added. It
> replaces the pmd_page call in free_pte_range and apply_to_pte_range.

Sorry, I'm going to drop this. And I guess the whole series.

On my 7000th fix-it-for-git-x86-changes I ended up with this:

static inline struct page *pmd_pgtable(pmd_t *pmd)
{
return pmd_page(pmd);
}

expanding to this:

static inline __attribute__((always_inline)) struct page *pmd_pgtable(pmd_t *pmd)
{
return ((mem_map + ((((native_pgd_val(((pmd).pud).pgd))) >> 12) - (0UL))));
}

and producing this:

In file included from include/asm/pgalloc.h:2,
from include/asm/mmu_context_32.h:6,
from include/asm/mmu_context.h:2,
from arch/x86/kernel/ldt.c:20:
include/asm/pgalloc_32.h: In function 'pmd_pgtable':
include/asm/pgalloc_32.h:37: error: request for member 'pud' in something not a structure or union

it was a revolting experience picking through the mess we've made, trying
to work out when we're using ptes/pmds/puds/pgds versus when we're using
*pointers* to those things. The obsessional use of type-free macros, the
liberal avoidance of comments and the general spaghettiness of it all makes
this far harder than it should be.

The reason why I chose to drop the patch rather than keep poking away at it
was:

> +#define pmd_pgtable(pmd) pmd_page(pmd)
> +#define pmd_pgtable(pmd) pmd_page(pmd)
> +#define __pte_free_tlb(tlb,pte) \
> +do { \
> + pgtable_page_dtor(pte); \
> + tlb_remove_page((tlb), pte); \
> +} while (0)
> +#define __pte_free_tlb(tlb,pte) \
> +do { \
> + pgtable_page_dtor(pte); \
> + tlb_remove_page((tlb),(pte)); \
> +} while (0)
> +#define pmd_pgtable(pmd) pmd_page(pmd)
> +#define pmd_pgtable(pmd) pmd_page(pmd)
> +#define __pte_free_tlb(tlb,pte) \
> +do { \
> + pgtable_page_dtor(pte); \
> + tlb_remove_page((tlb), pte); \
> +} while (0)
>
> etcetera

This is just making a bad situation worse. Please only use macros as a
last resort. Please prefer to code in typesafe, self-documenting C.

2008-02-03 05:43:21

by Benjamin Herrenschmidt

[permalink] [raw]
Subject: Re: [patch 2/3] CONFIG_HIGHPTE vs. sub-page page tables.

Why dropping add-mm-argument-to-pte-pmd-pud-pgd_free.patch though ?

It's a sane patch and it helps going further, and a total pain to re-do
later on. Besides, I may have some use for it on powerpc at some point
too...

Ben.

2008-02-03 05:53:28

by Andrew Morton

[permalink] [raw]
Subject: Re: [patch 2/3] CONFIG_HIGHPTE vs. sub-page page tables.

On Sun, 03 Feb 2008 16:37:00 +1100 Benjamin Herrenschmidt <[email protected]> wrote:

> Why dropping add-mm-argument-to-pte-pmd-pud-pgd_free.patch though ?

I dropped the whole series.

> It's a sane patch and a helps going further, and a total pain to re-do
> later on. Besides, I may have some use for it on powerpc at some point
> too...

OK, I'll try to reestablish it.

Look: I can't fix *everyone's* stuff. This was a consequence of ongoing
unbounded churn in the x86 tree. If we can find a way of preventing those
guys (and everyone else) from trashing everyone else's stuff then we'd have
much smoother sailing.

Feel free to fix up Martin's patches against mmotm and we're good. Should
take you less than an hour.

2008-02-03 06:47:26

by Ingo Molnar

[permalink] [raw]
Subject: Re: [patch 2/3] CONFIG_HIGHPTE vs. sub-page page tables.


* Andrew Morton <[email protected]> wrote:

> > It's a sane patch and a helps going further, and a total pain to
> > re-do later on. Besides, I may have some use for it on powerpc at
> > some point too...
>
> OK, I'll try to reestablish it.
>
> Look: I can't fix *everyone's* stuff. This was a consequence of
> ongoing unbounded churn in the x86 tree. [...]

i've reviewed this patchset and the right model appears to me to do this
change upstream right now, atomically. It is supposed to be a pure
functional NOP, and any deviation from that is easy to spot.

Acked-by: Ingo Molnar <[email protected]>

I don't think it's particularly wise to maintain a change like this across
all the arch churn, for months. This patch is a pure cleanup, it could
have been merged two months ago. The author should get agreement that
it's fine to do it, and if the timing happens to be unfortunate for
immediate merging (we are within say 1 month window before stable
release) then delay it and redo the cleanup right when it's about to be
merged.

The worst thing to do is to prolong this for months - it is only
unnecessary work for no particular good reason. It complicates -mm
merging, keeps an API fork around for no good reason, etc., etc.

there's tons of past examples of much larger transformations than this
done right: for example the recent irq_regs changes. (and that one wasn't
even a pure NOP like this change.)

In general, we can pick up the x86 bits of any tree-wide change into
x86.git no problem, and then maintain it against all the nuances of
x86.git churn. (That requires them to be shaped in a way so that they
can be applied to one architecture at a time - which is obviously a good
thing anyway - but not always possible, such as in this case where a
common API is extended.)

If anyone is feeling _any_ serious effects of x86.git churn then please
talk to us maintainers and we can work out some technical solution. The
only thing we cannot do is to stop 100 active contributors and the flow
of 1000 patches until someone finds the time to get tree-wide changes
upstream.

Roland was able to shape his utrace-enabler regset patches upstream this
way, and it is two or three orders of magnitude more complex of a code
transformation than the one we are talking about here.

Ingo

2008-02-04 10:37:05

by Martin Schwidefsky

[permalink] [raw]
Subject: Re: [patch 2/3] CONFIG_HIGHPTE vs. sub-page page tables.

On Sat, 2008-02-02 at 21:53 -0800, Andrew Morton wrote:
> On Sun, 03 Feb 2008 16:37:00 +1100 Benjamin Herrenschmidt <[email protected]> wrote:
>
> > Why dropping add-mm-argument-to-pte-pmd-pud-pgd_free.patch though ?
>
> I dropped the whole series.

Sniff .. my patches .. ;-)

> > It's a sane patch and a helps going further, and a total pain to re-do
> > later on. Besides, I may have some use for it on powerpc at some point
> > too...
>
> OK, I'll try to reestablish it.

Fine. I've got the patch-merge message, so the first of the series is
done.

> Look: I can't fix *everyone's* stuff. This was a consequence of ongoing
> unbounded churn in the x86 tree. If we can find a way of preventing those
> guys (and everyone else) from trashing everyone else's stuff then we'd have
> much smoother sailing.

Understood. That is where I jump in and regenerate my patches on the
latest available level. That the patches did hold up for some months in
-mm now without really breaking anything is an indication that we can
push them upstream now, isn't it? That would make the patch problem go
away and I could queue my s390 specific page table rework. Our KVM
people keep asking about it.

--
blue skies,
Martin.

"Reality continues to ruin my life." - Calvin.

2008-02-04 10:51:45

by Andrew Morton

[permalink] [raw]
Subject: Re: [patch 2/3] CONFIG_HIGHPTE vs. sub-page page tables.

On Mon, 04 Feb 2008 11:36:49 +0100 Martin Schwidefsky <[email protected]> wrote:

> On Sat, 2008-02-02 at 21:53 -0800, Andrew Morton wrote:
> > On Sun, 03 Feb 2008 16:37:00 +1100 Benjamin Herrenschmidt <[email protected]> wrote:
> >
> > > Why dropping add-mm-argument-to-pte-pmd-pud-pgd_free.patch though ?
> >
> > I dropped the whole series.
>
> Sniff .. my patches .. ;-)

Well yes. People who merge via -mm continue to be at a disadvantage
because they're forced to go behind all the subsystem trees. Plus I (and
apparently I alone) will skip patches when the kernel is
more-than-usually-wrecked and will slow things down for stabilisation as
we're heading into the merge window.

Plus: I started to prepare 2.6.24-mm1 on Friday morning, worked all weekend
and got it out Sunday evening after having committed forty to fifty fixes
and having dropped numerous patches.

If this situation (conflicting changes and poor code quality) persists into
the 2.6.25 cycle I will toss all the subsystem trees out of -mm, shall
rebase -mm on mainline and shall merge first. I had decided today to
actually just do this, but on reflection I'll give it just one more shot.

It's remarkable how many bugs are in current mainline which weren't in
2.6.24-rc8-mm1. What could have caused this?

> > Look: I can't fix *everyone's* stuff. This was a consequence of ongoing
> > unbounded churn in the x86 tree. If we can find a way of preventing those
> > guys (and everyone else) from trashing everyone else's stuff then we'd have
> > much smoother sailing.
>
> Understood. That is where I jump in and regenerate my patches on the
> latest available level. That the patches did hold up for some months in
> -mm now without really breaking anything is an indication that we can
> push them upstream now, isn't ? That would make the patch problem go
> away and I could queue my s390 specific page table rework. Our KVM
> people keep asking about it.

yes, against 2.6.24-mm1 would be good, thanks. I really don't know what
went wrong in i386 but I ended up getting all grumpy at the macro mess
we've made in all the pagetable handling. Please do take a look at
improving that.


(goes back to bisecting)

2008-02-04 11:03:28

by Russell King

[permalink] [raw]
Subject: Re: [patch 2/3] CONFIG_HIGHPTE vs. sub-page page tables.

On Mon, Feb 04, 2008 at 02:51:33AM -0800, Andrew Morton wrote:
> If this situation (conflicting changes and poor code quality) persists into
> the 2.6.25 cycle I will toss all the subsystem trees out of -mm, shall
> rebase -mm on mainline and shall merge first. I had decided today to
> actually just do this, but on reflection I'll give it just one more shot.

Can I too whinge about that?

Shortly after the 2.6 merge window opened, various changes went in which
completely broke a number of the merged changes in the ARM tree. That
resulted in the stuff which I thought was safe to merge becoming unsafe,
and with that I dropped all the changes which conflicted.

In some cases, these merge conflicts came about due to a bug fix I had
to put in to make the kernel bootable on ARM again.

I'm still in the middle of rebuilding the resulting mess from that - and
we're not yet back to where we were prior to the 2.6.24 release. So, the
current version of the ARM tree which you most likely pulled for -mm1 is
incomplete with respect to what was planned to go in. Therefore, you can
expect to see quite a number of apparently "new" changes appearing in it
as these problems are resolved.

They're not really new, they're just the old stuff with the merge conflicts
fixed.

I don't see any end to these bun fights at the start of the merge window.
I believe it's inevitable given the work flow that we're now using.

--
Russell King
Linux kernel 2.6 ARM Linux - http://www.arm.linux.org.uk/
maintainer of:

2008-02-04 11:14:46

by Andrew Morton

[permalink] [raw]
Subject: Re: [patch 2/3] CONFIG_HIGHPTE vs. sub-page page tables.

On Mon, 4 Feb 2008 11:02:38 +0000 Russell King <[email protected]> wrote:

> I don't see any end to these bun fights at the start of the merge window.
> I believe it's inevitable given the work flow that we're now using.

I'm trying to find someone who will run a merged tree of all the
subsystems (I dub it "linux-next"). Subsystem maintainers will put their
2.6.x+1 material into branches or quilt directories for that tree.

Once that person is found and the system is up and running we can solicit
testing of that tree - it's basically like -mm without the -mm bits.
Obviously I'll be able to put a lot of -mm in there too (uml, fbdev, etc,
etc).

Right now, the merge practices of the subsystem maintainers will drive that
person insane, so some pushback will be needed to make it practical.

None of which is really relevant to your complaint. But if/when linux-next
is running, we can do more things around it. One might be "if a non-bugfix
patch wasn't in linux-next one week prior to 2.6.x, it doesn't get merged
into 2.6.x+1". For example.

2008-02-05 14:40:01

by Martin Schwidefsky

[permalink] [raw]
Subject: Re: [patch 2/3] CONFIG_HIGHPTE vs. sub-page page tables.

On Mon, 2008-02-04 at 02:51 -0800, Andrew Morton wrote:
> > > Look: I can't fix *everyone's* stuff. This was a consequence of ongoing
> > > unbounded churn in the x86 tree. If we can find a way of preventing those
> > > guys (and everyone else) from trashing everyone else's stuff then we'd have
> > > much smoother sailing.
> >
> > Understood. That is where I jump in and regenerate my patches on the
> > latest available level. That the patches did hold up for some months in
> > -mm now without really breaking anything is an indication that we can
> > push them upstream now, isn't ? That would make the patch problem go
> > away and I could queue my s390 specific page table rework. Our KVM
> > people keep asking about it.
>
> yes, against 2.6.24-mm1 would be good, thanks. I really don't know what
> went wrong in i386 but I ended up getting all grumpy at the macro mess
> we've made in all the pagetable handling. Please do take a look at
> improving that.

I'm trying to replace the __pte_free_tlb macros my patch touches for the
different architectures. Not much luck yet, there is a reason why
__pte_free_tlb is a macro in the first place: welcome to #include hell.
I'm starting to get grumpy as well..

Just an example for x86-64:
* asm-x86/tlb.h includes asm-generic/tlb.h
* asm-generic/tlb.h includes asm-x86/pgalloc.h
* asm-x86/pgalloc.h includes asm-x86/pgalloc_64.h
* asm-x86/pgalloc_64.h includes asm-x86/tlb.h
* since asm-x86/tlb.h started this #include chain it expands to nothing
* asm-x86/pgalloc_64.h calls tlb_remove_page which is defined in
asm-x86/tlb.h but the compiler hasn't seen the definition yet
* you lose..

I got x86-64 compiled by removing the #include <asm/pgalloc.h> from
asm-generic/tlb.h. But who knows what will break if the include is
missing .. I'll cross compile some of the other architectures next.

--
blue skies,
Martin.

"Reality continues to ruin my life." - Calvin.

2008-02-05 18:46:39

by Andrew Morton

[permalink] [raw]
Subject: Re: [patch 2/3] CONFIG_HIGHPTE vs. sub-page page tables.

On Tue, 05 Feb 2008 15:39:47 +0100 Martin Schwidefsky <[email protected]> wrote:

> On Mon, 2008-02-04 at 02:51 -0800, Andrew Morton wrote:
> > > > Look: I can't fix *everyone's* stuff. This was a consequence of ongoing
> > > > unbounded churn in the x86 tree. If we can find a way of preventing those
> > > > guys (and everyone else) from trashing everyone else's stuff then we'd have
> > > > much smoother sailing.
> > >
> > > Understood. That is where I jump in and regenerate my patches on the
> > > latest available level. That the patches did hold up for some months in
> > > -mm now without really breaking anything is an indication that we can
> > > push them upstream now, isn't ? That would make the patch problem go
> > > away and I could queue my s390 specific page table rework. Our KVM
> > > people keep asking about it.
> >
> > yes, against 2.6.24-mm1 would be good, thanks. I really don't know what
> > went wrong in i386 but I ended up getting all grumpy at the macro mess
> > we've made in all the pagetable handling. Please do take a look at
> > improving that.
>
> I'm trying to replace the __pte_free_tlb macros my patch touches for the
> different architectures. Not much luck yet, there is a reason why
> __pte_free_tlb is a macro in the first place: welcome to #include hell.
> I'm starting to get grumpy as well..
>
> Just an example for x86-64:
> * asm-x86/tlb.h includes asm-generic/tlb.h
> * asm-generic/tlb.h includes asm-x86/pgalloc.h
> * asm-x86/pgalloc.h includes asm-x86/pgalloc_64.h
> * asm-x86/pgalloc_64.h includes asm-x86/tlb.h
> * since asm-x86/tlb.h started this #include chain it expands to nothing
> * asm-x86/pgalloc_64.h calls tlb_remove_page which is defined in
> asm-x86/tlb.h but the compiler hasn't seen the definition yet
> * you loose..
>
> I got x86-64 compiled by removing the #include <asm/pgalloc.h> from
> asm-generic/tlb.h. But who knows what will break if the include is
> missing .. I'll cross compile some of the other architectures next.
>

urgh, well, thanks for trying. If there's significant risk factor (or
hassle) in fixing the macros then I'd suggest we not do it for now - it's a
separate project.

At least x86 is getting better in that regard.

2008-02-06 09:06:46

by Martin Schwidefsky

[permalink] [raw]
Subject: Re: [patch 2/3] CONFIG_HIGHPTE vs. sub-page page tables.

On Tue, 2008-02-05 at 10:46 -0800, Andrew Morton wrote:
> > I got x86-64 compiled by removing the #include <asm/pgalloc.h> from
> > asm-generic/tlb.h. But who knows what will break if the include is
> > missing .. I'll cross compile some of the other architectures next.
> >
>
> urgh, well, thanks for trying. If there's significant risk factor (or
> hassle) in fixing the macros then I'd suggest we not do it for now - it's a
> separate project.

I'm still at it. It does make sense to convert the damn macros to inline
functions. The question now is the order of things, the macro cleanup
first or the sub-page page tables first? I would prefer the sub-page
page tables first since that code has been hanging around in -mm for a
while and could go upstream after I regenerated the patch and test
compiled it again. We do need it for KVM and we want to push our KVM
patches for s390 soon.

--
blue skies,
Martin.

"Reality continues to ruin my life." - Calvin.

2008-02-06 09:10:14

by Andrew Morton

[permalink] [raw]
Subject: Re: [patch 2/3] CONFIG_HIGHPTE vs. sub-page page tables.

On Wed, 06 Feb 2008 10:06:18 +0100 Martin Schwidefsky <[email protected]> wrote:

> On Tue, 2008-02-05 at 10:46 -0800, Andrew Morton wrote:
> > > I got x86-64 compiled by removing the #include <asm/pgalloc.h> from
> > > asm-generic/tlb.h. But who knows what will break if the include is
> > > missing .. I'll cross compile some of the other architectures next.
> > >
> >
> > urgh, well, thanks for trying. If there's significant risk factor (or
> > hassle) in fixing the macros then I'd suggest we not do it for now - it's a
> > separate project.
>
> I'm still at it. I does make sense to convert the damn macros to inline
> functions. The question now is the order of things, the macro cleanup
> first or the sub-page page tables first? I would prefer the sub-page
> page tables first since that code has been hanging around in -mm for a
> while and could go upstream after I regenerated the patch and test
> compiled it again. We do need it for KVM and we want to push our KVM
> patches for s390 soon.

I'd suggest do the macro cleanup later. That's the sort of thing which we
can/should trickle through arch maintainers.

2008-02-06 09:16:00

by Ingo Molnar

[permalink] [raw]
Subject: Re: [patch 2/3] CONFIG_HIGHPTE vs. sub-page page tables.


* Andrew Morton <[email protected]> wrote:

> > > urgh, well, thanks for trying. If there's significant risk factor
> > > (or hassle) in fixing the macros then I'd suggest we not do it for
> > > now - it's a separate project.
> >
> > I'm still at it. I does make sense to convert the damn macros to
> > inline functions. The question now is the order of things, the macro
> > cleanup first or the sub-page page tables first? I would prefer the
> > sub-page page tables first since that code has been hanging around
> > in -mm for a while and could go upstream after I regenerated the
> > patch and test compiled it again. We do need it for KVM and we want
> > to push our KVM patches for s390 soon.
>
> I'd suggest do the macro ceanup later. That's the sort of thing which
> we can/should trickle through arch maintainers.

note that there are ways to stage even API extensions like adding an
extra 'struct mm_struct *mm' to macros. It takes a temporary ugliness
like:

#define __EXTRA_MM_ARG_DEF , struct mm_struct *mm
#define __EXTRA_MM_ARG_VAL(arg) , (arg)

which converted architectures redefine.

and at the end eliminate these compatibility macros from the core, once
all arches have converted.

so we _could_ stage even something like this.

Ingo

2008-02-06 15:50:37

by Martin Schwidefsky

[permalink] [raw]
Subject: Re: [patch 2/3] CONFIG_HIGHPTE vs. sub-page page tables.

On Wed, 2008-02-06 at 01:09 -0800, Andrew Morton wrote:
> I'd suggest do the macro ceanup later. That's the sort of thing which we
> can/should trickle through arch maintainers.

Ok then, here is the regenerated sub-page page table patch. I test
compiled it on today's git with defconfig and allmodconfig on alpha, arm,
ia64, mips, powerpc, power64, s390, s390-64, sparc, sparc64, x86 and
x86-64. I found a few modules that wouldn't compile for some
architectures with allmodconfig but this has nothing to do with my
patch. From my point of view the patch looks good.

The patch applies cleanly on today's git and with a few hunks on
2.6.24-mm1 as well. Have fun.

--
blue skies,
Martin.

"Reality continues to ruin my life." - Calvin.

---
Subject: [PATCH] CONFIG_HIGHPTE vs. sub-page page tables.

From: Martin Schwidefsky <[email protected]>

Background: I've implemented 1K/2K page tables for s390. These sub-page
page tables are required to properly support the s390 virtualization
instruction with KVM. The SIE instruction requires that the page tables
have 256 page table entries (pte) followed by 256 page status table
entries (pgste). The pgstes are only required if the process is using
the SIE instruction. The pgstes are updated by the hardware and by the
hypervisor for a number of reasons, one of them is dirty and reference
bit tracking. To avoid wasting memory the standard pte table allocation
should return 1K/2K (31/64 bit) and 2K/4K if the process is using SIE.

Problem: Page size on s390 is 4K, page table size is 1K or 2K. That
means the s390 version for pte_alloc_one cannot return a pointer to
a struct page. Trouble is that with the CONFIG_HIGHPTE feature on x86
pte_alloc_one cannot return a pointer to a pte either, since that would
require more than 32 bit for the return value of pte_alloc_one (and the
pte * would not be accessible since it's not kmapped).

Solution: The only solution I found to this dilemma is a new typedef:
a pgtable_t. For s390 pgtable_t will be a (pte *) - to be introduced
with a later patch. For everybody else it will be a (struct page *).
The additional problem with the initialization of the ptl lock and the
NR_PAGETABLE accounting is solved with a constructor pgtable_page_ctor
and a destructor pgtable_page_dtor. The page table allocation and free
functions need to call these two whenever a page table page is allocated
or freed. pmd_populate will get a pgtable_t instead of a struct page
pointer. To get the pgtable_t back from a pmd entry that has been
installed with pmd_populate a new function pmd_pgtable is added. It
replaces the pmd_page call in free_pte_range and apply_to_pte_range.

Signed-off-by: Martin Schwidefsky <[email protected]>
---

arch/frv/mm/pgalloc.c | 8 +++++---
arch/powerpc/mm/pgtable_32.c | 14 ++++++++------
arch/ppc/mm/pgtable.c | 9 ++++++---
arch/s390/mm/pgtable.c | 2 ++
arch/sparc/mm/srmmu.c | 10 +++++++---
arch/sparc/mm/sun4c.c | 14 ++++++++++----
arch/um/kernel/mem.c | 4 +++-
arch/x86/mm/pgtable_32.c | 5 ++++-
include/asm-alpha/page.h | 2 ++
include/asm-alpha/pgalloc.h | 22 ++++++++++++++--------
include/asm-arm/page.h | 2 ++
include/asm-arm/pgalloc.h | 9 ++++++---
include/asm-avr32/page.h | 1 +
include/asm-avr32/pgalloc.h | 16 ++++++++++++----
include/asm-cris/page.h | 1 +
include/asm-cris/pgalloc.h | 14 ++++++++++----
include/asm-frv/page.h | 1 +
include/asm-frv/pgalloc.h | 12 +++++++++---
include/asm-ia64/page.h | 2 ++
include/asm-ia64/pgalloc.h | 20 ++++++++++++++------
include/asm-m32r/page.h | 1 +
include/asm-m32r/pgalloc.h | 10 ++++++----
include/asm-m68k/motorola_pgalloc.h | 14 ++++++++------
include/asm-m68k/page.h | 1 +
include/asm-m68k/sun3_pgalloc.h | 17 ++++++++++++-----
include/asm-mips/page.h | 1 +
include/asm-mips/pgalloc.h | 17 ++++++++++++-----
include/asm-parisc/page.h | 1 +
include/asm-parisc/pgalloc.h | 11 +++++++++--
include/asm-powerpc/page.h | 2 ++
include/asm-powerpc/pgalloc-32.h | 6 ++++--
include/asm-powerpc/pgalloc-64.h | 27 ++++++++++++++++++++-------
include/asm-ppc/pgalloc.h | 6 ++++--
include/asm-s390/page.h | 2 ++
include/asm-s390/pgalloc.h | 3 ++-
include/asm-s390/tlb.h | 2 +-
include/asm-sh/page.h | 2 ++
include/asm-sh/pgalloc.h | 27 ++++++++++++++++++++-------
include/asm-sparc/page.h | 2 ++
include/asm-sparc/pgalloc.h | 5 +++--
include/asm-sparc64/page.h | 2 ++
include/asm-sparc64/pgalloc.h | 19 ++++++++++++++-----
include/asm-um/page.h | 2 ++
include/asm-um/pgalloc.h | 12 +++++++++---
include/asm-x86/page_32.h | 2 ++
include/asm-x86/page_64.h | 2 ++
include/asm-x86/pgalloc_32.h | 6 ++++--
include/asm-x86/pgalloc_64.h | 22 +++++++++++++++++-----
include/asm-xtensa/page.h | 1 +
include/asm-xtensa/pgalloc.h | 17 ++++++++++++-----
include/linux/mm.h | 14 +++++++++++++-
mm/memory.c | 32 +++++++++++++++-----------------
mm/vmalloc.c | 2 +-
53 files changed, 326 insertions(+), 132 deletions(-)

diff -urpN linux-2.6/arch/frv/mm/pgalloc.c linux-2.6-patched/arch/frv/mm/pgalloc.c
--- linux-2.6/arch/frv/mm/pgalloc.c 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/arch/frv/mm/pgalloc.c 2008-02-06 16:31:31.000000000 +0100
@@ -28,7 +28,7 @@ pte_t *pte_alloc_one_kernel(struct mm_st
return pte;
}

-struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *page;

@@ -37,9 +37,11 @@ struct page *pte_alloc_one(struct mm_str
#else
page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
#endif
- if (page)
+ if (page) {
clear_highpage(page);
- flush_dcache_page(page);
+ pgtable_page_ctor(page);
+ flush_dcache_page(page);
+ }
return page;
}

diff -urpN linux-2.6/arch/powerpc/mm/pgtable_32.c linux-2.6-patched/arch/powerpc/mm/pgtable_32.c
--- linux-2.6/arch/powerpc/mm/pgtable_32.c 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/arch/powerpc/mm/pgtable_32.c 2008-02-06 16:31:31.000000000 +0100
@@ -107,19 +107,20 @@ __init_refok pte_t *pte_alloc_one_kernel
return pte;
}

-struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *ptepage;

#ifdef CONFIG_HIGHPTE
- gfp_t flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT;
+ gfp_t flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT | __GFP_ZERO;
#else
- gfp_t flags = GFP_KERNEL | __GFP_REPEAT;
+ gfp_t flags = GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO;
#endif

ptepage = alloc_pages(flags, 0);
- if (ptepage)
- clear_highpage(ptepage);
+ if (!ptepage)
+ return NULL;
+ pgtable_page_ctor(ptepage);
return ptepage;
}

@@ -131,11 +132,12 @@ void pte_free_kernel(struct mm_struct *m
free_page((unsigned long)pte);
}

-void pte_free(struct mm_struct *mm, struct page *ptepage)
+void pte_free(struct mm_struct *mm, pgtable_t ptepage)
{
#ifdef CONFIG_SMP
hash_page_sync();
#endif
+ pgtable_page_dtor(ptepage);
__free_page(ptepage);
}

diff -urpN linux-2.6/arch/ppc/mm/pgtable.c linux-2.6-patched/arch/ppc/mm/pgtable.c
--- linux-2.6/arch/ppc/mm/pgtable.c 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/arch/ppc/mm/pgtable.c 2008-02-06 16:31:31.000000000 +0100
@@ -95,7 +95,7 @@ __init_refok pte_t *pte_alloc_one_kernel
return pte;
}

-struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *ptepage;

@@ -106,8 +106,10 @@ struct page *pte_alloc_one(struct mm_str
#endif

ptepage = alloc_pages(flags, 0);
- if (ptepage)
+ if (ptepage) {
clear_highpage(ptepage);
+ pgtable_page_ctor(ptepage);
+ }
return ptepage;
}

@@ -119,11 +121,12 @@ void pte_free_kernel(struct mm_struct *m
free_page((unsigned long)pte);
}

-void pte_free(struct mm_struct *mm, struct page *ptepage)
+void pte_free(struct mm_struct *mm, pgtable_t ptepage)
{
#ifdef CONFIG_SMP
hash_page_sync();
#endif
+ pgtable_page_dtor(ptepage);
__free_page(ptepage);
}

diff -urpN linux-2.6/arch/s390/mm/pgtable.c linux-2.6-patched/arch/s390/mm/pgtable.c
--- linux-2.6/arch/s390/mm/pgtable.c 2008-01-22 16:16:23.000000000 +0100
+++ linux-2.6-patched/arch/s390/mm/pgtable.c 2008-02-06 16:31:31.000000000 +0100
@@ -78,6 +78,7 @@ unsigned long *page_table_alloc(int noex
clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
page->index = (addr_t) table;
}
+ pgtable_page_ctor(page);
table = (unsigned long *) page_to_phys(page);
clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
return table;
@@ -87,6 +88,7 @@ void page_table_free(unsigned long *tabl
{
unsigned long *shadow = get_shadow_pte(table);

+ pgtable_page_dtor(virt_to_page(table));
if (shadow)
free_page((unsigned long) shadow);
free_page((unsigned long) table);
diff -urpN linux-2.6/arch/sparc/mm/srmmu.c linux-2.6-patched/arch/sparc/mm/srmmu.c
--- linux-2.6/arch/sparc/mm/srmmu.c 2008-01-22 16:16:23.000000000 +0100
+++ linux-2.6-patched/arch/sparc/mm/srmmu.c 2008-02-06 16:31:31.000000000 +0100
@@ -490,14 +490,17 @@ srmmu_pte_alloc_one_kernel(struct mm_str
return (pte_t *)srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
}

-static struct page *
+static pgtable_t
srmmu_pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
unsigned long pte;
+ struct page *page;

if ((pte = (unsigned long)srmmu_pte_alloc_one_kernel(mm, address)) == 0)
return NULL;
- return pfn_to_page( __nocache_pa(pte) >> PAGE_SHIFT );
+ page = pfn_to_page( __nocache_pa(pte) >> PAGE_SHIFT );
+ pgtable_page_ctor(page);
+ return page;
}

static void srmmu_free_pte_fast(pte_t *pte)
@@ -505,10 +508,11 @@ static void srmmu_free_pte_fast(pte_t *p
srmmu_free_nocache((unsigned long)pte, PTE_SIZE);
}

-static void srmmu_pte_free(struct page *pte)
+static void srmmu_pte_free(pgtable_t pte)
{
unsigned long p;

+ pgtable_page_dtor(pte);
p = (unsigned long)page_address(pte); /* Cached address (for test) */
if (p == 0)
BUG();
diff -urpN linux-2.6/arch/sparc/mm/sun4c.c linux-2.6-patched/arch/sparc/mm/sun4c.c
--- linux-2.6/arch/sparc/mm/sun4c.c 2008-01-22 16:16:23.000000000 +0100
+++ linux-2.6-patched/arch/sparc/mm/sun4c.c 2008-02-06 16:31:31.000000000 +0100
@@ -1947,12 +1947,17 @@ static pte_t *sun4c_pte_alloc_one_kernel
return pte;
}

-static struct page *sun4c_pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static pgtable_t sun4c_pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
- pte_t *pte = sun4c_pte_alloc_one_kernel(mm, address);
+ pte_t *pte;
+ struct page *page;
+
+ pte = sun4c_pte_alloc_one_kernel(mm, address);
if (pte == NULL)
return NULL;
- return virt_to_page(pte);
+ page = virt_to_page(pte);
+ pgtable_page_ctor(page);
+ return page;
}

static inline void sun4c_free_pte_fast(pte_t *pte)
@@ -1962,8 +1967,9 @@ static inline void sun4c_free_pte_fast(p
pgtable_cache_size++;
}

-static void sun4c_pte_free(struct page *pte)
+static void sun4c_pte_free(pgtable_t pte)
{
+ pgtable_page_dtor(pte);
sun4c_free_pte_fast(page_address(pte));
}

diff -urpN linux-2.6/arch/um/kernel/mem.c linux-2.6-patched/arch/um/kernel/mem.c
--- linux-2.6/arch/um/kernel/mem.c 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/arch/um/kernel/mem.c 2008-02-06 16:31:31.000000000 +0100
@@ -354,11 +354,13 @@ pte_t *pte_alloc_one_kernel(struct mm_st
return pte;
}

-struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *pte;

pte = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+ if (pte)
+ pgtable_page_ctor(pte);
return pte;
}

diff -urpN linux-2.6/arch/x86/mm/pgtable_32.c linux-2.6-patched/arch/x86/mm/pgtable_32.c
--- linux-2.6/arch/x86/mm/pgtable_32.c 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/arch/x86/mm/pgtable_32.c 2008-02-06 16:31:31.000000000 +0100
@@ -183,7 +183,7 @@ pte_t *pte_alloc_one_kernel(struct mm_st
return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
}

-struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *pte;

@@ -192,6 +192,8 @@ struct page *pte_alloc_one(struct mm_str
#else
pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
#endif
+ if (pte)
+ pgtable_page_ctor(pte);
return pte;
}

@@ -365,6 +367,7 @@ void check_pgt_cache(void)

void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
{
+ pgtable_page_dtor(pte);
paravirt_release_pt(page_to_pfn(pte));
tlb_remove_page(tlb, pte);
}
diff -urpN linux-2.6/include/asm-alpha/page.h linux-2.6-patched/include/asm-alpha/page.h
--- linux-2.6/include/asm-alpha/page.h 2008-01-22 16:16:26.000000000 +0100
+++ linux-2.6-patched/include/asm-alpha/page.h 2008-02-06 16:31:31.000000000 +0100
@@ -64,6 +64,8 @@ typedef unsigned long pgprot_t;

#endif /* STRICT_MM_TYPECHECKS */

+typedef struct page *pgtable_t;
+
#ifdef USE_48_BIT_KSEG
#define PAGE_OFFSET 0xffff800000000000UL
#else
diff -urpN linux-2.6/include/asm-alpha/pgalloc.h linux-2.6-patched/include/asm-alpha/pgalloc.h
--- linux-2.6/include/asm-alpha/pgalloc.h 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/include/asm-alpha/pgalloc.h 2008-02-06 16:31:31.000000000 +0100
@@ -11,10 +11,11 @@
*/

static inline void
-pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
+pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte)
{
pmd_set(pmd, (pte_t *)(page_to_pa(pte) + PAGE_OFFSET));
}
+#define pmd_pgtable(pmd) pmd_page(pmd)

static inline void
pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
@@ -57,18 +58,23 @@ pte_free_kernel(struct mm_struct *mm, pt
free_page((unsigned long)pte);
}

-static inline struct page *
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline pgtable_t
+pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
- pte_t *pte = pte_alloc_one_kernel(mm, addr);
- if (pte)
- return virt_to_page(pte);
- return NULL;
+ pte_t *pte = pte_alloc_one_kernel(mm, address);
+ struct page *page;
+
+ if (!pte)
+ return NULL;
+ page = virt_to_page(pte);
+ pgtable_page_ctor(page);
+ return page;
}

static inline void
-pte_free(struct mm_struct *mm, struct page *page)
+pte_free(struct mm_struct *mm, pgtable_t page)
{
+ pgtable_page_dtor(page);
__free_page(page);
}

diff -urpN linux-2.6/include/asm-arm/page.h linux-2.6-patched/include/asm-arm/page.h
--- linux-2.6/include/asm-arm/page.h 2008-01-22 16:16:26.000000000 +0100
+++ linux-2.6-patched/include/asm-arm/page.h 2008-02-06 16:31:31.000000000 +0100
@@ -174,6 +174,8 @@ typedef unsigned long pgprot_t;

#endif /* STRICT_MM_TYPECHECKS */

+typedef struct page *pgtable_t;
+
#endif /* CONFIG_MMU */

#include <asm/memory.h>
diff -urpN linux-2.6/include/asm-arm/pgalloc.h linux-2.6-patched/include/asm-arm/pgalloc.h
--- linux-2.6/include/asm-arm/pgalloc.h 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/include/asm-arm/pgalloc.h 2008-02-06 16:31:31.000000000 +0100
@@ -66,7 +66,7 @@ pte_alloc_one_kernel(struct mm_struct *m
return pte;
}

-static inline struct page *
+static inline pgtable_t
pte_alloc_one(struct mm_struct *mm, unsigned long addr)
{
struct page *pte;
@@ -75,6 +75,7 @@ pte_alloc_one(struct mm_struct *mm, unsi
if (pte) {
void *page = page_address(pte);
clean_dcache_area(page, sizeof(pte_t) * PTRS_PER_PTE);
+ pgtable_page_ctor(pte);
}

return pte;
@@ -91,8 +92,9 @@ static inline void pte_free_kernel(struc
}
}

-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
+ pgtable_page_dtor(pte);
__free_page(pte);
}

@@ -123,10 +125,11 @@ pmd_populate_kernel(struct mm_struct *mm
}

static inline void
-pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep)
+pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep)
{
__pmd_populate(pmdp, page_to_pfn(ptep) << PAGE_SHIFT | _PAGE_USER_TABLE);
}
+#define pmd_pgtable(pmd) pmd_page(pmd)

#endif /* CONFIG_MMU */

diff -urpN linux-2.6/include/asm-avr32/page.h linux-2.6-patched/include/asm-avr32/page.h
--- linux-2.6/include/asm-avr32/page.h 2008-01-22 16:16:26.000000000 +0100
+++ linux-2.6-patched/include/asm-avr32/page.h 2008-02-06 16:31:31.000000000 +0100
@@ -36,6 +36,7 @@ extern void copy_page(void *to, void *fr
typedef struct { unsigned long pte; } pte_t;
typedef struct { unsigned long pgd; } pgd_t;
typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct page *pgtable_t;

#define pte_val(x) ((x).pte)
#define pgd_val(x) ((x).pgd)
diff -urpN linux-2.6/include/asm-avr32/pgalloc.h linux-2.6-patched/include/asm-avr32/pgalloc.h
--- linux-2.6/include/asm-avr32/pgalloc.h 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/include/asm-avr32/pgalloc.h 2008-02-06 16:31:31.000000000 +0100
@@ -17,10 +17,11 @@
set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)))

static __inline__ void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
- struct page *pte)
+ pgtable_t pte)
{
set_pmd(pmd, __pmd(_PAGE_TABLE + page_to_phys(pte)));
}
+#define pmd_pgtable(pmd) pmd_page(pmd)

/*
* Allocate and free page tables
@@ -51,7 +52,9 @@ static inline struct page *pte_alloc_one
struct page *pte;

pte = alloc_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
-
+ if (!pte)
+ return NULL;
+ pgtable_page_ctor(pte);
return pte;
}

@@ -60,12 +63,17 @@ static inline void pte_free_kernel(struc
free_page((unsigned long)pte);
}

-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
+ pgtable_page_dtor(pte);
__free_page(pte);
}

-#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+#define __pte_free_tlb(tlb,pte) \
+do { \
+ pgtable_page_dtor(pte); \
+ tlb_remove_page((tlb), pte); \
+} while (0)

#define check_pgt_cache() do { } while(0)

diff -urpN linux-2.6/include/asm-cris/page.h linux-2.6-patched/include/asm-cris/page.h
--- linux-2.6/include/asm-cris/page.h 2008-01-22 16:16:26.000000000 +0100
+++ linux-2.6-patched/include/asm-cris/page.h 2008-02-06 16:31:31.000000000 +0100
@@ -28,6 +28,7 @@
typedef struct { unsigned long pte; } pte_t;
typedef struct { unsigned long pgd; } pgd_t;
typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct page *pgtable_t;
#endif

#define pte_val(x) ((x).pte)
diff -urpN linux-2.6/include/asm-cris/pgalloc.h linux-2.6-patched/include/asm-cris/pgalloc.h
--- linux-2.6/include/asm-cris/pgalloc.h 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/include/asm-cris/pgalloc.h 2008-02-06 16:31:31.000000000 +0100
@@ -6,6 +6,7 @@

#define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, pte)
#define pmd_populate(mm, pmd, pte) pmd_set(pmd, page_address(pte))
+#define pmd_pgtable(pmd) pmd_page(pmd)

/*
* Allocate and free page tables.
@@ -27,10 +28,12 @@ static inline pte_t *pte_alloc_one_kerne
return pte;
}

-static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *pte;
pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+ if (pte)
+ pgtable_page_ctor(pte);
return pte;
}

@@ -39,13 +42,17 @@ static inline void pte_free_kernel(struc
free_page((unsigned long)pte);
}

-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
+ pgtable_page_dtor(pte);
__free_page(pte);
}

-
-#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+#define __pte_free_tlb(tlb,pte) \
+do { \
+ pgtable_page_dtor(pte); \
+ tlb_remove_page((tlb), pte); \
+} while (0)

#define check_pgt_cache() do { } while (0)

diff -urpN linux-2.6/include/asm-frv/page.h linux-2.6-patched/include/asm-frv/page.h
--- linux-2.6/include/asm-frv/page.h 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/include/asm-frv/page.h 2008-02-06 16:31:31.000000000 +0100
@@ -27,6 +27,7 @@ typedef struct { unsigned long ste[64];}
typedef struct { pmd_t pue[1]; } pud_t;
typedef struct { pud_t pge[1]; } pgd_t;
typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct page *pgtable_t;

#define pte_val(x) ((x).pte)
#define pmd_val(x) ((x).ste[0])
diff -urpN linux-2.6/include/asm-frv/pgalloc.h linux-2.6-patched/include/asm-frv/pgalloc.h
--- linux-2.6/include/asm-frv/pgalloc.h 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/include/asm-frv/pgalloc.h 2008-02-06 16:31:31.000000000 +0100
@@ -25,6 +25,7 @@
do { \
__set_pmd((PMD), page_to_pfn(PAGE) << PAGE_SHIFT | _PAGE_TABLE); \
} while(0)
+#define pmd_pgtable(pmd) pmd_page(pmd)

/*
* Allocate and free page tables.
@@ -35,19 +36,24 @@ extern void pgd_free(struct mm_struct *m

extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);

-extern struct page *pte_alloc_one(struct mm_struct *, unsigned long);
+extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);

static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
free_page((unsigned long)pte);
}

-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
+ pgtable_page_dtor(pte);
__free_page(pte);
}

-#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+#define __pte_free_tlb(tlb,pte) \
+do { \
+ pgtable_page_dtor(pte); \
+ tlb_remove_page((tlb),(pte)); \
+} while (0)

/*
* allocating and freeing a pmd is trivial: the 1-entry pmd is
diff -urpN linux-2.6/include/asm-ia64/page.h linux-2.6-patched/include/asm-ia64/page.h
--- linux-2.6/include/asm-ia64/page.h 2008-01-22 16:16:26.000000000 +0100
+++ linux-2.6-patched/include/asm-ia64/page.h 2008-02-06 16:31:31.000000000 +0100
@@ -187,6 +187,7 @@ get_order (unsigned long size)
#endif
typedef struct { unsigned long pgd; } pgd_t;
typedef struct { unsigned long pgprot; } pgprot_t;
+ typedef struct page *pgtable_t;

# define pte_val(x) ((x).pte)
# define pmd_val(x) ((x).pmd)
@@ -208,6 +209,7 @@ get_order (unsigned long size)
typedef unsigned long pmd_t;
typedef unsigned long pgd_t;
typedef unsigned long pgprot_t;
+ typedef struct page *pgtable_t;
# endif

# define pte_val(x) (x)
diff -urpN linux-2.6/include/asm-ia64/pgalloc.h linux-2.6-patched/include/asm-ia64/pgalloc.h
--- linux-2.6/include/asm-ia64/pgalloc.h 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/include/asm-ia64/pgalloc.h 2008-02-06 16:31:31.000000000 +0100
@@ -70,10 +70,11 @@ static inline void pmd_free(struct mm_st
#define __pmd_free_tlb(tlb, pmd) pmd_free((tlb)->mm, pmd)

static inline void
-pmd_populate(struct mm_struct *mm, pmd_t * pmd_entry, struct page *pte)
+pmd_populate(struct mm_struct *mm, pmd_t * pmd_entry, pgtable_t pte)
{
pmd_val(*pmd_entry) = page_to_phys(pte);
}
+#define pmd_pgtable(pmd) pmd_page(pmd)

static inline void
pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte)
@@ -81,11 +82,17 @@ pmd_populate_kernel(struct mm_struct *mm
pmd_val(*pmd_entry) = __pa(pte);
}

-static inline struct page *pte_alloc_one(struct mm_struct *mm,
- unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- void *pg = quicklist_alloc(0, GFP_KERNEL, NULL);
- return pg ? virt_to_page(pg) : NULL;
+ struct page *page;
+ void *pg;
+
+ pg = quicklist_alloc(0, GFP_KERNEL, NULL);
+ if (!pg)
+ return NULL;
+ page = virt_to_page(pg);
+ pgtable_page_ctor(page);
+ return page;
}

static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
@@ -94,8 +101,9 @@ static inline pte_t *pte_alloc_one_kerne
return quicklist_alloc(0, GFP_KERNEL, NULL);
}

-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
+ pgtable_page_dtor(pte);
quicklist_free_page(0, NULL, pte);
}

diff -urpN linux-2.6/include/asm-m32r/page.h linux-2.6-patched/include/asm-m32r/page.h
--- linux-2.6/include/asm-m32r/page.h 2008-01-22 16:16:26.000000000 +0100
+++ linux-2.6-patched/include/asm-m32r/page.h 2008-02-06 16:31:31.000000000 +0100
@@ -29,6 +29,7 @@ typedef struct { unsigned long pgd; } pg
#define PTE_MASK PAGE_MASK

typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct page *pgtable_t;

#define pmd_val(x) ((x).pmd)
#define pgd_val(x) ((x).pgd)
diff -urpN linux-2.6/include/asm-m32r/pgalloc.h linux-2.6-patched/include/asm-m32r/pgalloc.h
--- linux-2.6/include/asm-m32r/pgalloc.h 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/include/asm-m32r/pgalloc.h 2008-02-06 16:31:31.000000000 +0100
@@ -9,10 +9,11 @@
set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)))

static __inline__ void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
- struct page *pte)
+ pgtable_t pte)
{
set_pmd(pmd, __pmd(_PAGE_TABLE + page_to_phys(pte)));
}
+#define pmd_pgtable(pmd) pmd_page(pmd)

/*
* Allocate and free page tables.
@@ -37,12 +38,13 @@ static __inline__ pte_t *pte_alloc_one_k
return pte;
}

-static __inline__ struct page *pte_alloc_one(struct mm_struct *mm,
+static __inline__ pgtable_t pte_alloc_one(struct mm_struct *mm,
unsigned long address)
{
struct page *pte = alloc_page(GFP_KERNEL|__GFP_ZERO);

-
+ if (pte)
+ pgtable_page_ctor(pte);
return pte;
}

@@ -51,8 +53,9 @@ static inline void pte_free_kernel(struc
free_page((unsigned long)pte);
}

-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
+ pgtable_page_dtor(pte);
__free_page(pte);
}

diff -urpN linux-2.6/include/asm-m68k/motorola_pgalloc.h linux-2.6-patched/include/asm-m68k/motorola_pgalloc.h
--- linux-2.6/include/asm-m68k/motorola_pgalloc.h 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/include/asm-m68k/motorola_pgalloc.h 2008-02-06 16:31:31.000000000 +0100
@@ -7,7 +7,6 @@
extern pmd_t *get_pointer_table(void);
extern int free_pointer_table(pmd_t *);

-
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
pte_t *pte;
@@ -28,7 +27,7 @@ static inline void pte_free_kernel(struc
free_page((unsigned long) pte);
}

-static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *page = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
pte_t *pte;
@@ -43,19 +42,21 @@ static inline struct page *pte_alloc_one
nocache_page(pte);
}
kunmap(pte);
-
+ pgtable_page_ctor(page);
return page;
}

-static inline void pte_free(struct mm_struct *mm, struct page *page)
+static inline void pte_free(struct mm_struct *mm, pgtable_t page)
{
+ pgtable_page_dtor(page);
cache_page(kmap(page));
kunmap(page);
__free_page(page);
}

-static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *page)
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page)
{
+ pgtable_page_dtor(page);
cache_page(kmap(page));
kunmap(page);
__free_page(page);
@@ -94,10 +95,11 @@ static inline void pmd_populate_kernel(s
pmd_set(pmd, pte);
}

-static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *page)
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page)
{
pmd_set(pmd, page_address(page));
}
+#define pmd_pgtable(pmd) pmd_page(pmd)

static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
{
diff -urpN linux-2.6/include/asm-m68k/page.h linux-2.6-patched/include/asm-m68k/page.h
--- linux-2.6/include/asm-m68k/page.h 2008-01-22 16:16:26.000000000 +0100
+++ linux-2.6-patched/include/asm-m68k/page.h 2008-02-06 16:31:31.000000000 +0100
@@ -94,6 +94,7 @@ typedef struct { unsigned long pte; } pt
typedef struct { unsigned long pmd[16]; } pmd_t;
typedef struct { unsigned long pgd; } pgd_t;
typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct page *pgtable_t;

#define pte_val(x) ((x).pte)
#define pmd_val(x) ((&x)->pmd[0])
diff -urpN linux-2.6/include/asm-m68k/sun3_pgalloc.h linux-2.6-patched/include/asm-m68k/sun3_pgalloc.h
--- linux-2.6/include/asm-m68k/sun3_pgalloc.h 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/include/asm-m68k/sun3_pgalloc.h 2008-02-06 16:31:31.000000000 +0100
@@ -26,12 +26,17 @@ static inline void pte_free_kernel(struc
free_page((unsigned long) pte);
}

-static inline void pte_free(struct mm_struct *mm, struct page *page)
+static inline void pte_free(struct mm_struct *mm, pgtable_t page)
{
+ pgtable_page_dtor(page);
__free_page(page);
}

-#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+#define __pte_free_tlb(tlb,pte) \
+do { \
+ pgtable_page_dtor(pte); \
+ tlb_remove_page((tlb), pte); \
+} while (0)

static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
unsigned long address)
@@ -45,8 +50,8 @@ static inline pte_t *pte_alloc_one_kerne
return (pte_t *) (page);
}

-static inline struct page *pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+ unsigned long address)
{
struct page *page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);

@@ -54,6 +59,7 @@ static inline struct page *pte_alloc_one
return NULL;

clear_highpage(page);
+ pgtable_page_ctor(page);
return page;

}
@@ -63,10 +69,11 @@ static inline void pmd_populate_kernel(s
pmd_val(*pmd) = __pa((unsigned long)pte);
}

-static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *page)
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page)
{
pmd_val(*pmd) = __pa((unsigned long)page_address(page));
}
+#define pmd_pgtable(pmd) pmd_page(pmd)

/*
* allocating and freeing a pmd is trivial: the 1-entry pmd is
diff -urpN linux-2.6/include/asm-mips/page.h linux-2.6-patched/include/asm-mips/page.h
--- linux-2.6/include/asm-mips/page.h 2008-01-22 16:16:26.000000000 +0100
+++ linux-2.6-patched/include/asm-mips/page.h 2008-02-06 16:31:31.000000000 +0100
@@ -93,6 +93,7 @@ typedef struct { unsigned long pte; } pt
#define pte_val(x) ((x).pte)
#define __pte(x) ((pte_t) { (x) } )
#endif
+typedef struct page *pgtable_t;

/*
* For 3-level pagetables we defines these ourselves, for 2-level the
diff -urpN linux-2.6/include/asm-mips/pgalloc.h linux-2.6-patched/include/asm-mips/pgalloc.h
--- linux-2.6/include/asm-mips/pgalloc.h 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/include/asm-mips/pgalloc.h 2008-02-06 16:31:31.000000000 +0100
@@ -20,10 +20,11 @@ static inline void pmd_populate_kernel(s
}

static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
- struct page *pte)
+ pgtable_t pte)
{
set_pmd(pmd, __pmd((unsigned long)page_address(pte)));
}
+#define pmd_pgtable(pmd) pmd_page(pmd)

/*
* Initialize a new pmd table with invalid pointers.
@@ -79,9 +80,10 @@ static inline struct page *pte_alloc_one
struct page *pte;

pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER);
- if (pte)
+ if (pte) {
clear_highpage(pte);
-
+ pgtable_page_ctor(pte);
+ }
return pte;
}

@@ -90,12 +92,17 @@ static inline void pte_free_kernel(struc
free_pages((unsigned long)pte, PTE_ORDER);
}

-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
+ pgtable_page_dtor(pte);
__free_pages(pte, PTE_ORDER);
}

-#define __pte_free_tlb(tlb, pte) tlb_remove_page((tlb), (pte))
+#define __pte_free_tlb(tlb,pte) \
+do { \
+ pgtable_page_dtor(pte); \
+ tlb_remove_page((tlb), pte); \
+} while (0)

#ifdef CONFIG_32BIT

diff -urpN linux-2.6/include/asm-parisc/page.h linux-2.6-patched/include/asm-parisc/page.h
--- linux-2.6/include/asm-parisc/page.h 2008-01-22 16:16:26.000000000 +0100
+++ linux-2.6-patched/include/asm-parisc/page.h 2008-02-06 16:31:31.000000000 +0100
@@ -93,6 +93,7 @@ typedef unsigned long pgprot_t;

#endif /* STRICT_MM_TYPECHECKS */

+typedef struct page *pgtable_t;

typedef struct __physmem_range {
unsigned long start_pfn;
diff -urpN linux-2.6/include/asm-parisc/pgalloc.h linux-2.6-patched/include/asm-parisc/pgalloc.h
--- linux-2.6/include/asm-parisc/pgalloc.h 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/include/asm-parisc/pgalloc.h 2008-02-06 16:31:31.000000000 +0100
@@ -115,11 +115,14 @@ pmd_populate_kernel(struct mm_struct *mm

#define pmd_populate(mm, pmd, pte_page) \
pmd_populate_kernel(mm, pmd, page_address(pte_page))
+#define pmd_pgtable(pmd) pmd_page(pmd)

-static inline struct page *
+static inline pgtable_t
pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *page = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+ if (page)
+ pgtable_page_ctor(page);
return page;
}

@@ -135,7 +138,11 @@ static inline void pte_free_kernel(struc
free_page((unsigned long)pte);
}

-#define pte_free(mm, page) pte_free_kernel(page_address(page))
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
+{
+ pgtable_page_dtor(pte);
+ pte_free_kernel(mm, page_address(pte));
+}

#define check_pgt_cache() do { } while (0)

diff -urpN linux-2.6/include/asm-powerpc/page.h linux-2.6-patched/include/asm-powerpc/page.h
--- linux-2.6/include/asm-powerpc/page.h 2008-01-22 16:16:26.000000000 +0100
+++ linux-2.6-patched/include/asm-powerpc/page.h 2008-02-06 16:31:31.000000000 +0100
@@ -191,6 +191,8 @@ extern int page_is_ram(unsigned long pfn

struct vm_area_struct;

+typedef struct page *pgtable_t;
+
#include <asm-generic/memory_model.h>
#endif /* __ASSEMBLY__ */

diff -urpN linux-2.6/include/asm-powerpc/pgalloc-32.h linux-2.6-patched/include/asm-powerpc/pgalloc-32.h
--- linux-2.6/include/asm-powerpc/pgalloc-32.h 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/include/asm-powerpc/pgalloc-32.h 2008-02-06 16:31:31.000000000 +0100
@@ -22,17 +22,19 @@ extern void pgd_free(struct mm_struct *m
(pmd_val(*(pmd)) = __pa(pte) | _PMD_PRESENT)
#define pmd_populate(mm, pmd, pte) \
(pmd_val(*(pmd)) = (page_to_pfn(pte) << PAGE_SHIFT) | _PMD_PRESENT)
+#define pmd_pgtable(pmd) pmd_page(pmd)
#else
#define pmd_populate_kernel(mm, pmd, pte) \
(pmd_val(*(pmd)) = (unsigned long)pte | _PMD_PRESENT)
#define pmd_populate(mm, pmd, pte) \
(pmd_val(*(pmd)) = (unsigned long)lowmem_page_address(pte) | _PMD_PRESENT)
+#define pmd_pgtable(pmd) pmd_page(pmd)
#endif

extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
-extern struct page *pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
extern void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
-extern void pte_free(struct mm_struct *mm, struct page *pte);
+extern void pte_free(struct mm_struct *mm, pgtable_t pte);

#define __pte_free_tlb(tlb, pte) pte_free((tlb)->mm, (pte))

diff -urpN linux-2.6/include/asm-powerpc/pgalloc-64.h linux-2.6-patched/include/asm-powerpc/pgalloc-64.h
--- linux-2.6/include/asm-powerpc/pgalloc-64.h 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/include/asm-powerpc/pgalloc-64.h 2008-02-06 16:31:31.000000000 +0100
@@ -58,6 +58,7 @@ static inline void pud_populate(struct m
#define pmd_populate(mm, pmd, pte_page) \
pmd_populate_kernel(mm, pmd, page_address(pte_page))
#define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, (unsigned long)(pte))
+#define pmd_pgtable(pmd) pmd_page(pmd)


#else /* CONFIG_PPC_64K_PAGES */
@@ -72,6 +73,7 @@ static inline void pmd_populate_kernel(s

#define pmd_populate(mm, pmd, pte_page) \
pmd_populate_kernel(mm, pmd, page_address(pte_page))
+#define pmd_pgtable(pmd) pmd_page(pmd)

#endif /* CONFIG_PPC_64K_PAGES */

@@ -92,11 +94,18 @@ static inline pte_t *pte_alloc_one_kerne
return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
}

-static inline struct page *pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+ unsigned long address)
{
- pte_t *pte = pte_alloc_one_kernel(mm, address);
- return pte ? virt_to_page(pte) : NULL;
+ struct page *page;
+ pte_t *pte;
+
+ pte = pte_alloc_one_kernel(mm, address);
+ if (!pte)
+ return NULL;
+ page = virt_to_page(pte);
+ pgtable_page_ctor(page);
+ return page;
}

static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
@@ -104,8 +113,9 @@ static inline void pte_free_kernel(struc
free_page((unsigned long)pte);
}

-static inline void pte_free(struct mm_struct *mm, struct page *ptepage)
+static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
{
+ pgtable_page_dtor(ptepage);
__free_page(ptepage);
}

@@ -136,9 +146,12 @@ static inline void pgtable_free(pgtable_

extern void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf);

-#define __pte_free_tlb(tlb, ptepage) \
+#define __pte_free_tlb(tlb,ptepage) \
+do { \
+ pgtable_page_dtor(ptepage); \
pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \
- PTE_NONCACHE_NUM, PTE_TABLE_SIZE-1))
+ PTE_NONCACHE_NUM, PTE_TABLE_SIZE-1)); \
+} while (0)
#define __pmd_free_tlb(tlb, pmd) \
pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \
PMD_CACHE_NUM, PMD_TABLE_SIZE-1))
diff -urpN linux-2.6/include/asm-ppc/pgalloc.h linux-2.6-patched/include/asm-ppc/pgalloc.h
--- linux-2.6/include/asm-ppc/pgalloc.h 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/include/asm-ppc/pgalloc.h 2008-02-06 16:31:31.000000000 +0100
@@ -23,17 +23,19 @@ extern void pgd_free(struct mm_struct *m
(pmd_val(*(pmd)) = __pa(pte) | _PMD_PRESENT)
#define pmd_populate(mm, pmd, pte) \
(pmd_val(*(pmd)) = (page_to_pfn(pte) << PAGE_SHIFT) | _PMD_PRESENT)
+#define pmd_pgtable(pmd) pmd_page(pmd)
#else
#define pmd_populate_kernel(mm, pmd, pte) \
(pmd_val(*(pmd)) = (unsigned long)pte | _PMD_PRESENT)
#define pmd_populate(mm, pmd, pte) \
(pmd_val(*(pmd)) = (unsigned long)lowmem_page_address(pte) | _PMD_PRESENT)
+#define pmd_pgtable(pmd) pmd_page(pmd)
#endif

extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
-extern struct page *pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
extern void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
-extern void pte_free(struct mm_struct *mm, struct page *pte);
+extern void pte_free(struct mm_struct *mm, pgtable_t pte);

#define __pte_free_tlb(tlb, pte) pte_free((tlb)->mm, (pte))

diff -urpN linux-2.6/include/asm-s390/page.h linux-2.6-patched/include/asm-s390/page.h
--- linux-2.6/include/asm-s390/page.h 2008-01-22 16:16:27.000000000 +0100
+++ linux-2.6-patched/include/asm-s390/page.h 2008-02-06 16:31:31.000000000 +0100
@@ -110,6 +110,8 @@ typedef struct { unsigned long pgd; } pg

#endif /* __s390x__ */

+typedef struct page *pgtable_t;
+
#define __pte(x) ((pte_t) { (x) } )
#define __pmd(x) ((pmd_t) { (x) } )
#define __pgd(x) ((pgd_t) { (x) } )
diff -urpN linux-2.6/include/asm-s390/pgalloc.h linux-2.6-patched/include/asm-s390/pgalloc.h
--- linux-2.6/include/asm-s390/pgalloc.h 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/include/asm-s390/pgalloc.h 2008-02-06 16:31:31.000000000 +0100
@@ -132,7 +132,7 @@ pmd_populate_kernel(struct mm_struct *mm
}

static inline void
-pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *page)
+pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page)
{
pte_t *pte = (pte_t *)page_to_phys(page);
pmd_t *shadow_pmd = get_shadow_table(pmd);
@@ -142,6 +142,7 @@ pmd_populate(struct mm_struct *mm, pmd_t
if (shadow_pmd && shadow_pte)
pmd_populate_kernel(mm, shadow_pmd, shadow_pte);
}
+#define pmd_pgtable(pmd) pmd_page(pmd)

/*
* page table entry allocation/free routines.
diff -urpN linux-2.6/include/asm-s390/tlb.h linux-2.6-patched/include/asm-s390/tlb.h
--- linux-2.6/include/asm-s390/tlb.h 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/include/asm-s390/tlb.h 2008-02-06 16:31:31.000000000 +0100
@@ -95,7 +95,7 @@ static inline void tlb_remove_page(struc
* pte_free_tlb frees a pte table and clears the CRSTE for the
* page table from the tlb.
*/
-static inline void pte_free_tlb(struct mmu_gather *tlb, struct page *page)
+static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t page)
{
if (!tlb->fullmm) {
tlb->array[tlb->nr_ptes++] = page;
diff -urpN linux-2.6/include/asm-sh/page.h linux-2.6-patched/include/asm-sh/page.h
--- linux-2.6/include/asm-sh/page.h 2008-01-31 15:42:13.000000000 +0100
+++ linux-2.6-patched/include/asm-sh/page.h 2008-02-06 16:31:31.000000000 +0100
@@ -102,6 +102,8 @@ typedef struct { unsigned long pgd; } pg
#define __pgd(x) ((pgd_t) { (x) } )
#define __pgprot(x) ((pgprot_t) { (x) } )

+typedef struct page *pgtable_t;
+
#endif /* !__ASSEMBLY__ */

/*
diff -urpN linux-2.6/include/asm-sh/pgalloc.h linux-2.6-patched/include/asm-sh/pgalloc.h
--- linux-2.6/include/asm-sh/pgalloc.h 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/include/asm-sh/pgalloc.h 2008-02-06 16:31:31.000000000 +0100
@@ -14,10 +14,11 @@ static inline void pmd_populate_kernel(s
}

static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
- struct page *pte)
+ pgtable_t pte)
{
set_pmd(pmd, __pmd((unsigned long)page_address(pte)));
}
+#define pmd_pgtable(pmd) pmd_page(pmd)

static inline void pgd_ctor(void *x)
{
@@ -47,11 +48,18 @@ static inline pte_t *pte_alloc_one_kerne
return quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL);
}

-static inline struct page *pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+ unsigned long address)
{
- void *pg = quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL);
- return pg ? virt_to_page(pg) : NULL;
+ struct page *page;
+ void *pg;
+
+ pg = quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL);
+ if (!pg)
+ return NULL;
+ page = virt_to_page(pg);
+ pgtable_page_ctor(page);
+ return page;
}

static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
@@ -59,12 +67,17 @@ static inline void pte_free_kernel(struc
quicklist_free(QUICK_PT, NULL, pte);
}

-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
+ pgtable_page_dtor(pte);
quicklist_free_page(QUICK_PT, NULL, pte);
}

-#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+#define __pte_free_tlb(tlb,pte) \
+do { \
+ pgtable_page_dtor(pte); \
+ tlb_remove_page((tlb), (pte)); \
+} while (0)

/*
* allocating and freeing a pmd is trivial: the 1-entry pmd is
diff -urpN linux-2.6/include/asm-sparc/page.h linux-2.6-patched/include/asm-sparc/page.h
--- linux-2.6/include/asm-sparc/page.h 2008-01-22 16:16:27.000000000 +0100
+++ linux-2.6-patched/include/asm-sparc/page.h 2008-02-06 16:31:31.000000000 +0100
@@ -125,6 +125,8 @@ typedef unsigned long iopgprot_t;

#endif

+typedef struct page *pgtable_t;
+
extern unsigned long sparc_unmapped_base;

BTFIXUPDEF_SETHI(sparc_unmapped_base)
diff -urpN linux-2.6/include/asm-sparc/pgalloc.h linux-2.6-patched/include/asm-sparc/pgalloc.h
--- linux-2.6/include/asm-sparc/pgalloc.h 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/include/asm-sparc/pgalloc.h 2008-02-06 16:31:31.000000000 +0100
@@ -50,10 +50,11 @@ BTFIXUPDEF_CALL(void, free_pmd_fast, pmd

BTFIXUPDEF_CALL(void, pmd_populate, pmd_t *, struct page *)
#define pmd_populate(MM, PMD, PTE) BTFIXUP_CALL(pmd_populate)(PMD, PTE)
+#define pmd_pgtable(pmd) pmd_page(pmd)
BTFIXUPDEF_CALL(void, pmd_set, pmd_t *, pte_t *)
#define pmd_populate_kernel(MM, PMD, PTE) BTFIXUP_CALL(pmd_set)(PMD, PTE)

-BTFIXUPDEF_CALL(struct page *, pte_alloc_one, struct mm_struct *, unsigned long)
+BTFIXUPDEF_CALL(pgtable_t , pte_alloc_one, struct mm_struct *, unsigned long)
#define pte_alloc_one(mm, address) BTFIXUP_CALL(pte_alloc_one)(mm, address)
BTFIXUPDEF_CALL(pte_t *, pte_alloc_one_kernel, struct mm_struct *, unsigned long)
#define pte_alloc_one_kernel(mm, addr) BTFIXUP_CALL(pte_alloc_one_kernel)(mm, addr)
@@ -61,7 +62,7 @@ BTFIXUPDEF_CALL(pte_t *, pte_alloc_one_k
BTFIXUPDEF_CALL(void, free_pte_fast, pte_t *)
#define pte_free_kernel(mm, pte) BTFIXUP_CALL(free_pte_fast)(pte)

-BTFIXUPDEF_CALL(void, pte_free, struct page *)
+BTFIXUPDEF_CALL(void, pte_free, pgtable_t )
#define pte_free(mm, pte) BTFIXUP_CALL(pte_free)(pte)
#define __pte_free_tlb(tlb, pte) pte_free((tlb)->mm, pte)

diff -urpN linux-2.6/include/asm-sparc64/page.h linux-2.6-patched/include/asm-sparc64/page.h
--- linux-2.6/include/asm-sparc64/page.h 2008-01-22 16:16:27.000000000 +0100
+++ linux-2.6-patched/include/asm-sparc64/page.h 2008-02-06 16:31:31.000000000 +0100
@@ -106,6 +106,8 @@ typedef unsigned long pgprot_t;

#endif /* (STRICT_MM_TYPECHECKS) */

+typedef struct page *pgtable_t;
+
#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \
(_AC(0x0000000070000000,UL)) : \
(_AC(0xfffff80000000000,UL) + (1UL << 32UL)))
diff -urpN linux-2.6/include/asm-sparc64/pgalloc.h linux-2.6-patched/include/asm-sparc64/pgalloc.h
--- linux-2.6/include/asm-sparc64/pgalloc.h 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/include/asm-sparc64/pgalloc.h 2008-02-06 16:31:31.000000000 +0100
@@ -43,11 +43,18 @@ static inline pte_t *pte_alloc_one_kerne
return quicklist_alloc(0, GFP_KERNEL, NULL);
}

-static inline struct page *pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+ unsigned long address)
{
- void *pg = quicklist_alloc(0, GFP_KERNEL, NULL);
- return pg ? virt_to_page(pg) : NULL;
+ struct page *page;
+ void *pg;
+
+ pg = quicklist_alloc(0, GFP_KERNEL, NULL);
+ if (!pg)
+ return NULL;
+ page = virt_to_page(pg);
+ pgtable_page_ctor(page);
+ return page;
}

static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
@@ -55,8 +62,9 @@ static inline void pte_free_kernel(struc
quicklist_free(0, NULL, pte);
}

-static inline void pte_free(struct mm_struct *mm, struct page *ptepage)
+static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
{
+ pgtable_page_dtor(ptepage);
quicklist_free_page(0, NULL, ptepage);
}

@@ -64,6 +72,7 @@ static inline void pte_free(struct mm_st
#define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
#define pmd_populate(MM,PMD,PTE_PAGE) \
pmd_populate_kernel(MM,PMD,page_address(PTE_PAGE))
+#define pmd_pgtable(pmd) pmd_page(pmd)

static inline void check_pgt_cache(void)
{
diff -urpN linux-2.6/include/asm-um/page.h linux-2.6-patched/include/asm-um/page.h
--- linux-2.6/include/asm-um/page.h 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/include/asm-um/page.h 2008-02-06 16:31:31.000000000 +0100
@@ -79,6 +79,8 @@ typedef unsigned long phys_t;

typedef struct { unsigned long pgprot; } pgprot_t;

+typedef struct page *pgtable_t;
+
#define pgd_val(x) ((x).pgd)
#define pgprot_val(x) ((x).pgprot)

diff -urpN linux-2.6/include/asm-um/pgalloc.h linux-2.6-patched/include/asm-um/pgalloc.h
--- linux-2.6/include/asm-um/pgalloc.h 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/include/asm-um/pgalloc.h 2008-02-06 16:31:31.000000000 +0100
@@ -18,6 +18,7 @@
set_pmd(pmd, __pmd(_PAGE_TABLE + \
((unsigned long long)page_to_pfn(pte) << \
(unsigned long long) PAGE_SHIFT)))
+#define pmd_pgtable(pmd) pmd_page(pmd)

/*
* Allocate and free page tables.
@@ -26,19 +27,24 @@ extern pgd_t *pgd_alloc(struct mm_struct
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);

extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
-extern struct page *pte_alloc_one(struct mm_struct *, unsigned long);
+extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);

static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
free_page((unsigned long) pte);
}

-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
+ pgtable_page_dtor(pte);
__free_page(pte);
}

-#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+#define __pte_free_tlb(tlb,pte) \
+do { \
+ pgtable_page_dtor(pte); \
+ tlb_remove_page((tlb),(pte)); \
+} while (0)

#ifdef CONFIG_3_LEVEL_PGTABLES

diff -urpN linux-2.6/include/asm-x86/page_32.h linux-2.6-patched/include/asm-x86/page_32.h
--- linux-2.6/include/asm-x86/page_32.h 2008-01-31 15:42:14.000000000 +0100
+++ linux-2.6-patched/include/asm-x86/page_32.h 2008-02-06 16:31:31.000000000 +0100
@@ -50,6 +50,11 @@ typedef unsigned long phys_addr_t;
typedef union { pteval_t pte, pte_low; } pte_t;
typedef pte_t boot_pte_t;

#endif /* __ASSEMBLY__ */
#endif /* CONFIG_X86_PAE */
+
+/* Outside the CONFIG_X86_PAE conditional: PAE and non-PAE both need it. */
+#ifndef __ASSEMBLY__
+typedef struct page *pgtable_t;
+#endif

diff -urpN linux-2.6/include/asm-x86/page_64.h linux-2.6-patched/include/asm-x86/page_64.h
--- linux-2.6/include/asm-x86/page_64.h 2008-02-05 10:16:58.000000000 +0100
+++ linux-2.6-patched/include/asm-x86/page_64.h 2008-02-06 16:31:31.000000000 +0100
@@ -71,6 +71,8 @@ typedef unsigned long pgdval_t;
typedef unsigned long pgprotval_t;
typedef unsigned long phys_addr_t;

+typedef struct page *pgtable_t;
+
typedef struct { pteval_t pte; } pte_t;

#define vmemmap ((struct page *)VMEMMAP_START)
diff -urpN linux-2.6/include/asm-x86/pgalloc_32.h linux-2.6-patched/include/asm-x86/pgalloc_32.h
--- linux-2.6/include/asm-x86/pgalloc_32.h 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/include/asm-x86/pgalloc_32.h 2008-02-06 16:31:31.000000000 +0100
@@ -31,6 +31,7 @@ static inline void pmd_populate(struct m
paravirt_alloc_pt(mm, pfn);
set_pmd(pmd, __pmd(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE));
}
+#define pmd_pgtable(pmd) pmd_page(pmd)

/*
* Allocate and free page tables.
@@ -39,15 +40,16 @@ extern pgd_t *pgd_alloc(struct mm_struct
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);

extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
-extern struct page *pte_alloc_one(struct mm_struct *, unsigned long);
+extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);

static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
free_page((unsigned long)pte);
}

-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
+ pgtable_page_dtor(pte);
__free_page(pte);
}

diff -urpN linux-2.6/include/asm-x86/pgalloc_64.h linux-2.6-patched/include/asm-x86/pgalloc_64.h
--- linux-2.6/include/asm-x86/pgalloc_64.h 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/include/asm-x86/pgalloc_64.h 2008-02-06 16:31:31.000000000 +0100
@@ -12,6 +12,8 @@
#define pgd_populate(mm, pgd, pud) \
set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)))

+#define pmd_pgtable(pmd) pmd_page(pmd)
+
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
{
set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT)));
@@ -89,12 +91,17 @@ static inline pte_t *pte_alloc_one_kerne
return (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
}

-static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
- void *p = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+ struct page *page;
+ void *p;
+
+ p = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
if (!p)
return NULL;
- return virt_to_page(p);
+ page = virt_to_page(p);
+ pgtable_page_ctor(page);
+ return page;
}

/* Should really implement gc for free page table pages. This could be
@@ -106,12 +113,17 @@ static inline void pte_free_kernel(struc
free_page((unsigned long)pte);
}

-static inline void pte_free(struct mm_struct *mm, struct page *pte)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
+ pgtable_page_dtor(pte);
__free_page(pte);
}

-#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+#define __pte_free_tlb(tlb,pte) \
+do { \
+ pgtable_page_dtor((pte)); \
+ tlb_remove_page((tlb), (pte)); \
+} while (0)

#define __pmd_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x))
#define __pud_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x))
diff -urpN linux-2.6/include/asm-xtensa/page.h linux-2.6-patched/include/asm-xtensa/page.h
--- linux-2.6/include/asm-xtensa/page.h 2008-01-22 16:16:27.000000000 +0100
+++ linux-2.6-patched/include/asm-xtensa/page.h 2008-02-06 16:31:31.000000000 +0100
@@ -100,6 +100,7 @@
typedef struct { unsigned long pte; } pte_t; /* page table entry */
typedef struct { unsigned long pgd; } pgd_t; /* PGD table entry */
typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct page *pgtable_t;

#define pte_val(x) ((x).pte)
#define pgd_val(x) ((x).pgd)
diff -urpN linux-2.6/include/asm-xtensa/pgalloc.h linux-2.6-patched/include/asm-xtensa/pgalloc.h
--- linux-2.6/include/asm-xtensa/pgalloc.h 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/include/asm-xtensa/pgalloc.h 2008-02-06 16:31:31.000000000 +0100
@@ -24,6 +24,7 @@
(pmd_val(*(pmdp)) = ((unsigned long)ptep))
#define pmd_populate(mm, pmdp, page) \
(pmd_val(*(pmdp)) = ((unsigned long)page_to_virt(page)))
+#define pmd_pgtable(pmd) pmd_page(pmd)

static inline pgd_t*
pgd_alloc(struct mm_struct *mm)
@@ -46,10 +47,18 @@ static inline pte_t *pte_alloc_one_kerne
return kmem_cache_alloc(pgtable_cache, GFP_KERNEL|__GFP_REPEAT);
}

-static inline struct page *pte_alloc_one(struct mm_struct *mm,
- unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+ unsigned long addr)
{
- return virt_to_page(pte_alloc_one_kernel(mm, addr));
+ struct page *page;
+ pte_t *pte;
+
+ pte = pte_alloc_one_kernel(mm, addr);
+ if (!pte) /* allocation failed: nothing to construct */
+ return NULL;
+ page = virt_to_page(pte);
+ pgtable_page_ctor(page);
+ return page;
}

static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
@@ -57,10 +66,11 @@ static inline void pte_free_kernel(struc
kmem_cache_free(pgtable_cache, pte);
}

-static inline void pte_free(struct mm_struct *mm, struct page *page)
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
- kmem_cache_free(pgtable_cache, page_address(page));
+ pgtable_page_dtor(pte);
+ kmem_cache_free(pgtable_cache, page_address(pte));
}

#endif /* __KERNEL__ */
#endif /* _XTENSA_PGALLOC_H */
diff -urpN linux-2.6/include/linux/mm.h linux-2.6-patched/include/linux/mm.h
--- linux-2.6/include/linux/mm.h 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/include/linux/mm.h 2008-02-06 16:31:31.000000000 +0100
@@ -894,6 +894,18 @@ static inline pmd_t *pmd_alloc(struct mm
#define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;})
#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */

+static inline void pgtable_page_ctor(struct page *page)
+{
+ pte_lock_init(page);
+ inc_zone_page_state(page, NR_PAGETABLE);
+}
+
+static inline void pgtable_page_dtor(struct page *page)
+{
+ pte_lock_deinit(page);
+ dec_zone_page_state(page, NR_PAGETABLE);
+}
+
#define pte_offset_map_lock(mm, pmd, address, ptlp) \
({ \
spinlock_t *__ptl = pte_lockptr(mm, pmd); \
@@ -1136,7 +1148,7 @@ struct page *follow_page(struct vm_area_
#define FOLL_GET 0x04 /* do get_page on page */
#define FOLL_ANON 0x08 /* give ZERO_PAGE if no pgtable */

-typedef int (*pte_fn_t)(pte_t *pte, struct page *pmd_page, unsigned long addr,
+typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
void *data);
extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
unsigned long size, pte_fn_t fn, void *data);
diff -urpN linux-2.6/mm/memory.c linux-2.6-patched/mm/memory.c
--- linux-2.6/mm/memory.c 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/mm/memory.c 2008-02-06 16:31:31.000000000 +0100
@@ -122,11 +122,9 @@ void pmd_clear_bad(pmd_t *pmd)
*/
static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
{
- struct page *page = pmd_page(*pmd);
+ pgtable_t token = pmd_pgtable(*pmd);
pmd_clear(pmd);
- pte_lock_deinit(page);
- pte_free_tlb(tlb, page);
- dec_zone_page_state(page, NR_PAGETABLE);
+ pte_free_tlb(tlb, token);
tlb->mm->nr_ptes--;
}

@@ -297,21 +295,19 @@ void free_pgtables(struct mmu_gather **t

int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
{
- struct page *new = pte_alloc_one(mm, address);
+ pgtable_t new = pte_alloc_one(mm, address);
if (!new)
return -ENOMEM;

- pte_lock_init(new);
spin_lock(&mm->page_table_lock);
- if (pmd_present(*pmd)) { /* Another has populated it */
- pte_lock_deinit(new);
- pte_free(mm, new);
- } else {
+ if (!pmd_present(*pmd)) { /* Has another populated it ? */
mm->nr_ptes++;
- inc_zone_page_state(new, NR_PAGETABLE);
pmd_populate(mm, pmd, new);
+ new = NULL;
}
spin_unlock(&mm->page_table_lock);
+ if (new)
+ pte_free(mm, new);
return 0;
}

@@ -322,11 +318,13 @@ int __pte_alloc_kernel(pmd_t *pmd, unsig
return -ENOMEM;

spin_lock(&init_mm.page_table_lock);
- if (pmd_present(*pmd)) /* Another has populated it */
- pte_free_kernel(&init_mm, new);
- else
+ if (!pmd_present(*pmd)) { /* Has another populated it ? */
pmd_populate_kernel(&init_mm, pmd, new);
+ new = NULL;
+ }
spin_unlock(&init_mm.page_table_lock);
+ if (new)
+ pte_free_kernel(&init_mm, new);
return 0;
}

@@ -1370,7 +1368,7 @@ static int apply_to_pte_range(struct mm_
{
pte_t *pte;
int err;
- struct page *pmd_page;
+ pgtable_t token;
spinlock_t *uninitialized_var(ptl);

pte = (mm == &init_mm) ?
@@ -1381,10 +1379,10 @@ static int apply_to_pte_range(struct mm_

BUG_ON(pmd_huge(*pmd));

- pmd_page = pmd_page(*pmd);
+ token = pmd_pgtable(*pmd);

do {
- err = fn(pte, pmd_page, addr, data);
+ err = fn(pte, token, addr, data);
if (err)
break;
} while (pte++, addr += PAGE_SIZE, addr != end);
diff -urpN linux-2.6/mm/vmalloc.c linux-2.6-patched/mm/vmalloc.c
--- linux-2.6/mm/vmalloc.c 2008-02-06 10:00:21.000000000 +0100
+++ linux-2.6-patched/mm/vmalloc.c 2008-02-06 16:31:31.000000000 +0100
@@ -820,7 +820,7 @@ void __attribute__((weak)) vmalloc_sync
}


-static int f(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
+static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
{
/* apply_to_page_range() does all the hard work. */
return 0;