prep_new_page() initializes page->private (and therefore page->ptl) with
0. Make sure nobody has taken it into use between allocation of the page
and the page table constructor.

This can happen if an arch tries to use slab for page table allocation:
slab code uses page->slab_cache and page->first_page (for tail pages),
which share storage with page->ptl.
Signed-off-by: Kirill A. Shutemov <[email protected]>
---
v2:
- fix typo;
Documentation/vm/split_page_table_lock | 4 ++++
include/linux/mm.h | 9 +++++++++
2 files changed, 13 insertions(+)
diff --git a/Documentation/vm/split_page_table_lock b/Documentation/vm/split_page_table_lock
index e2f617b732..7521d367f2 100644
--- a/Documentation/vm/split_page_table_lock
+++ b/Documentation/vm/split_page_table_lock
@@ -53,6 +53,10 @@ There's no need in special enabling of PTE split page table lock:
everything required is done by pgtable_page_ctor() and pgtable_page_dtor(),
which must be called on PTE table allocation / freeing.
+Make sure the architecture doesn't use the slab allocator for page table
+allocation: slab uses page->slab_cache and page->first_page for its pages.
+These fields share storage with page->ptl.
+
PMD split lock only makes sense if you have more than two page table
levels.
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 658e8b317f..9a4a873b2f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1262,6 +1262,15 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
static inline bool ptlock_init(struct page *page)
{
+ /*
+ * prep_new_page() initializes page->private (and therefore page->ptl)
+ * with 0. Make sure nobody has taken it into use in between.
+ *
+ * This can happen if an arch tries to use slab for page table
+ * allocation: slab code uses page->slab_cache and page->first_page
+ * (for tail pages), which share storage with page->ptl.
+ */
+ VM_BUG_ON(page->ptl);
if (!ptlock_alloc(page))
return false;
spin_lock_init(ptlock_ptr(page));
--
1.8.4.rc3
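For readers unfamiliar with struct page, here is a grossly simplified
userspace sketch of the overlap the VM_BUG_ON() above guards against.
The real definition lives in include/linux/mm_types.h and has many more
members; the field types below are stand-ins, so treat this purely as
illustration:

#include <stdio.h>

struct kmem_cache;			/* opaque stand-in */

struct page {
	union {				/* simplified field overlap */
		struct {		/* slab bookkeeping */
			struct kmem_cache *slab_cache;
			struct page *first_page;
		};
		unsigned long private;	/* zeroed by prep_new_page() */
		unsigned long ptl;	/* split page table lock word */
	};
};

int main(void)
{
	struct page pg = { .private = 0 };

	/* Slab taking the page into use clobbers the lock word... */
	pg.slab_cache = (struct kmem_cache *)0x1234;

	/* ...which is exactly what VM_BUG_ON(page->ptl) catches. */
	printf("ptl word is now %#lx\n", pg.ptl);
	return 0;
}

Note that the real kernel may also allocate the lock out of line (that
is what ptlock_alloc() in the hunk above is for) when a spinlock does
not fit in the field, but the aliasing hazard is the same either way.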
At the moment xtensa uses the slab allocator for PTE tables. This doesn't
work with split page table lock enabled: slab uses page->slab_cache and
page->first_page for its pages. These fields share storage with
page->ptl.
Signed-off-by: Kirill A. Shutemov <[email protected]>
Cc: Chris Zankel <[email protected]>
Cc: Max Filippov <[email protected]>
---
v2:
- add missing return in pte_alloc_one_kernel;
arch/xtensa/include/asm/pgalloc.h | 20 ++++++++++++--------
arch/xtensa/include/asm/pgtable.h | 3 +--
arch/xtensa/mm/mmu.c | 20 --------------------
3 files changed, 13 insertions(+), 30 deletions(-)
diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
index b8774f1e21..8507b32d6e 100644
--- a/arch/xtensa/include/asm/pgalloc.h
+++ b/arch/xtensa/include/asm/pgalloc.h
@@ -38,14 +38,18 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
free_page((unsigned long)pgd);
}
-/* Use a slab cache for the pte pages (see also sparc64 implementation) */
-
-extern struct kmem_cache *pgtable_cache;
-
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
unsigned long address)
{
- return kmem_cache_alloc(pgtable_cache, GFP_KERNEL|__GFP_REPEAT);
+ pte_t *ptep;
+ int i;
+
+ ptep = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
+ if (!ptep)
+ return NULL;
+ for (i = 0; i < 1024; i++, ptep++)
+ pte_clear(NULL, 0, ptep);
+ return ptep;
}
static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
@@ -59,7 +63,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
return NULL;
page = virt_to_page(pte);
if (!pgtable_page_ctor(page)) {
- kmem_cache_free(pgtable_cache, pte);
+ __free_page(page);
return NULL;
}
return page;
@@ -67,13 +71,13 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
- kmem_cache_free(pgtable_cache, pte);
+ free_page((unsigned long)pte);
}
static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
pgtable_page_dtor(pte);
- kmem_cache_free(pgtable_cache, page_address(pte));
+ __free_page(pte);
}
#define pmd_pgtable(pmd) pmd_page(pmd)
diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h
index 0fdf5d043f..216446295a 100644
--- a/arch/xtensa/include/asm/pgtable.h
+++ b/arch/xtensa/include/asm/pgtable.h
@@ -220,12 +220,11 @@ extern unsigned long empty_zero_page[1024];
#ifdef CONFIG_MMU
extern pgd_t swapper_pg_dir[PAGE_SIZE/sizeof(pgd_t)];
extern void paging_init(void);
-extern void pgtable_cache_init(void);
#else
# define swapper_pg_dir NULL
static inline void paging_init(void) { }
-static inline void pgtable_cache_init(void) { }
#endif
+static inline void pgtable_cache_init(void) { }
/*
* The pmd contains the kernel virtual address of the pte page.
diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c
index a1077570e3..c43771c974 100644
--- a/arch/xtensa/mm/mmu.c
+++ b/arch/xtensa/mm/mmu.c
@@ -50,23 +50,3 @@ void __init init_mmu(void)
*/
set_ptevaddr_register(PGTABLE_START);
}
-
-struct kmem_cache *pgtable_cache __read_mostly;
-
-static void pgd_ctor(void *addr)
-{
- pte_t *ptep = (pte_t *)addr;
- int i;
-
- for (i = 0; i < 1024; i++, ptep++)
- pte_clear(NULL, 0, ptep);
-
-}
-
-void __init pgtable_cache_init(void)
-{
- pgtable_cache = kmem_cache_create("pgd",
- PAGE_SIZE, PAGE_SIZE,
- SLAB_HWCACHE_ALIGN,
- pgd_ctor);
-}
--
1.8.4.rc3
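The loop bound of 1024 in pte_alloc_one_kernel() is PAGE_SIZE /
sizeof(pte_t) written out: a PTE table fills exactly one page here,
which is what lets a single __get_free_page() replace the slab cache.
For anyone porting the same conversion, a hypothetical compile-time
guard (the constant values below are illustrative assumptions, not
taken from the xtensa headers):

#include <assert.h>

#define PAGE_SIZE 4096		/* assumed 4 KiB pages */
typedef unsigned int pte_t;	/* assumed 32-bit PTEs */

/* Guard the 1024 so a pte_t or page size change cannot leave part of
 * the table uncleared. */
static_assert(PAGE_SIZE / sizeof(pte_t) == 1024,
	      "PTE table must fill exactly one page");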
On Mon, Oct 14, 2013 at 6:32 PM, Kirill A. Shutemov
<[email protected]> wrote:
> At the moment xtensa uses the slab allocator for PTE tables. This doesn't
> work with split page table lock enabled: slab uses page->slab_cache and
> page->first_page for its pages. These fields share storage with
> page->ptl.
>
> Signed-off-by: Kirill A. Shutemov <[email protected]>
> Cc: Chris Zankel <[email protected]>
> Cc: Max Filippov <[email protected]>
> ---
> v2:
> - add missing return in pte_alloc_one_kernel;
>
> arch/xtensa/include/asm/pgalloc.h | 20 ++++++++++++--------
> arch/xtensa/include/asm/pgtable.h | 3 +--
> arch/xtensa/mm/mmu.c | 20 --------------------
> 3 files changed, 13 insertions(+), 30 deletions(-)
>
> diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
> index b8774f1e21..8507b32d6e 100644
> --- a/arch/xtensa/include/asm/pgalloc.h
> +++ b/arch/xtensa/include/asm/pgalloc.h
> @@ -38,14 +38,18 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
> free_page((unsigned long)pgd);
> }
>
> -/* Use a slab cache for the pte pages (see also sparc64 implementation) */
> -
> -extern struct kmem_cache *pgtable_cache;
> -
> static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> unsigned long address)
> {
> - return kmem_cache_alloc(pgtable_cache, GFP_KERNEL|__GFP_REPEAT);
> + pte_t *ptep;
> + int i;
> +
> + ptep = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
> + if (!ptep)
> + return NULL;
> + for (i = 0; i < 1024; i++, ptep++)
> + pte_clear(NULL, 0, ptep);
> + return ptep;
You're returning the modified ptep, not the allocated one.
> }
>
> static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> @@ -59,7 +63,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
> return NULL;
> page = virt_to_page(pte);
> if (!pgtable_page_ctor(page)) {
> - kmem_cache_free(pgtable_cache, pte);
> + __free_page(page);
> return NULL;
> }
> return page;
> @@ -67,13 +71,13 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
>
> static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
> {
> - kmem_cache_free(pgtable_cache, pte);
> + free_page((unsigned long)pte);
> }
>
> static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
> {
> pgtable_page_dtor(pte);
> - kmem_cache_free(pgtable_cache, page_address(pte));
> + __free_page(pte);
> }
> #define pmd_pgtable(pmd) pmd_page(pmd)
>
> diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h
> index 0fdf5d043f..216446295a 100644
> --- a/arch/xtensa/include/asm/pgtable.h
> +++ b/arch/xtensa/include/asm/pgtable.h
> @@ -220,12 +220,11 @@ extern unsigned long empty_zero_page[1024];
> #ifdef CONFIG_MMU
> extern pgd_t swapper_pg_dir[PAGE_SIZE/sizeof(pgd_t)];
> extern void paging_init(void);
> -extern void pgtable_cache_init(void);
> #else
> # define swapper_pg_dir NULL
> static inline void paging_init(void) { }
> -static inline void pgtable_cache_init(void) { }
> #endif
> +static inline void pgtable_cache_init(void) { }
>
> /*
> * The pmd contains the kernel virtual address of the pte page.
> diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c
> index a1077570e3..c43771c974 100644
> --- a/arch/xtensa/mm/mmu.c
> +++ b/arch/xtensa/mm/mmu.c
> @@ -50,23 +50,3 @@ void __init init_mmu(void)
> */
> set_ptevaddr_register(PGTABLE_START);
> }
> -
> -struct kmem_cache *pgtable_cache __read_mostly;
> -
> -static void pgd_ctor(void *addr)
> -{
> - pte_t *ptep = (pte_t *)addr;
> - int i;
> -
> - for (i = 0; i < 1024; i++, ptep++)
> - pte_clear(NULL, 0, ptep);
> -
> -}
> -
> -void __init pgtable_cache_init(void)
> -{
> - pgtable_cache = kmem_cache_create("pgd",
> - PAGE_SIZE, PAGE_SIZE,
> - SLAB_HWCACHE_ALIGN,
> - pgd_ctor);
> -}
> --
> 1.8.4.rc3
>
--
Thanks.
-- Max
Max Filippov wrote:
> On Mon, Oct 14, 2013 at 6:32 PM, Kirill A. Shutemov
> <[email protected]> wrote:
> > At the moment xtensa uses the slab allocator for PTE tables. This doesn't
> > work with split page table lock enabled: slab uses page->slab_cache and
> > page->first_page for its pages. These fields share storage with
> > page->ptl.
> >
> > Signed-off-by: Kirill A. Shutemov <[email protected]>
> > Cc: Chris Zankel <[email protected]>
> > Cc: Max Filippov <[email protected]>
> > ---
> > v2:
> > - add missing return in pte_alloc_one_kernel;
> >
> > arch/xtensa/include/asm/pgalloc.h | 20 ++++++++++++--------
> > arch/xtensa/include/asm/pgtable.h | 3 +--
> > arch/xtensa/mm/mmu.c | 20 --------------------
> > 3 files changed, 13 insertions(+), 30 deletions(-)
> >
> > diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
> > index b8774f1e21..8507b32d6e 100644
> > --- a/arch/xtensa/include/asm/pgalloc.h
> > +++ b/arch/xtensa/include/asm/pgalloc.h
> > @@ -38,14 +38,18 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
> > free_page((unsigned long)pgd);
> > }
> >
> > -/* Use a slab cache for the pte pages (see also sparc64 implementation) */
> > -
> > -extern struct kmem_cache *pgtable_cache;
> > -
> > static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
> > unsigned long address)
> > {
> > - return kmem_cache_alloc(pgtable_cache, GFP_KERNEL|__GFP_REPEAT);
> > + pte_t *ptep;
> > + int i;
> > +
> > + ptep = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
> > + if (!ptep)
> > + return NULL;
> > + for (i = 0; i < 1024; i++, ptep++)
> > + pte_clear(NULL, 0, ptep);
> > + return ptep;
>
> You're returning the modified ptep, not the allocated one.
Erghh.. Stupid me.
Corrected patch below.
From 0ba2ac687321f5ad7bac5f5c141da5b65b957fdc Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <[email protected]>
Date: Mon, 14 Oct 2013 13:38:21 +0300
Subject: [PATCHv3] xtensa: use buddy allocator for PTE table
At the moment xtensa uses the slab allocator for PTE tables. This doesn't
work with split page table lock enabled: slab uses page->slab_cache and
page->first_page for its pages. These fields share storage with
page->ptl.
Signed-off-by: Kirill A. Shutemov <[email protected]>
Cc: Chris Zankel <[email protected]>
Cc: Max Filippov <[email protected]>
---
v3:
- return correct value from pte_alloc_one_kernel();
v2:
- add missing return in pte_alloc_one_kernel();
arch/xtensa/include/asm/pgalloc.h | 20 ++++++++++++--------
arch/xtensa/include/asm/pgtable.h | 3 +--
arch/xtensa/mm/mmu.c | 20 --------------------
3 files changed, 13 insertions(+), 30 deletions(-)
diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
index b8774f1e21..d38eb9237e 100644
--- a/arch/xtensa/include/asm/pgalloc.h
+++ b/arch/xtensa/include/asm/pgalloc.h
@@ -38,14 +38,18 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
free_page((unsigned long)pgd);
}
-/* Use a slab cache for the pte pages (see also sparc64 implementation) */
-
-extern struct kmem_cache *pgtable_cache;
-
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
unsigned long address)
{
- return kmem_cache_alloc(pgtable_cache, GFP_KERNEL|__GFP_REPEAT);
+ pte_t *ptep;
+ int i;
+
+ ptep = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
+ if (!ptep)
+ return NULL;
+ for (i = 0; i < 1024; i++)
+ pte_clear(NULL, 0, ptep + i);
+ return ptep;
}
static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
@@ -59,7 +63,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
return NULL;
page = virt_to_page(pte);
if (!pgtable_page_ctor(page)) {
- kmem_cache_free(pgtable_cache, pte);
+ __free_page(page);
return NULL;
}
return page;
@@ -67,13 +71,13 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
- kmem_cache_free(pgtable_cache, pte);
+ free_page((unsigned long)pte);
}
static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
pgtable_page_dtor(pte);
- kmem_cache_free(pgtable_cache, page_address(pte));
+ __free_page(pte);
}
#define pmd_pgtable(pmd) pmd_page(pmd)
diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h
index 0fdf5d043f..216446295a 100644
--- a/arch/xtensa/include/asm/pgtable.h
+++ b/arch/xtensa/include/asm/pgtable.h
@@ -220,12 +220,11 @@ extern unsigned long empty_zero_page[1024];
#ifdef CONFIG_MMU
extern pgd_t swapper_pg_dir[PAGE_SIZE/sizeof(pgd_t)];
extern void paging_init(void);
-extern void pgtable_cache_init(void);
#else
# define swapper_pg_dir NULL
static inline void paging_init(void) { }
-static inline void pgtable_cache_init(void) { }
#endif
+static inline void pgtable_cache_init(void) { }
/*
* The pmd contains the kernel virtual address of the pte page.
diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c
index a1077570e3..c43771c974 100644
--- a/arch/xtensa/mm/mmu.c
+++ b/arch/xtensa/mm/mmu.c
@@ -50,23 +50,3 @@ void __init init_mmu(void)
*/
set_ptevaddr_register(PGTABLE_START);
}
-
-struct kmem_cache *pgtable_cache __read_mostly;
-
-static void pgd_ctor(void *addr)
-{
- pte_t *ptep = (pte_t *)addr;
- int i;
-
- for (i = 0; i < 1024; i++, ptep++)
- pte_clear(NULL, 0, ptep);
-
-}
-
-void __init pgtable_cache_init(void)
-{
- pgtable_cache = kmem_cache_create("pgd",
- PAGE_SIZE, PAGE_SIZE,
- SLAB_HWCACHE_ALIGN,
- pgd_ctor);
-}
--
Kirill A. Shutemov
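To make Max's point above concrete, a tiny standalone model of the two
loop shapes (plain C, nothing xtensa-specific; N and the variable names
are mine, standing in for the 1024 PTEs per table):

#include <stdio.h>

#define N 8			/* stands in for the 1024 PTEs */

/* v2 shape: advance the cursor while clearing, return the cursor. */
static int *clear_v2(int *table)
{
	int *p = table;
	int i;

	for (i = 0; i < N; i++, p++)
		*p = 0;
	return p;		/* one element past the end */
}

/* v3 shape: index from a fixed base, return the base. */
static int *clear_v3(int *table)
{
	int i;

	for (i = 0; i < N; i++)
		table[i] = 0;
	return table;		/* start of the table */
}

int main(void)
{
	int table[N];

	printf("v2 drift: %td\n", clear_v2(table) - table);	/* 8 */
	printf("v3 drift: %td\n", clear_v3(table) - table);	/* 0 */
	return 0;
}

In the kernel the drift is a full page (1024 PTEs), so the v2 caller
would have installed a pointer to the page after the table it just
allocated.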
On Mon, Oct 14, 2013 at 7:44 PM, Kirill A. Shutemov
<[email protected]> wrote:
> Corrected patch below.
>
> From 0ba2ac687321f5ad7bac5f5c141da5b65b957fdc Mon Sep 17 00:00:00 2001
> From: "Kirill A. Shutemov" <[email protected]>
> Date: Mon, 14 Oct 2013 13:38:21 +0300
> Subject: [PATCHv3] xtensa: use buddy allocator for PTE table
>
> At the moment xtensa uses the slab allocator for PTE tables. This doesn't
> work with split page table lock enabled: slab uses page->slab_cache and
> page->first_page for its pages. These fields share storage with
> page->ptl.
>
> Signed-off-by: Kirill A. Shutemov <[email protected]>
> Cc: Chris Zankel <[email protected]>
> Cc: Max Filippov <[email protected]>
> ---
> v3:
> - return correct value from pte_alloc_one_kernel();
> v2:
> - add missing return in pte_alloc_one_kernel();
Acked-by: Max Filippov <[email protected]>
--
Thanks.
-- Max