2014-11-05 16:28:51

by Aneesh Kumar K.V

[permalink] [raw]
Subject: [PATCH V5 1/3] powerpc/mm: Add missing pmd accessors

This patch add documentation and missing accessors.

Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
arch/powerpc/include/asm/pgtable-ppc64-4k.h | 16 ++++++++-
arch/powerpc/include/asm/pgtable-ppc64-64k.h | 3 ++
arch/powerpc/include/asm/pgtable-ppc64.h | 51 +++++++++++++++++++++-------
arch/powerpc/mm/hugetlbpage.c | 3 ++
arch/powerpc/mm/pgtable_64.c | 22 ++++++++++--
5 files changed, 78 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/include/asm/pgtable-ppc64-4k.h b/arch/powerpc/include/asm/pgtable-ppc64-4k.h
index 7b935683f268..132ee1d482c2 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64-4k.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64-4k.h
@@ -57,7 +57,21 @@
#define pgd_present(pgd) (pgd_val(pgd) != 0)
#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0)
#define pgd_page_vaddr(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS)
-#define pgd_page(pgd) virt_to_page(pgd_page_vaddr(pgd))
+
+#ifndef __ASSEMBLY__
+
+static inline pte_t pgd_pte(pgd_t pgd)
+{
+ return __pte(pgd_val(pgd));
+}
+
+static inline pgd_t pte_pgd(pte_t pte)
+{
+ return __pgd(pte_val(pte));
+}
+extern struct page *pgd_page(pgd_t pgd);
+
+#endif /* !__ASSEMBLY__ */

#define pud_offset(pgdp, addr) \
(((pud_t *) pgd_page_vaddr(*(pgdp))) + \
diff --git a/arch/powerpc/include/asm/pgtable-ppc64-64k.h b/arch/powerpc/include/asm/pgtable-ppc64-64k.h
index a56b82fb0609..1de35bbd02a6 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64-64k.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64-64k.h
@@ -38,4 +38,7 @@
/* Bits to mask out from a PGD/PUD to get to the PMD page */
#define PUD_MASKED_BITS 0x1ff

+#define pgd_pte(pgd) (pud_pte(((pud_t){ pgd })))
+#define pte_pgd(pte) ((pgd_t)pte_pud(pte))
+
#endif /* _ASM_POWERPC_PGTABLE_PPC64_64K_H */
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index ae153c40ab7c..d12092420560 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -152,7 +152,7 @@
#define pmd_none(pmd) (!pmd_val(pmd))
#define pmd_bad(pmd) (!is_kernel_addr(pmd_val(pmd)) \
|| (pmd_val(pmd) & PMD_BAD_BITS))
-#define pmd_present(pmd) (pmd_val(pmd) != 0)
+#define pmd_present(pmd) (!pmd_none(pmd))
#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0)
#define pmd_page_vaddr(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS)
extern struct page *pmd_page(pmd_t pmd);
@@ -164,9 +164,21 @@ extern struct page *pmd_page(pmd_t pmd);
#define pud_present(pud) (pud_val(pud) != 0)
#define pud_clear(pudp) (pud_val(*(pudp)) = 0)
#define pud_page_vaddr(pud) (pud_val(pud) & ~PUD_MASKED_BITS)
-#define pud_page(pud) virt_to_page(pud_page_vaddr(pud))

+extern struct page *pud_page(pud_t pud);
+
+static inline pte_t pud_pte(pud_t pud)
+{
+ return __pte(pud_val(pud));
+}
+
+static inline pud_t pte_pud(pte_t pte)
+{
+ return __pud(pte_val(pte));
+}
+#define pud_write(pud) pte_write(pud_pte(pud))
#define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);})
+#define pgd_write(pgd) pte_write(pgd_pte(pgd))

/*
* Find an entry in a page-table-directory. We combine the address region
@@ -422,7 +434,22 @@ extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd);
extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd);
-
+/*
+ *
+ * For core kernel code by design pmd_trans_huge is never run on any hugetlbfs
+ * page. The hugetlbfs page table walking and mangling paths are totally
+ * separated form the core VM paths and they're differentiated by
+ * VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run.
+ *
+ * pmd_trans_huge() is defined as false at build time if
+ * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build
+ * time in such case.
+ *
+ * For ppc64 we need to differntiate from explicit hugepages from THP, because
+ * for THP we also track the subpage details at the pmd level. We don't do
+ * that for explicit huge pages.
+ *
+ */
static inline int pmd_trans_huge(pmd_t pmd)
{
/*
@@ -431,16 +458,6 @@ static inline int pmd_trans_huge(pmd_t pmd)
return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE);
}

-static inline int pmd_large(pmd_t pmd)
-{
- /*
- * leaf pte for huge page, bottom two bits != 00
- */
- if (pmd_trans_huge(pmd))
- return pmd_val(pmd) & _PAGE_PRESENT;
- return 0;
-}
-
static inline int pmd_trans_splitting(pmd_t pmd)
{
if (pmd_trans_huge(pmd))
@@ -451,6 +468,14 @@ static inline int pmd_trans_splitting(pmd_t pmd)
extern int has_transparent_hugepage(void);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

+static inline int pmd_large(pmd_t pmd)
+{
+ /*
+ * leaf pte for huge page, bottom two bits != 00
+ */
+ return ((pmd_val(pmd) & 0x3) != 0x0);
+}
+
static inline pte_t pmd_pte(pmd_t pmd)
{
return __pte(pmd_val(pmd));
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 7e70ae968e5f..2e35b9a82929 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -62,6 +62,9 @@ static unsigned nr_gpages;
/*
* We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have
* 16GB hugepage pte in PGD and 16MB hugepage pte at PMD;
+ *
+ * Defined in such a way that we can optimize away code block at build time
+ * if CONFIG_HUGETLB_PAGE=n.
*/
int pmd_huge(pmd_t pmd)
{
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index c8d709ab489d..e40ca0e3f2bf 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -36,6 +36,7 @@
#include <linux/bootmem.h>
#include <linux/memblock.h>
#include <linux/slab.h>
+#include <linux/hugetlb.h>

#include <asm/pgalloc.h>
#include <asm/page.h>
@@ -352,16 +353,31 @@ EXPORT_SYMBOL(iounmap);
EXPORT_SYMBOL(__iounmap);
EXPORT_SYMBOL(__iounmap_at);

+#ifndef __PAGETABLE_PUD_FOLDED
+/* 4 level page table */
+struct page *pgd_page(pgd_t pgd)
+{
+ if (pgd_huge(pgd))
+ return pte_page(pgd_pte(pgd));
+ return virt_to_page(pgd_page_vaddr(pgd));
+}
+#endif
+
+struct page *pud_page(pud_t pud)
+{
+ if (pud_huge(pud))
+ return pte_page(pud_pte(pud));
+ return virt_to_page(pud_page_vaddr(pud));
+}
+
/*
* For hugepage we have pfn in the pmd, we use PTE_RPN_SHIFT bits for flags
* For PTE page, we have a PTE_FRAG_SIZE (4K) aligned virtual address.
*/
struct page *pmd_page(pmd_t pmd)
{
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (pmd_trans_huge(pmd))
+ if (pmd_trans_huge(pmd) || pmd_huge(pmd))
return pfn_to_page(pmd_pfn(pmd));
-#endif
return virt_to_page(pmd_page_vaddr(pmd));
}

--
2.1.0


2014-11-05 16:28:53

by Aneesh Kumar K.V

[permalink] [raw]
Subject: [PATCH V5 2/3] mm: Update generic gup implementation to handle hugepage directory

Update generic gup implementation with powerpc specific details.
On powerpc at pmd level we can have hugepte, normal pmd pointer
or a pointer to the hugepage directory.

Tested-by: Steve Capper <[email protected]>
Acked-by: Steve Capper <[email protected]>
Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
Changes from V4:
* Add pmd accessor needed for the change to ppc64
* Drop the assumption that we can access pmd using pte_* functions.

arch/powerpc/include/asm/page.h | 1 +
include/linux/hugetlb.h | 46 +++++++++++++++++++++++
mm/gup.c | 81 +++++++++++++++++++++++++++++++++++++----
3 files changed, 120 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 26fe1ae15212..f973fce73a43 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -380,6 +380,7 @@ static inline int hugepd_ok(hugepd_t hpd)
#endif

#define is_hugepd(pdep) (hugepd_ok(*((hugepd_t *)(pdep))))
+#define pgd_huge pgd_huge
int pgd_huge(pgd_t pgd);
#else /* CONFIG_HUGETLB_PAGE */
#define is_hugepd(pdep) 0
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 6e6d338641fe..e6b62f30ab21 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -175,6 +175,52 @@ static inline void __unmap_hugepage_range(struct mmu_gather *tlb,
}

#endif /* !CONFIG_HUGETLB_PAGE */
+/*
+ * hugepages at page global directory. If arch support
+ * hugepages at pgd level, they need to define this.
+ */
+#ifndef pgd_huge
+#define pgd_huge(x) 0
+#endif
+
+#ifndef pgd_write
+static inline int pgd_write(pgd_t pgd)
+{
+ BUG();
+ return 0;
+}
+#endif
+
+#ifndef pud_write
+static inline int pud_write(pud_t pud)
+{
+ BUG();
+ return 0;
+}
+#endif
+
+#ifndef is_hugepd
+/*
+ * Some architectures requires a hugepage directory format that is
+ * required to support multiple hugepage sizes. For example
+ * a4fe3ce76 "powerpc/mm: Allow more flexible layouts for hugepage pagetables"
+ * introduced the same on powerpc. This allows for a more flexible hugepage
+ * pagetable layout.
+ */
+typedef struct { unsigned long pd; } hugepd_t;
+#define is_hugepd(hugepd) (0)
+#define __hugepd(x) ((hugepd_t) { (x) })
+static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
+ unsigned pdshift, unsigned long end,
+ int write, struct page **pages, int *nr)
+{
+ return 0;
+}
+#else
+extern int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
+ unsigned pdshift, unsigned long end,
+ int write, struct page **pages, int *nr);
+#endif

#define HUGETLB_ANON_FILE "anon_hugepage"

diff --git a/mm/gup.c b/mm/gup.c
index cd62c8c90d4a..0ca1df9075ab 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -3,7 +3,6 @@
#include <linux/err.h>
#include <linux/spinlock.h>

-#include <linux/hugetlb.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
@@ -12,6 +11,7 @@

#include <linux/sched.h>
#include <linux/rwsem.h>
+#include <linux/hugetlb.h>
#include <asm/pgtable.h>

#include "internal.h"
@@ -875,6 +875,49 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
return 1;
}

+static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
+ unsigned long end, int write,
+ struct page **pages, int *nr)
+{
+ int refs;
+ struct page *head, *page, *tail;
+
+ if (write && !pgd_write(orig))
+ return 0;
+
+ refs = 0;
+ head = pgd_page(orig);
+ page = head + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT);
+ tail = page;
+ do {
+ VM_BUG_ON_PAGE(compound_head(page) != head, page);
+ pages[*nr] = page;
+ (*nr)++;
+ page++;
+ refs++;
+ } while (addr += PAGE_SIZE, addr != end);
+
+ if (!page_cache_add_speculative(head, refs)) {
+ *nr -= refs;
+ return 0;
+ }
+
+ if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) {
+ *nr -= refs;
+ while (refs--)
+ put_page(head);
+ return 0;
+ }
+
+ while (refs--) {
+ if (PageTail(tail))
+ get_huge_page_tail(tail);
+ tail++;
+ }
+
+ return 1;
+}
+
static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
{
@@ -902,6 +945,14 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
pages, nr))
return 0;

+ } else if (unlikely(is_hugepd(__hugepd(pmd_val(pmd))))) {
+ /*
+ * architecture have different format for hugetlbfs
+ * pmd format and THP pmd format
+ */
+ if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr,
+ PMD_SHIFT, next, write, pages, nr))
+ return 0;
} else if (!gup_pte_range(pmd, addr, next, write, pages, nr))
return 0;
} while (pmdp++, addr = next, addr != end);
@@ -909,22 +960,26 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
return 1;
}

-static int gup_pud_range(pgd_t *pgdp, unsigned long addr, unsigned long end,
- int write, struct page **pages, int *nr)
+static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
+ int write, struct page **pages, int *nr)
{
unsigned long next;
pud_t *pudp;

- pudp = pud_offset(pgdp, addr);
+ pudp = pud_offset(&pgd, addr);
do {
pud_t pud = ACCESS_ONCE(*pudp);

next = pud_addr_end(addr, end);
if (pud_none(pud))
return 0;
- if (pud_huge(pud)) {
+ if (unlikely(pud_huge(pud))) {
if (!gup_huge_pud(pud, pudp, addr, next, write,
- pages, nr))
+ pages, nr))
+ return 0;
+ } else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) {
+ if (!gup_huge_pd(__hugepd(pud_val(pud)), addr,
+ PUD_SHIFT, next, write, pages, nr))
return 0;
} else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
return 0;
@@ -970,10 +1025,20 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
local_irq_save(flags);
pgdp = pgd_offset(mm, addr);
do {
+ pgd_t pgd = ACCESS_ONCE(*pgdp);
+
next = pgd_addr_end(addr, end);
- if (pgd_none(*pgdp))
+ if (pgd_none(pgd))
break;
- else if (!gup_pud_range(pgdp, addr, next, write, pages, &nr))
+ if (unlikely(pgd_huge(pgd))) {
+ if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
+ pages, &nr))
+ break;
+ } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
+ if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
+ PGDIR_SHIFT, next, write, pages, &nr))
+ break;
+ } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
break;
} while (pgdp++, addr = next, addr != end);
local_irq_restore(flags);
--
2.1.0

2014-11-05 16:29:29

by Aneesh Kumar K.V

[permalink] [raw]
Subject: [PATCH V5 3/3] powerpc/mm: Switch to generic RCU get_user_pages_fast

This patch switch the ppc arch to use the generic RCU based
gup implementation.

Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
arch/powerpc/Kconfig | 1 +
arch/powerpc/include/asm/hugetlb.h | 8 +-
arch/powerpc/include/asm/page.h | 3 +-
arch/powerpc/include/asm/pgtable-ppc64.h | 1 -
arch/powerpc/include/asm/pgtable.h | 6 +-
arch/powerpc/mm/Makefile | 2 +-
arch/powerpc/mm/gup.c | 235 -------------------------------
arch/powerpc/mm/hugetlbpage.c | 33 ++---
8 files changed, 22 insertions(+), 267 deletions(-)
delete mode 100644 arch/powerpc/mm/gup.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 88eace4e28c3..7af887dc6aed 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -148,6 +148,7 @@ config PPC
select HAVE_ARCH_AUDITSYSCALL
select ARCH_SUPPORTS_ATOMIC_RMW
select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN
+ select HAVE_GENERIC_RCU_GUP

config GENERIC_CSUM
def_bool CPU_LITTLE_ENDIAN
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 766b77d527ac..1d53a65b4ec1 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -48,7 +48,7 @@ static inline unsigned int hugepd_shift(hugepd_t hpd)
#endif /* CONFIG_PPC_BOOK3S_64 */


-static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
+static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
unsigned pdshift)
{
/*
@@ -58,9 +58,9 @@ static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
*/
unsigned long idx = 0;

- pte_t *dir = hugepd_page(*hpdp);
+ pte_t *dir = hugepd_page(hpd);
#ifndef CONFIG_PPC_FSL_BOOK3E
- idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp);
+ idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(hpd);
#endif

return dir + idx;
@@ -193,7 +193,7 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma,
}

#define hugepd_shift(x) 0
-static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
+static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
unsigned pdshift)
{
return 0;
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index f973fce73a43..69c059887a2c 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -379,13 +379,14 @@ static inline int hugepd_ok(hugepd_t hpd)
}
#endif

-#define is_hugepd(pdep) (hugepd_ok(*((hugepd_t *)(pdep))))
+#define is_hugepd(hpd) (hugepd_ok(hpd))
#define pgd_huge pgd_huge
int pgd_huge(pgd_t pgd);
#else /* CONFIG_HUGETLB_PAGE */
#define is_hugepd(pdep) 0
#define pgd_huge(pgd) 0
#endif /* CONFIG_HUGETLB_PAGE */
+#define __hugepd(x) ((hugepd_t) { (x) })

struct page;
extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg);
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index d12092420560..5600e434332f 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -600,6 +600,5 @@ static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
*/
return true;
}
-
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_PGTABLE_PPC64_H_ */
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 316f9a5da173..a8805fee0df9 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -274,11 +274,9 @@ extern void paging_init(void);
*/
extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);

-extern int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr);
-
extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr);
+ unsigned long end, int write,
+ struct page **pages, int *nr);
#ifndef CONFIG_TRANSPARENT_HUGEPAGE
#define pmd_large(pmd) 0
#define has_transparent_hugepage() 0
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 325e861616a1..438dcd3fd0d1 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -6,7 +6,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror

ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)

-obj-y := fault.o mem.o pgtable.o gup.o mmap.o \
+obj-y := fault.o mem.o pgtable.o mmap.o \
init_$(CONFIG_WORD_SIZE).o \
pgtable_$(CONFIG_WORD_SIZE).o
obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c
deleted file mode 100644
index d8746684f606..000000000000
--- a/arch/powerpc/mm/gup.c
+++ /dev/null
@@ -1,235 +0,0 @@
-/*
- * Lockless get_user_pages_fast for powerpc
- *
- * Copyright (C) 2008 Nick Piggin
- * Copyright (C) 2008 Novell Inc.
- */
-#undef DEBUG
-
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/hugetlb.h>
-#include <linux/vmstat.h>
-#include <linux/pagemap.h>
-#include <linux/rwsem.h>
-#include <asm/pgtable.h>
-
-#ifdef __HAVE_ARCH_PTE_SPECIAL
-
-/*
- * The performance critical leaf functions are made noinline otherwise gcc
- * inlines everything into a single function which results in too much
- * register pressure.
- */
-static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- unsigned long mask, result;
- pte_t *ptep;
-
- result = _PAGE_PRESENT|_PAGE_USER;
- if (write)
- result |= _PAGE_RW;
- mask = result | _PAGE_SPECIAL;
-
- ptep = pte_offset_kernel(&pmd, addr);
- do {
- pte_t pte = ACCESS_ONCE(*ptep);
- struct page *page;
- /*
- * Similar to the PMD case, NUMA hinting must take slow path
- */
- if (pte_numa(pte))
- return 0;
-
- if ((pte_val(pte) & mask) != result)
- return 0;
- VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
- page = pte_page(pte);
- if (!page_cache_get_speculative(page))
- return 0;
- if (unlikely(pte_val(pte) != pte_val(*ptep))) {
- put_page(page);
- return 0;
- }
- pages[*nr] = page;
- (*nr)++;
-
- } while (ptep++, addr += PAGE_SIZE, addr != end);
-
- return 1;
-}
-
-static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
- int write, struct page **pages, int *nr)
-{
- unsigned long next;
- pmd_t *pmdp;
-
- pmdp = pmd_offset(&pud, addr);
- do {
- pmd_t pmd = ACCESS_ONCE(*pmdp);
-
- next = pmd_addr_end(addr, end);
- /*
- * If we find a splitting transparent hugepage we
- * return zero. That will result in taking the slow
- * path which will call wait_split_huge_page()
- * if the pmd is still in splitting state
- */
- if (pmd_none(pmd) || pmd_trans_splitting(pmd))
- return 0;
- if (pmd_huge(pmd) || pmd_large(pmd)) {
- /*
- * NUMA hinting faults need to be handled in the GUP
- * slowpath for accounting purposes and so that they
- * can be serialised against THP migration.
- */
- if (pmd_numa(pmd))
- return 0;
-
- if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next,
- write, pages, nr))
- return 0;
- } else if (is_hugepd(pmdp)) {
- if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT,
- addr, next, write, pages, nr))
- return 0;
- } else if (!gup_pte_range(pmd, addr, next, write, pages, nr))
- return 0;
- } while (pmdp++, addr = next, addr != end);
-
- return 1;
-}
-
-static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
- int write, struct page **pages, int *nr)
-{
- unsigned long next;
- pud_t *pudp;
-
- pudp = pud_offset(&pgd, addr);
- do {
- pud_t pud = ACCESS_ONCE(*pudp);
-
- next = pud_addr_end(addr, end);
- if (pud_none(pud))
- return 0;
- if (pud_huge(pud)) {
- if (!gup_hugepte((pte_t *)pudp, PUD_SIZE, addr, next,
- write, pages, nr))
- return 0;
- } else if (is_hugepd(pudp)) {
- if (!gup_hugepd((hugepd_t *)pudp, PUD_SHIFT,
- addr, next, write, pages, nr))
- return 0;
- } else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
- return 0;
- } while (pudp++, addr = next, addr != end);
-
- return 1;
-}
-
-int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
- struct page **pages)
-{
- struct mm_struct *mm = current->mm;
- unsigned long addr, len, end;
- unsigned long next;
- unsigned long flags;
- pgd_t *pgdp;
- int nr = 0;
-
- pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read");
-
- start &= PAGE_MASK;
- addr = start;
- len = (unsigned long) nr_pages << PAGE_SHIFT;
- end = start + len;
-
- if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
- start, len)))
- return 0;
-
- pr_devel(" aligned: %lx .. %lx\n", start, end);
-
- /*
- * XXX: batch / limit 'nr', to avoid large irq off latency
- * needs some instrumenting to determine the common sizes used by
- * important workloads (eg. DB2), and whether limiting the batch size
- * will decrease performance.
- *
- * It seems like we're in the clear for the moment. Direct-IO is
- * the main guy that batches up lots of get_user_pages, and even
- * they are limited to 64-at-a-time which is not so many.
- */
- /*
- * This doesn't prevent pagetable teardown, but does prevent
- * the pagetables from being freed on powerpc.
- *
- * So long as we atomically load page table pointers versus teardown,
- * we can follow the address down to the the page and take a ref on it.
- */
- local_irq_save(flags);
-
- pgdp = pgd_offset(mm, addr);
- do {
- pgd_t pgd = ACCESS_ONCE(*pgdp);
-
- pr_devel(" %016lx: normal pgd %p\n", addr,
- (void *)pgd_val(pgd));
- next = pgd_addr_end(addr, end);
- if (pgd_none(pgd))
- break;
- if (pgd_huge(pgd)) {
- if (!gup_hugepte((pte_t *)pgdp, PGDIR_SIZE, addr, next,
- write, pages, &nr))
- break;
- } else if (is_hugepd(pgdp)) {
- if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT,
- addr, next, write, pages, &nr))
- break;
- } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
- break;
- } while (pgdp++, addr = next, addr != end);
-
- local_irq_restore(flags);
-
- return nr;
-}
-
-int get_user_pages_fast(unsigned long start, int nr_pages, int write,
- struct page **pages)
-{
- struct mm_struct *mm = current->mm;
- int nr, ret;
-
- start &= PAGE_MASK;
- nr = __get_user_pages_fast(start, nr_pages, write, pages);
- ret = nr;
-
- if (nr < nr_pages) {
- pr_devel(" slow path ! nr = %d\n", nr);
-
- /* Try to get the remaining pages with get_user_pages */
- start += nr << PAGE_SHIFT;
- pages += nr;
-
- down_read(&mm->mmap_sem);
- ret = get_user_pages(current, mm, start,
- nr_pages - nr, write, 0, pages, NULL);
- up_read(&mm->mmap_sem);
-
- /* Have to be a bit careful with return values */
- if (nr > 0) {
- if (ret < 0)
- ret = nr;
- else
- ret += nr;
- }
- }
-
- return ret;
-}
-
-#endif /* __HAVE_ARCH_PTE_SPECIAL */
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 2e35b9a82929..d54d74ccdc5a 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -233,7 +233,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
return NULL;

- return hugepte_offset(hpdp, addr, pdshift);
+ return hugepte_offset(*hpdp, addr, pdshift);
}

#else
@@ -273,7 +273,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
return NULL;

- return hugepte_offset(hpdp, addr, pdshift);
+ return hugepte_offset(*hpdp, addr, pdshift);
}
#endif

@@ -541,7 +541,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
do {
pmd = pmd_offset(pud, addr);
next = pmd_addr_end(addr, end);
- if (!is_hugepd(pmd)) {
+ if (!is_hugepd(__hugepd(pmd_val(*pmd)))) {
/*
* if it is not hugepd pointer, we should already find
* it cleared.
@@ -590,7 +590,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
do {
pud = pud_offset(pgd, addr);
next = pud_addr_end(addr, end);
- if (!is_hugepd(pud)) {
+ if (!is_hugepd(__hugepd(pud_val(*pud)))) {
if (pud_none_or_clear_bad(pud))
continue;
hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
@@ -656,7 +656,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
do {
next = pgd_addr_end(addr, end);
pgd = pgd_offset(tlb->mm, addr);
- if (!is_hugepd(pgd)) {
+ if (!is_hugepd(__hugepd(pgd_val(*pgd)))) {
if (pgd_none_or_clear_bad(pgd))
continue;
hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
@@ -716,12 +716,11 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
return (__boundary - 1 < end - 1) ? __boundary : end;
}

-int gup_hugepd(hugepd_t *hugepd, unsigned pdshift,
- unsigned long addr, unsigned long end,
- int write, struct page **pages, int *nr)
+int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned pdshift,
+ unsigned long end, int write, struct page **pages, int *nr)
{
pte_t *ptep;
- unsigned long sz = 1UL << hugepd_shift(*hugepd);
+ unsigned long sz = 1UL << hugepd_shift(hugepd);
unsigned long next;

ptep = hugepte_offset(hugepd, addr, pdshift);
@@ -964,7 +963,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
else if (pgd_huge(pgd)) {
ret_pte = (pte_t *) pgdp;
goto out;
- } else if (is_hugepd(&pgd))
+ } else if (is_hugepd(__hugepd(pgd_val(pgd))))
hpdp = (hugepd_t *)&pgd;
else {
/*
@@ -981,7 +980,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
else if (pud_huge(pud)) {
ret_pte = (pte_t *) pudp;
goto out;
- } else if (is_hugepd(&pud))
+ } else if (is_hugepd(__hugepd(pud_val(pud))))
hpdp = (hugepd_t *)&pud;
else {
pdshift = PMD_SHIFT;
@@ -1002,7 +1001,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
if (pmd_huge(pmd) || pmd_large(pmd)) {
ret_pte = (pte_t *) pmdp;
goto out;
- } else if (is_hugepd(&pmd))
+ } else if (is_hugepd(__hugepd(pmd_val(pmd))))
hpdp = (hugepd_t *)&pmd;
else
return pte_offset_kernel(&pmd, ea);
@@ -1011,7 +1010,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
if (!hpdp)
return NULL;

- ret_pte = hugepte_offset(hpdp, ea, pdshift);
+ ret_pte = hugepte_offset(*hpdp, ea, pdshift);
pdshift = hugepd_shift(*hpdp);
out:
if (shift)
@@ -1041,14 +1040,6 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
if ((pte_val(pte) & mask) != mask)
return 0;

-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- /*
- * check for splitting here
- */
- if (pmd_trans_splitting(pte_pmd(pte)))
- return 0;
-#endif
-
/* hugepages are never "special" */
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));

--
2.1.0