2014-10-15 16:35:21

by Aneesh Kumar K.V

[permalink] [raw]
Subject: [PATCH 1/2] mm: Update generic gup implementation to handle hugepage directory

Update generic gup implementation with powerpc specific details.
On powerpc at pmd level we can have hugepte, normal pmd pointer
or a pointer to the hugepage directory.

Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
include/linux/hugetlb.h | 1 +
include/linux/mm.h | 26 +++++++++++
mm/gup.c | 113 +++++++++++++++++++++++-------------------------
3 files changed, 81 insertions(+), 59 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 6e6d338641fe..65e12a24ce1d 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -138,6 +138,7 @@ static inline void hugetlb_show_meminfo(void)
#define prepare_hugepage_range(file, addr, len) (-EINVAL)
#define pmd_huge(x) 0
#define pud_huge(x) 0
+#define pgd_huge(x) 0
#define is_hugepage_only_range(mm, addr, len) 0
#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
#define hugetlb_fault(mm, vma, addr, flags) ({ BUG(); 0; })
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 02d11ee7f19d..f97732412cb4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1219,6 +1219,32 @@ long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
struct vm_area_struct **vmas);
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages);
+
+#ifdef CONFIG_HAVE_GENERIC_RCU_GUP
+#ifndef is_hugepd
+/*
+ * Some architectures support hugepage directory format that is
+ * required to support different hugetlbfs sizes.
+ */
+typedef struct { unsigned long pd; } hugepd_t;
+#define is_hugepd(hugepd) (0)
+#define __hugepd(x) ((hugepd_t) { (x) })
+static inline int gup_hugepd(hugepd_t hugepd, unsigned long addr,
+ unsigned pdshift, unsigned long end,
+ int write, struct page **pages, int *nr)
+{
+ return 0;
+}
+#else
+extern int gup_hugepd(hugepd_t hugepd, unsigned long addr,
+ unsigned pdshift, unsigned long end,
+ int write, struct page **pages, int *nr);
+#endif
+extern int gup_huge_pte(pte_t orig, pte_t *ptep, unsigned long addr,
+ unsigned long sz, unsigned long end, int write,
+ struct page **pages, int *nr);
+#endif
+
struct kvec;
int get_kernel_pages(const struct kvec *iov, int nr_pages, int write,
struct page **pages);
diff --git a/mm/gup.c b/mm/gup.c
index cd62c8c90d4a..13c560ef9ddf 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -786,65 +786,31 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
}
#endif /* __HAVE_ARCH_PTE_SPECIAL */

-static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
+int gup_huge_pte(pte_t orig, pte_t *ptep, unsigned long addr,
+ unsigned long sz, unsigned long end, int write,
+ struct page **pages, int *nr)
{
- struct page *head, *page, *tail;
int refs;
+ unsigned long pte_end;
+ struct page *head, *page, *tail;

- if (write && !pmd_write(orig))
- return 0;
-
- refs = 0;
- head = pmd_page(orig);
- page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
- tail = page;
- do {
- VM_BUG_ON_PAGE(compound_head(page) != head, page);
- pages[*nr] = page;
- (*nr)++;
- page++;
- refs++;
- } while (addr += PAGE_SIZE, addr != end);

- if (!page_cache_add_speculative(head, refs)) {
- *nr -= refs;
+ if (write && !pte_write(orig))
return 0;
- }

- if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
- *nr -= refs;
- while (refs--)
- put_page(head);
+ if (!pte_present(orig))
return 0;
- }

- /*
- * Any tail pages need their mapcount reference taken before we
- * return. (This allows the THP code to bump their ref count when
- * they are split into base pages).
- */
- while (refs--) {
- if (PageTail(tail))
- get_huge_page_tail(tail);
- tail++;
- }
-
- return 1;
-}
-
-static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- struct page *head, *page, *tail;
- int refs;
+ pte_end = (addr + sz) & ~(sz-1);
+ if (pte_end < end)
+ end = pte_end;

- if (write && !pud_write(orig))
- return 0;
+ /* hugepages are never "special" */
+ VM_BUG_ON(!pfn_valid(pte_pfn(orig)));

refs = 0;
- head = pud_page(orig);
- page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
+ head = pte_page(orig);
+ page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
tail = page;
do {
VM_BUG_ON_PAGE(compound_head(page) != head, page);
@@ -859,13 +825,18 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
return 0;
}

- if (unlikely(pud_val(orig) != pud_val(*pudp))) {
+ if (unlikely(pte_val(orig) != pte_val(*ptep))) {
*nr -= refs;
while (refs--)
put_page(head);
return 0;
}

+ /*
+ * Any tail pages need their mapcount reference taken before we
+ * return. (This allows the THP code to bump their ref count when
+ * they are split into base pages).
+ */
while (refs--) {
if (PageTail(tail))
get_huge_page_tail(tail);
@@ -898,10 +869,19 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
if (pmd_numa(pmd))
return 0;

- if (!gup_huge_pmd(pmd, pmdp, addr, next, write,
- pages, nr))
+ if (!gup_huge_pte(__pte(pmd_val(pmd)), (pte_t *)pmdp,
+ addr, PMD_SIZE, next,
+ write, pages, nr))
return 0;

+ } else if (unlikely(is_hugepd(__hugepd(pmd_val(pmd))))) {
+ /*
+ * architecture have different format for hugetlbfs
+ * pmd format and THP pmd format
+ */
+ if (!gup_hugepd(__hugepd(pmd_val(pmd)), addr, PMD_SHIFT,
+ next, write, pages, nr))
+ return 0;
} else if (!gup_pte_range(pmd, addr, next, write, pages, nr))
return 0;
} while (pmdp++, addr = next, addr != end);
@@ -909,22 +889,27 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
return 1;
}

-static int gup_pud_range(pgd_t *pgdp, unsigned long addr, unsigned long end,
+static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
{
unsigned long next;
pud_t *pudp;

- pudp = pud_offset(pgdp, addr);
+ pudp = pud_offset(&pgd, addr);
do {
pud_t pud = ACCESS_ONCE(*pudp);

next = pud_addr_end(addr, end);
if (pud_none(pud))
return 0;
- if (pud_huge(pud)) {
- if (!gup_huge_pud(pud, pudp, addr, next, write,
- pages, nr))
+ if (unlikely(pud_huge(pud))) {
+ if (!gup_huge_pte(__pte(pud_val(pud)), (pte_t *)pudp,
+ addr, PUD_SIZE, next,
+ write, pages, nr))
+ return 0;
+ } else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) {
+ if (!gup_hugepd(__hugepd(pud_val(pud)), addr, PUD_SHIFT,
+ next, write, pages, nr))
return 0;
} else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
return 0;
@@ -970,10 +955,21 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
local_irq_save(flags);
pgdp = pgd_offset(mm, addr);
do {
+ pgd_t pgd = ACCESS_ONCE(*pgdp);
+
next = pgd_addr_end(addr, end);
- if (pgd_none(*pgdp))
+ if (pgd_none(pgd))
break;
- else if (!gup_pud_range(pgdp, addr, next, write, pages, &nr))
+ if (unlikely(pgd_huge(pgd))) {
+ if (!gup_huge_pte(__pte(pgd_val(pgd)), (pte_t *)pgdp,
+ addr, PGDIR_SIZE, next,
+ write, pages, &nr))
+ break;
+ } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
+ if (!gup_hugepd(__hugepd(pgd_val(pgd)), addr, PGDIR_SHIFT,
+ next, write, pages, &nr))
+ break;
+ } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
break;
} while (pgdp++, addr = next, addr != end);
local_irq_restore(flags);
@@ -1028,5 +1024,4 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,

return ret;
}
-
#endif /* CONFIG_HAVE_GENERIC_RCU_GUP */
--
1.9.1


2014-10-15 16:35:23

by Aneesh Kumar K.V

[permalink] [raw]
Subject: [PATCH 2/2] arch/powerpc: Switch to generic RCU get_user_pages_fast

This patch switch the ppc arch to use the generic RCU based
gup implementation.

Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
arch/powerpc/Kconfig | 1 +
arch/powerpc/include/asm/hugetlb.h | 8 +-
arch/powerpc/include/asm/page.h | 4 +-
arch/powerpc/include/asm/pgtable-ppc64.h | 1 -
arch/powerpc/include/asm/pgtable.h | 5 -
arch/powerpc/mm/Makefile | 2 +-
arch/powerpc/mm/gup.c | 235 -------------------------------
arch/powerpc/mm/hugetlbpage.c | 27 ++--
8 files changed, 21 insertions(+), 262 deletions(-)
delete mode 100644 arch/powerpc/mm/gup.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 88eace4e28c3..7af887dc6aed 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -148,6 +148,7 @@ config PPC
select HAVE_ARCH_AUDITSYSCALL
select ARCH_SUPPORTS_ATOMIC_RMW
select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN
+ select HAVE_GENERIC_RCU_GUP

config GENERIC_CSUM
def_bool CPU_LITTLE_ENDIAN
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 623f2971ce0e..7855cce9c969 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -48,7 +48,7 @@ static inline unsigned int hugepd_shift(hugepd_t hpd)
#endif /* CONFIG_PPC_BOOK3S_64 */


-static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
+static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
unsigned pdshift)
{
/*
@@ -58,9 +58,9 @@ static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
*/
unsigned long idx = 0;

- pte_t *dir = hugepd_page(*hpdp);
+ pte_t *dir = hugepd_page(hpd);
#ifndef CONFIG_PPC_FSL_BOOK3E
- idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp);
+ idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(hpd);
#endif

return dir + idx;
@@ -193,7 +193,7 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma,
}

#define hugepd_shift(x) 0
-static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
+static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
unsigned pdshift)
{
return 0;
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 26fe1ae15212..aa430ec14895 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -379,12 +379,12 @@ static inline int hugepd_ok(hugepd_t hpd)
}
#endif

-#define is_hugepd(pdep) (hugepd_ok(*((hugepd_t *)(pdep))))
+#define is_hugepd(hpd) (hugepd_ok(hpd))
int pgd_huge(pgd_t pgd);
#else /* CONFIG_HUGETLB_PAGE */
#define is_hugepd(pdep) 0
-#define pgd_huge(pgd) 0
#endif /* CONFIG_HUGETLB_PAGE */
+#define __hugepd(x) ((hugepd_t) { (x) })

struct page;
extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg);
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index ae153c40ab7c..29c36242cc6a 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -575,6 +575,5 @@ static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
*/
return true;
}
-
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_PGTABLE_PPC64_H_ */
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 316f9a5da173..4a67c1ddb91b 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -274,11 +274,6 @@ extern void paging_init(void);
*/
extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);

-extern int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr);
-
-extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr);
#ifndef CONFIG_TRANSPARENT_HUGEPAGE
#define pmd_large(pmd) 0
#define has_transparent_hugepage() 0
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 325e861616a1..438dcd3fd0d1 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -6,7 +6,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror

ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)

-obj-y := fault.o mem.o pgtable.o gup.o mmap.o \
+obj-y := fault.o mem.o pgtable.o mmap.o \
init_$(CONFIG_WORD_SIZE).o \
pgtable_$(CONFIG_WORD_SIZE).o
obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c
deleted file mode 100644
index d8746684f606..000000000000
--- a/arch/powerpc/mm/gup.c
+++ /dev/null
@@ -1,235 +0,0 @@
-/*
- * Lockless get_user_pages_fast for powerpc
- *
- * Copyright (C) 2008 Nick Piggin
- * Copyright (C) 2008 Novell Inc.
- */
-#undef DEBUG
-
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/hugetlb.h>
-#include <linux/vmstat.h>
-#include <linux/pagemap.h>
-#include <linux/rwsem.h>
-#include <asm/pgtable.h>
-
-#ifdef __HAVE_ARCH_PTE_SPECIAL
-
-/*
- * The performance critical leaf functions are made noinline otherwise gcc
- * inlines everything into a single function which results in too much
- * register pressure.
- */
-static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- unsigned long mask, result;
- pte_t *ptep;
-
- result = _PAGE_PRESENT|_PAGE_USER;
- if (write)
- result |= _PAGE_RW;
- mask = result | _PAGE_SPECIAL;
-
- ptep = pte_offset_kernel(&pmd, addr);
- do {
- pte_t pte = ACCESS_ONCE(*ptep);
- struct page *page;
- /*
- * Similar to the PMD case, NUMA hinting must take slow path
- */
- if (pte_numa(pte))
- return 0;
-
- if ((pte_val(pte) & mask) != result)
- return 0;
- VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
- page = pte_page(pte);
- if (!page_cache_get_speculative(page))
- return 0;
- if (unlikely(pte_val(pte) != pte_val(*ptep))) {
- put_page(page);
- return 0;
- }
- pages[*nr] = page;
- (*nr)++;
-
- } while (ptep++, addr += PAGE_SIZE, addr != end);
-
- return 1;
-}
-
-static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
- int write, struct page **pages, int *nr)
-{
- unsigned long next;
- pmd_t *pmdp;
-
- pmdp = pmd_offset(&pud, addr);
- do {
- pmd_t pmd = ACCESS_ONCE(*pmdp);
-
- next = pmd_addr_end(addr, end);
- /*
- * If we find a splitting transparent hugepage we
- * return zero. That will result in taking the slow
- * path which will call wait_split_huge_page()
- * if the pmd is still in splitting state
- */
- if (pmd_none(pmd) || pmd_trans_splitting(pmd))
- return 0;
- if (pmd_huge(pmd) || pmd_large(pmd)) {
- /*
- * NUMA hinting faults need to be handled in the GUP
- * slowpath for accounting purposes and so that they
- * can be serialised against THP migration.
- */
- if (pmd_numa(pmd))
- return 0;
-
- if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next,
- write, pages, nr))
- return 0;
- } else if (is_hugepd(pmdp)) {
- if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT,
- addr, next, write, pages, nr))
- return 0;
- } else if (!gup_pte_range(pmd, addr, next, write, pages, nr))
- return 0;
- } while (pmdp++, addr = next, addr != end);
-
- return 1;
-}
-
-static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
- int write, struct page **pages, int *nr)
-{
- unsigned long next;
- pud_t *pudp;
-
- pudp = pud_offset(&pgd, addr);
- do {
- pud_t pud = ACCESS_ONCE(*pudp);
-
- next = pud_addr_end(addr, end);
- if (pud_none(pud))
- return 0;
- if (pud_huge(pud)) {
- if (!gup_hugepte((pte_t *)pudp, PUD_SIZE, addr, next,
- write, pages, nr))
- return 0;
- } else if (is_hugepd(pudp)) {
- if (!gup_hugepd((hugepd_t *)pudp, PUD_SHIFT,
- addr, next, write, pages, nr))
- return 0;
- } else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
- return 0;
- } while (pudp++, addr = next, addr != end);
-
- return 1;
-}
-
-int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
- struct page **pages)
-{
- struct mm_struct *mm = current->mm;
- unsigned long addr, len, end;
- unsigned long next;
- unsigned long flags;
- pgd_t *pgdp;
- int nr = 0;
-
- pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read");
-
- start &= PAGE_MASK;
- addr = start;
- len = (unsigned long) nr_pages << PAGE_SHIFT;
- end = start + len;
-
- if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
- start, len)))
- return 0;
-
- pr_devel(" aligned: %lx .. %lx\n", start, end);
-
- /*
- * XXX: batch / limit 'nr', to avoid large irq off latency
- * needs some instrumenting to determine the common sizes used by
- * important workloads (eg. DB2), and whether limiting the batch size
- * will decrease performance.
- *
- * It seems like we're in the clear for the moment. Direct-IO is
- * the main guy that batches up lots of get_user_pages, and even
- * they are limited to 64-at-a-time which is not so many.
- */
- /*
- * This doesn't prevent pagetable teardown, but does prevent
- * the pagetables from being freed on powerpc.
- *
- * So long as we atomically load page table pointers versus teardown,
- * we can follow the address down to the the page and take a ref on it.
- */
- local_irq_save(flags);
-
- pgdp = pgd_offset(mm, addr);
- do {
- pgd_t pgd = ACCESS_ONCE(*pgdp);
-
- pr_devel(" %016lx: normal pgd %p\n", addr,
- (void *)pgd_val(pgd));
- next = pgd_addr_end(addr, end);
- if (pgd_none(pgd))
- break;
- if (pgd_huge(pgd)) {
- if (!gup_hugepte((pte_t *)pgdp, PGDIR_SIZE, addr, next,
- write, pages, &nr))
- break;
- } else if (is_hugepd(pgdp)) {
- if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT,
- addr, next, write, pages, &nr))
- break;
- } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
- break;
- } while (pgdp++, addr = next, addr != end);
-
- local_irq_restore(flags);
-
- return nr;
-}
-
-int get_user_pages_fast(unsigned long start, int nr_pages, int write,
- struct page **pages)
-{
- struct mm_struct *mm = current->mm;
- int nr, ret;
-
- start &= PAGE_MASK;
- nr = __get_user_pages_fast(start, nr_pages, write, pages);
- ret = nr;
-
- if (nr < nr_pages) {
- pr_devel(" slow path ! nr = %d\n", nr);
-
- /* Try to get the remaining pages with get_user_pages */
- start += nr << PAGE_SHIFT;
- pages += nr;
-
- down_read(&mm->mmap_sem);
- ret = get_user_pages(current, mm, start,
- nr_pages - nr, write, 0, pages, NULL);
- up_read(&mm->mmap_sem);
-
- /* Have to be a bit careful with return values */
- if (nr > 0) {
- if (ret < 0)
- ret = nr;
- else
- ret += nr;
- }
- }
-
- return ret;
-}
-
-#endif /* __HAVE_ARCH_PTE_SPECIAL */
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 7e70ae968e5f..b8d228ca50fd 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -230,7 +230,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
return NULL;

- return hugepte_offset(hpdp, addr, pdshift);
+ return hugepte_offset(*hpdp, addr, pdshift);
}

#else
@@ -270,7 +270,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
return NULL;

- return hugepte_offset(hpdp, addr, pdshift);
+ return hugepte_offset(*hpdp, addr, pdshift);
}
#endif

@@ -538,7 +538,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
do {
pmd = pmd_offset(pud, addr);
next = pmd_addr_end(addr, end);
- if (!is_hugepd(pmd)) {
+ if (!is_hugepd(__hugepd(pmd_val(*pmd)))) {
/*
* if it is not hugepd pointer, we should already find
* it cleared.
@@ -587,7 +587,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
do {
pud = pud_offset(pgd, addr);
next = pud_addr_end(addr, end);
- if (!is_hugepd(pud)) {
+ if (!is_hugepd(__hugepd(pud_val(*pud)))) {
if (pud_none_or_clear_bad(pud))
continue;
hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
@@ -653,7 +653,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
do {
next = pgd_addr_end(addr, end);
pgd = pgd_offset(tlb->mm, addr);
- if (!is_hugepd(pgd)) {
+ if (!is_hugepd(__hugepd(pgd_val(*pgd)))) {
if (pgd_none_or_clear_bad(pgd))
continue;
hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
@@ -713,18 +713,17 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
return (__boundary - 1 < end - 1) ? __boundary : end;
}

-int gup_hugepd(hugepd_t *hugepd, unsigned pdshift,
- unsigned long addr, unsigned long end,
- int write, struct page **pages, int *nr)
+int gup_hugepd(hugepd_t hugepd, unsigned long addr, unsigned pdshift,
+ unsigned long end, int write, struct page **pages, int *nr)
{
pte_t *ptep;
- unsigned long sz = 1UL << hugepd_shift(*hugepd);
+ unsigned long sz = 1UL << hugepd_shift(hugepd);
unsigned long next;

ptep = hugepte_offset(hugepd, addr, pdshift);
do {
next = hugepte_addr_end(addr, end, sz);
- if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
+ if (!gup_huge_pte(*ptep, ptep, addr, sz, end, write, pages, nr))
return 0;
} while (ptep++, addr = next, addr != end);

@@ -961,7 +960,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
else if (pgd_huge(pgd)) {
ret_pte = (pte_t *) pgdp;
goto out;
- } else if (is_hugepd(&pgd))
+ } else if (is_hugepd(__hugepd(pgd_val(pgd))))
hpdp = (hugepd_t *)&pgd;
else {
/*
@@ -978,7 +977,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
else if (pud_huge(pud)) {
ret_pte = (pte_t *) pudp;
goto out;
- } else if (is_hugepd(&pud))
+ } else if (is_hugepd(__hugepd(pud_val(pud))))
hpdp = (hugepd_t *)&pud;
else {
pdshift = PMD_SHIFT;
@@ -999,7 +998,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
if (pmd_huge(pmd) || pmd_large(pmd)) {
ret_pte = (pte_t *) pmdp;
goto out;
- } else if (is_hugepd(&pmd))
+ } else if (is_hugepd(__hugepd(pmd_val(pmd))))
hpdp = (hugepd_t *)&pmd;
else
return pte_offset_kernel(&pmd, ea);
@@ -1008,7 +1007,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
if (!hpdp)
return NULL;

- ret_pte = hugepte_offset(hpdp, ea, pdshift);
+ ret_pte = hugepte_offset(*hpdp, ea, pdshift);
pdshift = hugepd_shift(*hpdp);
out:
if (shift)
--
1.9.1

2014-10-16 09:25:41

by Steve Capper

[permalink] [raw]
Subject: Re: [PATCH 1/2] mm: Update generic gup implementation to handle hugepage directory

On Wed, Oct 15, 2014 at 10:04:47PM +0530, Aneesh Kumar K.V wrote:
> Update generic gup implementation with powerpc specific details.
> On powerpc at pmd level we can have hugepte, normal pmd pointer
> or a pointer to the hugepage directory.
>
> Signed-off-by: Aneesh Kumar K.V <[email protected]>

Hi,
This patch causes compiler errors on arm and arm64 due to pgd_huge
being undefined. I've attached a fixup below, this fixup will require
that #define pgd_huge(pgd) 0 be added back into:
arch/powerpc/include/asm/page.h
For the second patch in this series.

Another avenue would be to do something like:
#ifndef pgd_huge
#define pgd_huge(pgd) (0)
#endif

Then no changes would be required to arm and arm64 (or other
architectures).

To help with bisectability, could we please have a suitable fix applied
to the two patches in the -mm tree:
http://ozlabs.org/~akpm/mmots/broken-out/mm-update-generic-gup-implementation-to-handle-hugepage-directory.patch
http://ozlabs.org/~akpm/mmots/broken-out/arch-powerpc-switch-to-generic-rcu-get_user_pages_fast.patch

rather than applied afterwards?

With pgd_huge(x) defined, this patch passes my futex test on arm
(Arndale platform) and arm64(Juno).

Cheers,
--
Steve



>From 2fb7b0308f0aca94c50611257ba82d656abb0768 Mon Sep 17 00:00:00 2001
From: Steve Capper <[email protected]>
Date: Thu, 16 Oct 2014 09:09:48 +0100
Subject: [PATCH] Fixup for Update generic gup implementation

The patch:
mm: Update generic gup implementation to handle hugepage directory

will not compile for arm or arm64 due to pgd_huge being undefined.

Signed-off-by: Steve Capper <[email protected]>
---
arch/arm/include/asm/pgtable.h | 2 ++
arch/arm64/include/asm/pgtable.h | 2 ++
include/linux/hugetlb.h | 1 -
3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 90aa4583..46f81fb 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -181,6 +181,8 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
/* to find an entry in a kernel page-table-directory */
#define pgd_offset_k(addr) pgd_offset(&init_mm, addr)

+#define pgd_huge(pgd) (0)
+
#define pmd_none(pmd) (!pmd_val(pmd))
#define pmd_present(pmd) (pmd_val(pmd))

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 464c5ce..d4462ca 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -462,6 +462,8 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
extern pgd_t idmap_pg_dir[PTRS_PER_PGD];

+#define pgd_huge(pgd) (0)
+
/*
* Encode and decode a swap entry:
* bits 0-1: present (must be zero)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 65e12a2..6e6d338 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -138,7 +138,6 @@ static inline void hugetlb_show_meminfo(void)
#define prepare_hugepage_range(file, addr, len) (-EINVAL)
#define pmd_huge(x) 0
#define pud_huge(x) 0
-#define pgd_huge(x) 0
#define is_hugepage_only_range(mm, addr, len) 0
#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
#define hugetlb_fault(mm, vma, addr, flags) ({ BUG(); 0; })
--
1.9.3

2014-10-16 15:18:50

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [PATCH 1/2] mm: Update generic gup implementation to handle hugepage directory

Steve Capper <[email protected]> writes:

> On Wed, Oct 15, 2014 at 10:04:47PM +0530, Aneesh Kumar K.V wrote:
>> Update generic gup implementation with powerpc specific details.
>> On powerpc at pmd level we can have hugepte, normal pmd pointer
>> or a pointer to the hugepage directory.
>>
>> Signed-off-by: Aneesh Kumar K.V <[email protected]>
>
> Hi,
> This patch causes compiler errors on arm and arm64 due to pgd_huge
> being undefined. I've attached a fixup below, this fixup will require
> that #define pgd_huge(pgd) 0 be added back into:
> arch/powerpc/include/asm/page.h
> For the second patch in this series.
>
> Another avenue would be to do something like:
> #ifndef pgd_huge
> #define pgd_huge(pgd) (0)
> #endif
>
> Then no changes would be required to arm and arm64 (or other
> architectures).
>
> To help with bisectability, could we please have a suitable fix applied
> to the two patches in the -mm tree:
> http://ozlabs.org/~akpm/mmots/broken-out/mm-update-generic-gup-implementation-to-handle-hugepage-directory.patch
> http://ozlabs.org/~akpm/mmots/broken-out/arch-powerpc-switch-to-generic-rcu-get_user_pages_fast.patch
>
> rather than applied afterwards?
>
> With pgd_huge(x) defined, this patch passes my futex test on arm
> (Arndale platform) and arm64(Juno).
>
> Cheers,
> --
> Steve
>
>
>
> From 2fb7b0308f0aca94c50611257ba82d656abb0768 Mon Sep 17 00:00:00 2001
> From: Steve Capper <[email protected]>
> Date: Thu, 16 Oct 2014 09:09:48 +0100
> Subject: [PATCH] Fixup for Update generic gup implementation
>
> The patch:
> mm: Update generic gup implementation to handle hugepage directory
>
> will not compile for arm or arm64 due to pgd_huge being undefined.
>
> Signed-off-by: Steve Capper <[email protected]>
> ---
> arch/arm/include/asm/pgtable.h | 2 ++
> arch/arm64/include/asm/pgtable.h | 2 ++
> include/linux/hugetlb.h | 1 -
> 3 files changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
> index 90aa4583..46f81fb 100644
> --- a/arch/arm/include/asm/pgtable.h
> +++ b/arch/arm/include/asm/pgtable.h
> @@ -181,6 +181,8 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
> /* to find an entry in a kernel page-table-directory */
> #define pgd_offset_k(addr) pgd_offset(&init_mm, addr)
>
> +#define pgd_huge(pgd) (0)
> +
> #define pmd_none(pmd) (!pmd_val(pmd))
> #define pmd_present(pmd) (pmd_val(pmd))
>
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index 464c5ce..d4462ca 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -462,6 +462,8 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
> extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
> extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
>
> +#define pgd_huge(pgd) (0)
> +
> /*
> * Encode and decode a swap entry:
> * bits 0-1: present (must be zero)
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 65e12a2..6e6d338 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -138,7 +138,6 @@ static inline void hugetlb_show_meminfo(void)
> #define prepare_hugepage_range(file, addr, len) (-EINVAL)
> #define pmd_huge(x) 0
> #define pud_huge(x) 0
> -#define pgd_huge(x) 0
> #define is_hugepage_only_range(mm, addr, len) 0
> #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
> #define hugetlb_fault(mm, vma, addr, flags) ({ BUG(); 0; })

don't do the last hunk, that will result in build failures on sub
platforms on ppc64. can you do the arm patch without making the change
to hugetlb.h ?

-aneesh

2014-10-16 15:42:45

by Steve Capper

[permalink] [raw]
Subject: Re: [PATCH 1/2] mm: Update generic gup implementation to handle hugepage directory

On Thu, Oct 16, 2014 at 08:48:20PM +0530, Aneesh Kumar K.V wrote:
> Steve Capper <[email protected]> writes:
>
> > On Wed, Oct 15, 2014 at 10:04:47PM +0530, Aneesh Kumar K.V wrote:
> >> Update generic gup implementation with powerpc specific details.
> >> On powerpc at pmd level we can have hugepte, normal pmd pointer
> >> or a pointer to the hugepage directory.
> >>
> >> Signed-off-by: Aneesh Kumar K.V <[email protected]>
> >
> > Hi,
> > This patch causes compiler errors on arm and arm64 due to pgd_huge
> > being undefined. I've attached a fixup below, this fixup will require
> > that #define pgd_huge(pgd) 0 be added back into:
> > arch/powerpc/include/asm/page.h
> > For the second patch in this series.
> >
> > Another avenue would be to do something like:
> > #ifndef pgd_huge
> > #define pgd_huge(pgd) (0)
> > #endif
> >
> > Then no changes would be required to arm and arm64 (or other
> > architectures).
> >
> > To help with bisectability, could we please have a suitable fix applied
> > to the two patches in the -mm tree:
> > http://ozlabs.org/~akpm/mmots/broken-out/mm-update-generic-gup-implementation-to-handle-hugepage-directory.patch
> > http://ozlabs.org/~akpm/mmots/broken-out/arch-powerpc-switch-to-generic-rcu-get_user_pages_fast.patch
> >
> > rather than applied afterwards?
> >
> > With pgd_huge(x) defined, this patch passes my futex test on arm
> > (Arndale platform) and arm64(Juno).
> >
> > Cheers,
> > --
> > Steve
> >
> >
> >
> > From 2fb7b0308f0aca94c50611257ba82d656abb0768 Mon Sep 17 00:00:00 2001
> > From: Steve Capper <[email protected]>
> > Date: Thu, 16 Oct 2014 09:09:48 +0100
> > Subject: [PATCH] Fixup for Update generic gup implementation
> >
> > The patch:
> > mm: Update generic gup implementation to handle hugepage directory
> >
> > will not compile for arm or arm64 due to pgd_huge being undefined.
> >
> > Signed-off-by: Steve Capper <[email protected]>
> > ---
> > arch/arm/include/asm/pgtable.h | 2 ++
> > arch/arm64/include/asm/pgtable.h | 2 ++
> > include/linux/hugetlb.h | 1 -
> > 3 files changed, 4 insertions(+), 1 deletion(-)
> >
> > diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
> > index 90aa4583..46f81fb 100644
> > --- a/arch/arm/include/asm/pgtable.h
> > +++ b/arch/arm/include/asm/pgtable.h
> > @@ -181,6 +181,8 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
> > /* to find an entry in a kernel page-table-directory */
> > #define pgd_offset_k(addr) pgd_offset(&init_mm, addr)
> >
> > +#define pgd_huge(pgd) (0)
> > +
> > #define pmd_none(pmd) (!pmd_val(pmd))
> > #define pmd_present(pmd) (pmd_val(pmd))
> >
> > diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> > index 464c5ce..d4462ca 100644
> > --- a/arch/arm64/include/asm/pgtable.h
> > +++ b/arch/arm64/include/asm/pgtable.h
> > @@ -462,6 +462,8 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
> > extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
> > extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
> >
> > +#define pgd_huge(pgd) (0)
> > +
> > /*
> > * Encode and decode a swap entry:
> > * bits 0-1: present (must be zero)
> > diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> > index 65e12a2..6e6d338 100644
> > --- a/include/linux/hugetlb.h
> > +++ b/include/linux/hugetlb.h
> > @@ -138,7 +138,6 @@ static inline void hugetlb_show_meminfo(void)
> > #define prepare_hugepage_range(file, addr, len) (-EINVAL)
> > #define pmd_huge(x) 0
> > #define pud_huge(x) 0
> > -#define pgd_huge(x) 0
> > #define is_hugepage_only_range(mm, addr, len) 0
> > #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
> > #define hugetlb_fault(mm, vma, addr, flags) ({ BUG(); 0; })
>
> don't do the last hunk, that will result in build failures on sub
> platforms on ppc64. can you do the arm patch without making the change
> to hugetlb.h ?
>

Hi Aneesh,

The problem with leaving the empty pgd_huge in hugetlb.h is that we
would then have to resort to patterns like this for both arm and arm64:

#ifdef CONFIG_HUGETLB_PAGE
#define pgd_huge(pgd) (0)
#endif

If possible, I'd much rather just have:
#define pgd_huge(pgd) (0)

After the second patch in this series we already have the following
code pattern in arch/powerpc/include/asm/page.h:

#define is_hugepd(hpd) (hugepd_ok(hpd))
int pgd_huge(pgd_t pgd);
#else /* CONFIG_HUGETLB_PAGE */
#define is_hugepd(pdep) 0
#endif /* CONFIG_HUGETLB_PAGE */
#define __hugepd(x) ((hugepd_t) { (x) })

Can we not just add a:
#define pgd_huge(pgd) (0)
above the "#endif /* CONFIG_HUGETLB_PAGE */" line in the second patch?
(or, more precisely, prevent the second patch from removing this line).

That way we get a clearer code overall?

Cheers,
--
Steve

2014-10-16 18:05:26

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [PATCH 1/2] mm: Update generic gup implementation to handle hugepage directory

Steve Capper <[email protected]> writes:

> Can we not just add a:
> #define pgd_huge(pgd) (0)
> above the "#endif /* CONFIG_HUGETLB_PAGE */" line in the second patch?
> (or, more precisely, prevent the second patch from removing this line).
>
> That way we get a clearer code overall?

it is strange to have both pmd_huge and pud_huge in hugetlb.h and
pgd_huge in page.h. But if that is what we want then we may need.


diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index aa430ec14895..aeca81947dc6 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -383,6 +383,7 @@ static inline int hugepd_ok(hugepd_t hpd)
int pgd_huge(pgd_t pgd);
#else /* CONFIG_HUGETLB_PAGE */
#define is_hugepd(pdep) 0
+#define pgd_huge(pgd) 0
#endif /* CONFIG_HUGETLB_PAGE */
#define __hugepd(x) ((hugepd_t) { (x) })