2018-01-17 09:22:45

by Christophe Leroy

Subject: [PATCH v2 1/5] powerpc/mm: Enhance 'slice' for supporting PPC32

In preparation for the following patch, which will fix an issue on
the 8xx by re-using the 'slices', this patch enhances the
'slices' implementation to support 32-bit CPUs.

On PPC32, the address space is limited to 4Gbytes, hence only the low
slices will be used. As of today, the code uses
SLICE_LOW_TOP (0x100000000ul) and compares it with addr to determine
if addr refers to low or high space.
On PPC32, such an (addr < SLICE_LOW_TOP) test is always false because
the constant 0x100000000ul does not fit in a 32-bit unsigned long and
truncates to 0. Therefore, the patch modifies SLICE_LOW_TOP to
(0xfffffffful) and changes the tests to (addr <= SLICE_LOW_TOP), which
is then always true on PPC32 since addr has type 'unsigned long', while
leaving the PPC64 behaviour unchanged.

This patch also moves the "slices" function prototypes from page_64.h to page.h.

The high slices use bitmaps. As the bitmap functions are not prepared
to handle bitmaps of size 0, the bitmap_xxx() calls are wrapped in
slice_bitmap_xxx() macros which take care of the nbits == 0 case.

Signed-off-by: Christophe Leroy <[email protected]>
---
v2: First patch of the v1 series split in two parts; added slice_bitmap_xxx() macros.

arch/powerpc/include/asm/page.h | 14 +++++++++
arch/powerpc/include/asm/page_32.h | 19 ++++++++++++
arch/powerpc/include/asm/page_64.h | 21 ++-----------
arch/powerpc/mm/hash_utils_64.c | 2 +-
arch/powerpc/mm/mmu_context_nohash.c | 7 +++++
arch/powerpc/mm/slice.c | 60 ++++++++++++++++++++++++------------
6 files changed, 83 insertions(+), 40 deletions(-)

diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 8da5d4c1cab2..d0384f9db9eb 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -342,6 +342,20 @@ typedef struct page *pgtable_t;
#endif
#endif

+#ifdef CONFIG_PPC_MM_SLICES
+struct mm_struct;
+
+unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
+ unsigned long flags, unsigned int psize,
+ int topdown);
+
+unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr);
+
+void slice_set_user_psize(struct mm_struct *mm, unsigned int psize);
+void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long len, unsigned int psize);
+#endif
+
#include <asm-generic/memory_model.h>
#endif /* __ASSEMBLY__ */

diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h
index 5c378e9b78c8..f7d1bd1183c8 100644
--- a/arch/powerpc/include/asm/page_32.h
+++ b/arch/powerpc/include/asm/page_32.h
@@ -60,4 +60,23 @@ extern void copy_page(void *to, void *from);

#endif /* __ASSEMBLY__ */

+#ifdef CONFIG_PPC_MM_SLICES
+
+#define SLICE_LOW_SHIFT 28
+#define SLICE_HIGH_SHIFT 0
+
+#define SLICE_LOW_TOP (0xfffffffful)
+#define SLICE_NUM_LOW ((SLICE_LOW_TOP >> SLICE_LOW_SHIFT) + 1)
+#define SLICE_NUM_HIGH 0ul
+
+#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT)
+#define GET_HIGH_SLICE_INDEX(addr) ((addr) & 0)
+
+#ifdef CONFIG_HUGETLB_PAGE
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#endif
+#define HAVE_ARCH_UNMAPPED_AREA
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+
+#endif
#endif /* _ASM_POWERPC_PAGE_32_H */
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index 56234c6fcd61..a7baef5bbe5f 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -91,30 +91,13 @@ extern u64 ppc64_pft_size;
#define SLICE_LOW_SHIFT 28
#define SLICE_HIGH_SHIFT 40

-#define SLICE_LOW_TOP (0x100000000ul)
-#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
+#define SLICE_LOW_TOP (0xfffffffful)
+#define SLICE_NUM_LOW ((SLICE_LOW_TOP >> SLICE_LOW_SHIFT) + 1)
#define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT)

#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT)
#define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT)

-#ifndef __ASSEMBLY__
-struct mm_struct;
-
-extern unsigned long slice_get_unmapped_area(unsigned long addr,
- unsigned long len,
- unsigned long flags,
- unsigned int psize,
- int topdown);
-
-extern unsigned int get_slice_psize(struct mm_struct *mm,
- unsigned long addr);
-
-extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize);
-extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
- unsigned long len, unsigned int psize);
-
-#endif /* __ASSEMBLY__ */
#else
#define slice_init()
#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 655a5a9a183d..3266b3326088 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1101,7 +1101,7 @@ static unsigned int get_paca_psize(unsigned long addr)
unsigned char *hpsizes;
unsigned long index, mask_index;

- if (addr < SLICE_LOW_TOP) {
+ if (addr <= SLICE_LOW_TOP) {
lpsizes = get_paca()->mm_ctx_low_slices_psize;
index = GET_LOW_SLICE_INDEX(addr);
return (lpsizes >> (index * 4)) & 0xF;
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 4554d6527682..42e02f5b6660 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -331,6 +331,13 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm)
{
pr_hard("initing context for mm @%p\n", mm);

+#ifdef CONFIG_PPC_MM_SLICES
+ if (!mm->context.slb_addr_limit)
+ mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW;
+ if (!mm->context.id)
+ slice_set_user_psize(mm, mmu_virtual_psize);
+#endif
+
mm->context.id = MMU_NO_CONTEXT;
mm->context.active = 0;
return 0;
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 23ec2c5e3b78..3f35a93afe13 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -67,16 +67,33 @@ static void slice_print_mask(const char *label, struct slice_mask mask) {}

#endif

+#define slice_bitmap_zero(dst, nbits) \
+ do { if (nbits) bitmap_zero(dst, nbits); } while (0)
+#define slice_bitmap_set(dst, pos, nbits) \
+ do { if (nbits) bitmap_set(dst, pos, nbits); } while (0)
+#define slice_bitmap_copy(dst, src, nbits) \
+ do { if (nbits) bitmap_copy(dst, src, nbits); } while (0)
+#define slice_bitmap_and(dst, src1, src2, nbits) \
+ ({ (nbits) ? bitmap_and(dst, src1, src2, nbits) : 0; })
+#define slice_bitmap_or(dst, src1, src2, nbits) \
+ do { if (nbits) bitmap_or(dst, src1, src2, nbits); } while (0)
+#define slice_bitmap_andnot(dst, src1, src2, nbits) \
+ ({ (nbits) ? bitmap_andnot(dst, src1, src2, nbits) : 0; })
+#define slice_bitmap_equal(src1, src2, nbits) \
+ ({ (nbits) ? bitmap_equal(src1, src2, nbits) : 1; })
+#define slice_bitmap_empty(src, nbits) \
+ ({ (nbits) ? bitmap_empty(src, nbits) : 1; })
+
static void slice_range_to_mask(unsigned long start, unsigned long len,
struct slice_mask *ret)
{
unsigned long end = start + len - 1;

ret->low_slices = 0;
- bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
+ slice_bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);

- if (start < SLICE_LOW_TOP) {
- unsigned long mend = min(end, (SLICE_LOW_TOP - 1));
+ if (start <= SLICE_LOW_TOP) {
+ unsigned long mend = min(end, SLICE_LOW_TOP);

ret->low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
- (1u << GET_LOW_SLICE_INDEX(start));
@@ -87,7 +104,7 @@ static void slice_range_to_mask(unsigned long start, unsigned long len,
unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT));
unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index;

- bitmap_set(ret->high_slices, start_index, count);
+ slice_bitmap_set(ret->high_slices, start_index, count);
}
}

@@ -117,7 +134,7 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
* of the high or low area bitmaps, the first high area starts
* at 4GB, not 0 */
if (start == 0)
- start = SLICE_LOW_TOP;
+ start = SLICE_LOW_TOP + 1;

return !slice_area_is_free(mm, start, end - start);
}
@@ -128,7 +145,7 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
unsigned long i;

ret->low_slices = 0;
- bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
+ slice_bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);

for (i = 0; i < SLICE_NUM_LOW; i++)
if (!slice_low_has_vma(mm, i))
@@ -151,7 +168,7 @@ static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_ma
u64 lpsizes;

ret->low_slices = 0;
- bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
+ slice_bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);

lpsizes = mm->context.low_slices_psize;
for (i = 0; i < SLICE_NUM_LOW; i++)
@@ -180,11 +197,11 @@ static int slice_check_fit(struct mm_struct *mm,
*/
unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit);

- bitmap_and(result, mask.high_slices,
- available.high_slices, slice_count);
+ slice_bitmap_and(result, mask.high_slices,
+ available.high_slices, slice_count);

return (mask.low_slices & available.low_slices) == mask.low_slices &&
- bitmap_equal(result, mask.high_slices, slice_count);
+ slice_bitmap_equal(result, mask.high_slices, slice_count);
}

static void slice_flush_segments(void *parm)
@@ -259,7 +276,7 @@ static bool slice_scan_available(unsigned long addr,
unsigned long *boundary_addr)
{
unsigned long slice;
- if (addr < SLICE_LOW_TOP) {
+ if (addr <= SLICE_LOW_TOP) {
slice = GET_LOW_SLICE_INDEX(addr);
*boundary_addr = (slice + end) << SLICE_LOW_SHIFT;
return !!(available.low_slices & (1u << slice));
@@ -391,8 +408,9 @@ static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src)
DECLARE_BITMAP(result, SLICE_NUM_HIGH);

dst->low_slices |= src->low_slices;
- bitmap_or(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
- bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
+ slice_bitmap_or(result, dst->high_slices, src->high_slices,
+ SLICE_NUM_HIGH);
+ slice_bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
}

static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask *src)
@@ -401,8 +419,9 @@ static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask *

dst->low_slices &= ~src->low_slices;

- bitmap_andnot(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
- bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
+ slice_bitmap_andnot(result, dst->high_slices, src->high_slices,
+ SLICE_NUM_HIGH);
+ slice_bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
}

#ifdef CONFIG_PPC_64K_PAGES
@@ -450,14 +469,14 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
* init different masks
*/
mask.low_slices = 0;
- bitmap_zero(mask.high_slices, SLICE_NUM_HIGH);
+ slice_bitmap_zero(mask.high_slices, SLICE_NUM_HIGH);

/* silence stupid warning */;
potential_mask.low_slices = 0;
- bitmap_zero(potential_mask.high_slices, SLICE_NUM_HIGH);
+ slice_bitmap_zero(potential_mask.high_slices, SLICE_NUM_HIGH);

compat_mask.low_slices = 0;
- bitmap_zero(compat_mask.high_slices, SLICE_NUM_HIGH);
+ slice_bitmap_zero(compat_mask.high_slices, SLICE_NUM_HIGH);

/* Sanity checks */
BUG_ON(mm->task_size == 0);
@@ -595,7 +614,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
convert:
slice_andnot_mask(&mask, &good_mask);
slice_andnot_mask(&mask, &compat_mask);
- if (mask.low_slices || !bitmap_empty(mask.high_slices, SLICE_NUM_HIGH)) {
+ if (mask.low_slices ||
+ !slice_bitmap_empty(mask.high_slices, SLICE_NUM_HIGH)) {
slice_convert(mm, mask, psize);
if (psize > MMU_PAGE_BASE)
on_each_cpu(slice_flush_segments, mm, 1);
@@ -640,7 +660,7 @@ unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
return MMU_PAGE_4K;
#endif
}
- if (addr < SLICE_LOW_TOP) {
+ if (addr <= SLICE_LOW_TOP) {
u64 lpsizes;
lpsizes = mm->context.low_slices_psize;
index = GET_LOW_SLICE_INDEX(addr);
--
2.13.3


2018-01-17 09:22:56

by Christophe Leroy

Subject: [PATCH v2 5/5] powerpc/mm: Remove intermediate bitmap copy in 'slices'

bitmap_or() and bitmap_andnot() can work properly with dst identical
to src1 or src2. There is no need for an intermediate result bitmap
that is copied back to dst in a second step.
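As a quick illustration (userspace C sketch, not taken from the kernel),
an OR into one of its own operands is safe because each word is read
before it is written, which is what makes the intermediate bitmap
removable:

#include <stdio.h>

static void or_inplace(unsigned long *dst, const unsigned long *src,
		       int nwords)
{
	int i;

	for (i = 0; i < nwords; i++)
		dst[i] |= src[i];	/* dst[i] read before being written */
}

int main(void)
{
	unsigned long a[2] = { 0x5, 0x0 };
	unsigned long b[2] = { 0x3, 0x8 };

	or_inplace(a, b, 2);			/* dst aliases one operand */
	printf("%lx %lx\n", a[0], a[1]);	/* prints: 7 8 */
	return 0;
}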

Signed-off-by: Christophe Leroy <[email protected]>
---
v2: New in v2

arch/powerpc/mm/slice.c | 15 +++------------
1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index fa6f7f63223c..9d88b1c03caa 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -72,8 +72,6 @@ static void slice_print_mask(const char *label, struct slice_mask mask) {}
do { if (nbits) bitmap_zero(dst, nbits); } while (0)
#define slice_bitmap_set(dst, pos, nbits) \
do { if (nbits) bitmap_set(dst, pos, nbits); } while (0)
-#define slice_bitmap_copy(dst, src, nbits) \
- do { if (nbits) bitmap_copy(dst, src, nbits); } while (0)
#define slice_bitmap_and(dst, src1, src2, nbits) \
({ (nbits) ? bitmap_and(dst, src1, src2, nbits) : 0; })
#define slice_bitmap_or(dst, src1, src2, nbits) \
@@ -416,25 +414,18 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,

static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src)
{
- DECLARE_BITMAP(result, SLICE_NUM_HIGH);
-
slice_bitmap_or(dst->low_slices, dst->low_slices, src->low_slices,
SLICE_NUM_LOW);
- slice_bitmap_or(result, dst->high_slices, src->high_slices,
+ slice_bitmap_or(dst->high_slices, dst->high_slices, src->high_slices,
SLICE_NUM_HIGH);
- slice_bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
}

static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask *src)
{
- DECLARE_BITMAP(result, SLICE_NUM_HIGH);
-
slice_bitmap_andnot(dst->low_slices, dst->low_slices, src->low_slices,
SLICE_NUM_LOW);
-
- slice_bitmap_andnot(result, dst->high_slices, src->high_slices,
- SLICE_NUM_HIGH);
- slice_bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
+ slice_bitmap_andnot(dst->high_slices, dst->high_slices,
+ src->high_slices, SLICE_NUM_HIGH);
}

#ifdef CONFIG_PPC_64K_PAGES
--
2.13.3

2018-01-17 09:22:53

by Christophe Leroy

Subject: [PATCH v2 2/5] powerpc/32: Fix hugepage allocation on 8xx at hint address

On the 8xx, the page size is set in the PMD entry and applies to
all pages of the page table pointed to by that PMD entry.

When an app has some regular pages allocated (e.g. see below) and tries
to mmap() a huge page at a hint address covered by the same PMD entry,
the kernel accepts the hint although the 8xx cannot handle different
page sizes in the same PMD entry.

10000000-10001000 r-xp 00000000 00:0f 2597 /root/malloc
10010000-10011000 rwxp 00000000 00:0f 2597 /root/malloc

mmap(0x10080000, 524288, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS|0x40000, -1, 0) = 0x10080000

This results in the app remaining forever in do_page_fault()/hugetlb_fault()
and when interrupting that app, we get the following warning:

[162980.035629] WARNING: CPU: 0 PID: 2777 at arch/powerpc/mm/hugetlbpage.c:354 hugetlb_free_pgd_range+0xc8/0x1e4
[162980.035699] CPU: 0 PID: 2777 Comm: malloc Tainted: G W 4.14.6 #85
[162980.035744] task: c67e2c00 task.stack: c668e000
[162980.035783] NIP: c000fe18 LR: c00e1eec CTR: c00f90c0
[162980.035830] REGS: c668fc20 TRAP: 0700 Tainted: G W (4.14.6)
[162980.035854] MSR: 00029032 <EE,ME,IR,DR,RI> CR: 24044224 XER: 20000000
[162980.036003]
[162980.036003] GPR00: c00e1eec c668fcd0 c67e2c00 00000010 c6869410 10080000 00000000 77fb4000
[162980.036003] GPR08: ffff0001 0683c001 00000000 ffffff80 44028228 10018a34 00004008 418004fc
[162980.036003] GPR16: c668e000 00040100 c668e000 c06c0000 c668fe78 c668e000 c6835ba0 c668fd48
[162980.036003] GPR24: 00000000 73ffffff 74000000 00000001 77fb4000 100fffff 10100000 10100000
[162980.036743] NIP [c000fe18] hugetlb_free_pgd_range+0xc8/0x1e4
[162980.036839] LR [c00e1eec] free_pgtables+0x12c/0x150
[162980.036861] Call Trace:
[162980.036939] [c668fcd0] [c00f0774] unlink_anon_vmas+0x1c4/0x214 (unreliable)
[162980.037040] [c668fd10] [c00e1eec] free_pgtables+0x12c/0x150
[162980.037118] [c668fd40] [c00eabac] exit_mmap+0xe8/0x1b4
[162980.037210] [c668fda0] [c0019710] mmput.part.9+0x20/0xd8
[162980.037301] [c668fdb0] [c001ecb0] do_exit+0x1f0/0x93c
[162980.037386] [c668fe00] [c001f478] do_group_exit+0x40/0xcc
[162980.037479] [c668fe10] [c002a76c] get_signal+0x47c/0x614
[162980.037570] [c668fe70] [c0007840] do_signal+0x54/0x244
[162980.037654] [c668ff30] [c0007ae8] do_notify_resume+0x34/0x88
[162980.037744] [c668ff40] [c000dae8] do_user_signal+0x74/0xc4
[162980.037781] Instruction dump:
[162980.037821] 7fdff378 81370000 54a3463a 80890020 7d24182e 7c841a14 712a0004 4082ff94
[162980.038014] 2f890000 419e0010 712a0ff0 408200e0 <0fe00000> 54a9000a 7f984840 419d0094
[162980.038216] ---[ end trace c0ceeca8e7a5800a ]---
[162980.038754] BUG: non-zero nr_ptes on freeing mm: 1
[162985.363322] BUG: non-zero nr_ptes on freeing mm: -1

In order to fix this, this patch uses the address space "slices"
implemented for BOOK3S/64 and enhanced to support PPC32 by the
preceding patch.

This patch modifies the context.id on the 8xx to be in the range
[1:16] instead of [0:15] in order to identify context.id == 0 as
an uninitialised context, as done on BOOK3S.
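A hypothetical sketch of the resulting mapping (helper name invented for
illustration; the actual change is the "id - 1" written in set_context()
in the diff below):

/* Software context ids now run 1..16 so that id == 0 can mean "not
 * initialised"; the hardware CASID register still takes 0..15. */
static unsigned int casid_from_context_id(unsigned int id)
{
	return id - 1;
}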

This patch activates CONFIG_PPC_MM_SLICES when CONFIG_HUGETLB_PAGE is
selected for the 8xx.

Although we could in theory have as many slices as PMD entries, the
current slices implementation limits the number of low slices to 16.
This limitation does not prevent us from fixing the initial issue,
although it is suboptimal. It will be cured in a subsequent patch.

Fixes: 4b91428699477 ("powerpc/8xx: Implement support of hugepages")
Signed-off-by: Christophe Leroy <[email protected]>
---
v2: First patch of the v1 series split in two parts

arch/powerpc/include/asm/mmu-8xx.h | 6 ++++++
arch/powerpc/kernel/setup-common.c | 2 ++
arch/powerpc/mm/8xx_mmu.c | 2 +-
arch/powerpc/mm/hugetlbpage.c | 2 ++
arch/powerpc/mm/mmu_context_nohash.c | 4 ++--
arch/powerpc/mm/slice.c | 2 ++
arch/powerpc/platforms/Kconfig.cputype | 1 +
7 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h
index 5bb3dbede41a..5f89b6010453 100644
--- a/arch/powerpc/include/asm/mmu-8xx.h
+++ b/arch/powerpc/include/asm/mmu-8xx.h
@@ -169,6 +169,12 @@ typedef struct {
unsigned int id;
unsigned int active;
unsigned long vdso_base;
+#ifdef CONFIG_PPC_MM_SLICES
+ u16 user_psize; /* page size index */
+ u64 low_slices_psize; /* page size encodings */
+ unsigned char high_slices_psize[0];
+ unsigned long slb_addr_limit;
+#endif
} mm_context_t;

#define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000)
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 9d213542a48b..67075a1cff36 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -927,6 +927,8 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_PPC64
if (!radix_enabled())
init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
+#elif defined(CONFIG_PPC_8xx)
+ init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW;
#else
#error "context.addr_limit not initialized."
#endif
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
index f29212e40f40..0be77709446c 100644
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/8xx_mmu.c
@@ -192,7 +192,7 @@ void set_context(unsigned long id, pgd_t *pgd)
mtspr(SPRN_M_TW, __pa(pgd) - offset);

/* Update context */
- mtspr(SPRN_M_CASID, id);
+ mtspr(SPRN_M_CASID, id - 1);
/* sync */
mb();
}
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index a9b9083c5e49..79e1378ee303 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -553,9 +553,11 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
struct hstate *hstate = hstate_file(file);
int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));

+#ifdef CONFIG_PPC_RADIX_MMU
if (radix_enabled())
return radix__hugetlb_get_unmapped_area(file, addr, len,
pgoff, flags);
+#endif
return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1);
}
#endif
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 42e02f5b6660..c1e1bf186871 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -435,8 +435,8 @@ void __init mmu_context_init(void)
* -- BenH
*/
if (mmu_has_feature(MMU_FTR_TYPE_8xx)) {
- first_context = 0;
- last_context = 15;
+ first_context = 1;
+ last_context = 16;
no_selective_tlbil = true;
} else if (mmu_has_feature(MMU_FTR_TYPE_47x)) {
first_context = 1;
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 3f35a93afe13..b617acf35836 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -206,6 +206,7 @@ static int slice_check_fit(struct mm_struct *mm,

static void slice_flush_segments(void *parm)
{
+#ifdef CONFIG_PPC_BOOK3S_64
struct mm_struct *mm = parm;
unsigned long flags;

@@ -217,6 +218,7 @@ static void slice_flush_segments(void *parm)
local_irq_save(flags);
slb_flush_and_rebolt();
local_irq_restore(flags);
+#endif
}

static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize)
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index ae07470fde3c..73a7ea333e9e 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -334,6 +334,7 @@ config PPC_BOOK3E_MMU
config PPC_MM_SLICES
bool
default y if PPC_BOOK3S_64
+ default y if PPC_8xx && HUGETLB_PAGE
default n

config PPC_HAVE_PMU_SUPPORT
--
2.13.3

2018-01-17 09:23:15

by Christophe Leroy

Subject: [PATCH v2 4/5] powerpc/8xx: Increase the number of mm slices

On the 8xx, we can have as many slices as PMD entries.
This means we could have 1024 slices in 4k pages mode
and 64 slices in 16k pages mode.

However, due to a stack overflow in slice_get_unmapped_area(),
we limit the number of slices to 512.
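A back-of-the-envelope sketch (userspace C, assuming PMD_SHIFT == 22
with 4k pages) of how the low-slice count drives the size of each
on-stack slice mask in slice_get_unmapped_area():

#include <stdio.h>

int main(void)
{
	int shift;

	for (shift = 22; shift <= 28; shift++) {
		unsigned long nslices = (0xfffffffful >> shift) + 1;

		/* each DECLARE_BITMAP(low_slices, nslices) costs nslices/8
		 * bytes, and several such masks live on the stack at once */
		printf("shift=%d -> %4lu slices, %3lu bytes per bitmap\n",
		       shift, nslices, nslices / 8);
	}
	return 0;
}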

Signed-off-by: Christophe Leroy <[email protected]>
---
v2: no change

arch/powerpc/include/asm/mmu-8xx.h | 6 +++++-
arch/powerpc/include/asm/page_32.h | 3 ++-
2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h
index d669d0062da4..40aa7b0cd0dc 100644
--- a/arch/powerpc/include/asm/mmu-8xx.h
+++ b/arch/powerpc/include/asm/mmu-8xx.h
@@ -171,7 +171,11 @@ typedef struct {
unsigned long vdso_base;
#ifdef CONFIG_PPC_MM_SLICES
u16 user_psize; /* page size index */
- unsigned char low_slices_psize[8]; /* 16 slices */
+#if defined(CONFIG_PPC_16K_PAGES)
+ unsigned char low_slices_psize[32]; /* 64 slices */
+#else
+ unsigned char low_slices_psize[256]; /* 512 slices */
+#endif
unsigned char high_slices_psize[0];
unsigned long slb_addr_limit;
#endif
diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h
index f7d1bd1183c8..43695ce7ee07 100644
--- a/arch/powerpc/include/asm/page_32.h
+++ b/arch/powerpc/include/asm/page_32.h
@@ -62,7 +62,8 @@ extern void copy_page(void *to, void *from);

#ifdef CONFIG_PPC_MM_SLICES

-#define SLICE_LOW_SHIFT 28
+/* SLICE_LOW_SHIFT >= 23 to avoid stack overflow in slice_get_unmapped_area() */
+#define SLICE_LOW_SHIFT (PMD_SHIFT > 23 ? PMD_SHIFT : 23)
#define SLICE_HIGH_SHIFT 0

#define SLICE_LOW_TOP (0xfffffffful)
--
2.13.3

2018-01-17 09:23:43

by Christophe Leroy

Subject: [PATCH v2 3/5] powerpc/mm: Allow more than 16 low slices

While the implementation of the "slices" address space allows
a significant number of high slices, it limits the number of
low slices to 16 due to the use of a single u64 low_slices_psize
element in struct mm_context_t.

On the 8xx, the minimum slice size is the size of the area
covered by a single PMD entry, i.e. 4M in 4K pages mode and 64M in
16K pages mode. This means we could have up to 1024 and 64 slices
respectively.

In order to remove this limitation, this patch switches the
handling of low_slices to bitmaps, as already done for high_slices.
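The patch also turns low_slices_psize into a byte array holding one
4-bit page-size index per slice, two slices per byte. A minimal sketch
of that packing (hypothetical helper names, mirroring the arithmetic
used in the diff below):

/* Slice i lives in byte i >> 1: low nibble when (i & 0x1) == 0,
 * high nibble otherwise. */
static unsigned int get_psize_nibble(const unsigned char *psizes,
				     unsigned long i)
{
	unsigned long index = i >> 1;
	unsigned long mask_index = i & 0x1;

	return (psizes[index] >> (mask_index * 4)) & 0xf;
}

static void set_psize_nibble(unsigned char *psizes, unsigned long i,
			     unsigned int psize)
{
	unsigned long index = i >> 1;
	unsigned long mask_index = i & 0x1;

	psizes[index] = (psizes[index] & ~(0xf << (mask_index * 4))) |
			(psize << (mask_index * 4));
}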

Signed-off-by: Christophe Leroy <[email protected]>
---
v2: Using slice_bitmap_xxx() macros instead of bitmap_xxx() functions.

arch/powerpc/include/asm/book3s/64/mmu.h | 2 +-
arch/powerpc/include/asm/mmu-8xx.h | 2 +-
arch/powerpc/include/asm/paca.h | 2 +-
arch/powerpc/kernel/paca.c | 3 +-
arch/powerpc/mm/hash_utils_64.c | 13 ++--
arch/powerpc/mm/slb_low.S | 8 ++-
arch/powerpc/mm/slice.c | 104 +++++++++++++++++--------------
7 files changed, 74 insertions(+), 60 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index c9448e19847a..27e7e9732ea1 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -91,7 +91,7 @@ typedef struct {
struct npu_context *npu_context;

#ifdef CONFIG_PPC_MM_SLICES
- u64 low_slices_psize; /* SLB page size encodings */
+ unsigned char low_slices_psize[8]; /* SLB page size encodings */
unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
unsigned long slb_addr_limit;
#else
diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h
index 5f89b6010453..d669d0062da4 100644
--- a/arch/powerpc/include/asm/mmu-8xx.h
+++ b/arch/powerpc/include/asm/mmu-8xx.h
@@ -171,7 +171,7 @@ typedef struct {
unsigned long vdso_base;
#ifdef CONFIG_PPC_MM_SLICES
u16 user_psize; /* page size index */
- u64 low_slices_psize; /* page size encodings */
+ unsigned char low_slices_psize[8]; /* 16 slices */
unsigned char high_slices_psize[0];
unsigned long slb_addr_limit;
#endif
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 23ac7fc0af23..0a6eea59a1c1 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -141,7 +141,7 @@ struct paca_struct {
#ifdef CONFIG_PPC_BOOK3S
mm_context_id_t mm_ctx_id;
#ifdef CONFIG_PPC_MM_SLICES
- u64 mm_ctx_low_slices_psize;
+ unsigned char mm_ctx_low_slices_psize[8];
unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
unsigned long mm_ctx_slb_addr_limit;
#else
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index d6597038931d..8e1566bf82b8 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -264,7 +264,8 @@ void copy_mm_to_paca(struct mm_struct *mm)
#ifdef CONFIG_PPC_MM_SLICES
VM_BUG_ON(!mm->context.slb_addr_limit);
get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit;
- get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize;
+ memcpy(&get_paca()->mm_ctx_low_slices_psize,
+ &context->low_slices_psize, sizeof(context->low_slices_psize));
memcpy(&get_paca()->mm_ctx_high_slices_psize,
&context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm));
#else /* CONFIG_PPC_MM_SLICES */
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 3266b3326088..2f0c6b527a83 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1097,19 +1097,18 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
#ifdef CONFIG_PPC_MM_SLICES
static unsigned int get_paca_psize(unsigned long addr)
{
- u64 lpsizes;
- unsigned char *hpsizes;
+ unsigned char *psizes;
unsigned long index, mask_index;

if (addr <= SLICE_LOW_TOP) {
- lpsizes = get_paca()->mm_ctx_low_slices_psize;
+ psizes = get_paca()->mm_ctx_low_slices_psize;
index = GET_LOW_SLICE_INDEX(addr);
- return (lpsizes >> (index * 4)) & 0xF;
+ } else {
+ psizes = get_paca()->mm_ctx_high_slices_psize;
+ index = GET_HIGH_SLICE_INDEX(addr);
}
- hpsizes = get_paca()->mm_ctx_high_slices_psize;
- index = GET_HIGH_SLICE_INDEX(addr);
mask_index = index & 0x1;
- return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF;
+ return (psizes[index >> 1] >> (mask_index * 4)) & 0xF;
}

#else
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 2cf5ef3fc50d..2c7c717fd2ea 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -200,10 +200,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
5:
/*
* Handle lpsizes
- * r9 is get_paca()->context.low_slices_psize, r11 is index
+ * r9 is get_paca()->context.low_slices_psize[index], r11 is mask_index
*/
- ld r9,PACALOWSLICESPSIZE(r13)
- mr r11,r10
+ srdi r11,r10,1 /* index */
+ addi r9,r11,PACALOWSLICESPSIZE
+ lbzx r9,r13,r9 /* r9 is lpsizes[r11] */
+ rldicl r11,r10,0,63 /* r11 = r10 & 0x1 */
6:
sldi r11,r11,2 /* index * 4 */
/* Extract the psize and multiply to get an array offset */
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index b617acf35836..fa6f7f63223c 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -43,7 +43,7 @@ static DEFINE_SPINLOCK(slice_convert_lock);
* in 1TB size.
*/
struct slice_mask {
- u64 low_slices;
+ DECLARE_BITMAP(low_slices, SLICE_NUM_LOW);
DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
};

@@ -54,7 +54,8 @@ static void slice_print_mask(const char *label, struct slice_mask mask)
{
if (!_slice_debug)
return;
- pr_devel("%s low_slice: %*pbl\n", label, (int)SLICE_NUM_LOW, &mask.low_slices);
+ pr_devel("%s low_slice: %*pbl\n", label, (int)SLICE_NUM_LOW,
+ mask.low_slices);
pr_devel("%s high_slice: %*pbl\n", label, (int)SLICE_NUM_HIGH, mask.high_slices);
}

@@ -89,14 +90,17 @@ static void slice_range_to_mask(unsigned long start, unsigned long len,
{
unsigned long end = start + len - 1;

- ret->low_slices = 0;
+ slice_bitmap_zero(ret->low_slices, SLICE_NUM_LOW);
slice_bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);

if (start <= SLICE_LOW_TOP) {
unsigned long mend = min(end, SLICE_LOW_TOP);
+ unsigned long start_index = GET_LOW_SLICE_INDEX(start);
+ unsigned long align_end = ALIGN(mend, (1UL << SLICE_LOW_SHIFT));
+ unsigned long count = GET_LOW_SLICE_INDEX(align_end) -
+ start_index;

- ret->low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
- - (1u << GET_LOW_SLICE_INDEX(start));
+ slice_bitmap_set(ret->low_slices, start_index, count);
}

if ((start + len) > SLICE_LOW_TOP) {
@@ -144,12 +148,12 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
{
unsigned long i;

- ret->low_slices = 0;
+ slice_bitmap_zero(ret->low_slices, SLICE_NUM_LOW);
slice_bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);

for (i = 0; i < SLICE_NUM_LOW; i++)
if (!slice_low_has_vma(mm, i))
- ret->low_slices |= 1u << i;
+ __set_bit(i, ret->low_slices);

if (high_limit <= SLICE_LOW_TOP)
return;
@@ -162,18 +166,20 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret,
unsigned long high_limit)
{
- unsigned char *hpsizes;
+ unsigned char *hpsizes, *lpsizes;
int index, mask_index;
unsigned long i;
- u64 lpsizes;

- ret->low_slices = 0;
+ slice_bitmap_zero(ret->low_slices, SLICE_NUM_LOW);
slice_bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);

lpsizes = mm->context.low_slices_psize;
- for (i = 0; i < SLICE_NUM_LOW; i++)
- if (((lpsizes >> (i * 4)) & 0xf) == psize)
- ret->low_slices |= 1u << i;
+ for (i = 0; i < SLICE_NUM_LOW; i++) {
+ mask_index = i & 0x1;
+ index = i >> 1;
+ if (((lpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
+ __set_bit(i, ret->low_slices);
+ }

if (high_limit <= SLICE_LOW_TOP)
return;
@@ -190,6 +196,7 @@ static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_ma
static int slice_check_fit(struct mm_struct *mm,
struct slice_mask mask, struct slice_mask available)
{
+ DECLARE_BITMAP(result_low, SLICE_NUM_LOW);
DECLARE_BITMAP(result, SLICE_NUM_HIGH);
/*
* Make sure we just do bit compare only to the max
@@ -197,11 +204,13 @@ static int slice_check_fit(struct mm_struct *mm,
*/
unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit);

+ slice_bitmap_and(result_low, mask.low_slices,
+ available.low_slices, SLICE_NUM_LOW);
slice_bitmap_and(result, mask.high_slices,
available.high_slices, slice_count);

- return (mask.low_slices & available.low_slices) == mask.low_slices &&
- slice_bitmap_equal(result, mask.high_slices, slice_count);
+ return slice_bitmap_equal(result_low, mask.low_slices, SLICE_NUM_LOW) &&
+ slice_bitmap_equal(result, mask.high_slices, slice_count);
}

static void slice_flush_segments(void *parm)
@@ -225,8 +234,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
{
int index, mask_index;
/* Write the new slice psize bits */
- unsigned char *hpsizes;
- u64 lpsizes;
+ unsigned char *hpsizes, *lpsizes;
unsigned long i, flags;

slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
@@ -238,13 +246,14 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
spin_lock_irqsave(&slice_convert_lock, flags);

lpsizes = mm->context.low_slices_psize;
- for (i = 0; i < SLICE_NUM_LOW; i++)
- if (mask.low_slices & (1u << i))
- lpsizes = (lpsizes & ~(0xful << (i * 4))) |
- (((unsigned long)psize) << (i * 4));
-
- /* Assign the value back */
- mm->context.low_slices_psize = lpsizes;
+ for (i = 0; i < SLICE_NUM_LOW; i++) {
+ mask_index = i & 0x1;
+ index = i >> 1;
+ if (test_bit(i, mask.low_slices))
+ lpsizes[index] = (lpsizes[index] &
+ ~(0xf << (mask_index * 4))) |
+ (((unsigned long)psize) << (mask_index * 4));
+ }

hpsizes = mm->context.high_slices_psize;
for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) {
@@ -281,7 +290,7 @@ static bool slice_scan_available(unsigned long addr,
if (addr <= SLICE_LOW_TOP) {
slice = GET_LOW_SLICE_INDEX(addr);
*boundary_addr = (slice + end) << SLICE_LOW_SHIFT;
- return !!(available.low_slices & (1u << slice));
+ return !!test_bit(slice, available.low_slices);
} else {
slice = GET_HIGH_SLICE_INDEX(addr);
*boundary_addr = (slice + end) ?
@@ -409,7 +418,8 @@ static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src)
{
DECLARE_BITMAP(result, SLICE_NUM_HIGH);

- dst->low_slices |= src->low_slices;
+ slice_bitmap_or(dst->low_slices, dst->low_slices, src->low_slices,
+ SLICE_NUM_LOW);
slice_bitmap_or(result, dst->high_slices, src->high_slices,
SLICE_NUM_HIGH);
slice_bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
@@ -419,7 +429,8 @@ static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask *
{
DECLARE_BITMAP(result, SLICE_NUM_HIGH);

- dst->low_slices &= ~src->low_slices;
+ slice_bitmap_andnot(dst->low_slices, dst->low_slices, src->low_slices,
+ SLICE_NUM_LOW);

slice_bitmap_andnot(result, dst->high_slices, src->high_slices,
SLICE_NUM_HIGH);
@@ -470,14 +481,14 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
/*
* init different masks
*/
- mask.low_slices = 0;
+ slice_bitmap_zero(mask.low_slices, SLICE_NUM_LOW);
slice_bitmap_zero(mask.high_slices, SLICE_NUM_HIGH);

/* silence stupid warning */;
- potential_mask.low_slices = 0;
+ slice_bitmap_zero(potential_mask.low_slices, SLICE_NUM_LOW);
slice_bitmap_zero(potential_mask.high_slices, SLICE_NUM_HIGH);

- compat_mask.low_slices = 0;
+ slice_bitmap_zero(compat_mask.low_slices, SLICE_NUM_LOW);
slice_bitmap_zero(compat_mask.high_slices, SLICE_NUM_HIGH);

/* Sanity checks */
@@ -616,7 +627,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
convert:
slice_andnot_mask(&mask, &good_mask);
slice_andnot_mask(&mask, &compat_mask);
- if (mask.low_slices ||
+ if (!slice_bitmap_empty(mask.low_slices, SLICE_NUM_LOW) ||
!slice_bitmap_empty(mask.high_slices, SLICE_NUM_HIGH)) {
slice_convert(mm, mask, psize);
if (psize > MMU_PAGE_BASE)
@@ -649,7 +660,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,

unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
{
- unsigned char *hpsizes;
+ unsigned char *psizes;
int index, mask_index;

/*
@@ -663,15 +674,14 @@ unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
#endif
}
if (addr <= SLICE_LOW_TOP) {
- u64 lpsizes;
- lpsizes = mm->context.low_slices_psize;
+ psizes = mm->context.low_slices_psize;
index = GET_LOW_SLICE_INDEX(addr);
- return (lpsizes >> (index * 4)) & 0xf;
+ } else {
+ psizes = mm->context.high_slices_psize;
+ index = GET_HIGH_SLICE_INDEX(addr);
}
- hpsizes = mm->context.high_slices_psize;
- index = GET_HIGH_SLICE_INDEX(addr);
mask_index = index & 0x1;
- return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xf;
+ return (psizes[index >> 1] >> (mask_index * 4)) & 0xf;
}
EXPORT_SYMBOL_GPL(get_slice_psize);

@@ -692,8 +702,8 @@ EXPORT_SYMBOL_GPL(get_slice_psize);
void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
{
int index, mask_index;
- unsigned char *hpsizes;
- unsigned long flags, lpsizes;
+ unsigned char *hpsizes, *lpsizes;
+ unsigned long flags;
unsigned int old_psize;
int i;

@@ -711,12 +721,14 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
wmb();

lpsizes = mm->context.low_slices_psize;
- for (i = 0; i < SLICE_NUM_LOW; i++)
- if (((lpsizes >> (i * 4)) & 0xf) == old_psize)
- lpsizes = (lpsizes & ~(0xful << (i * 4))) |
- (((unsigned long)psize) << (i * 4));
- /* Assign the value back */
- mm->context.low_slices_psize = lpsizes;
+ for (i = 0; i < SLICE_NUM_LOW; i++) {
+ mask_index = i & 0x1;
+ index = i >> 1;
+ if (((lpsizes[index] >> (mask_index * 4)) & 0xf) == old_psize)
+ lpsizes[index] = (lpsizes[index] &
+ ~(0xf << (mask_index * 4))) |
+ (((unsigned long)psize) << (mask_index * 4));
+ }

hpsizes = mm->context.high_slices_psize;
for (i = 0; i < SLICE_NUM_HIGH; i++) {
--
2.13.3

2018-01-19 08:25:39

by Aneesh Kumar K.V

Subject: Re: [PATCH v2 1/5] powerpc/mm: Enhance 'slice' for supporting PPC32

Christophe Leroy <[email protected]> writes:

> In preparation for the following patch, which will fix an issue on
> the 8xx by re-using the 'slices', this patch enhances the
> 'slices' implementation to support 32-bit CPUs.
>
> On PPC32, the address space is limited to 4Gbytes, hence only the low
> slices will be used. As of today, the code uses
> SLICE_LOW_TOP (0x100000000ul) and compares it with addr to determine
> if addr refers to low or high space.
> On PPC32, such an (addr < SLICE_LOW_TOP) test is always false because
> the constant 0x100000000ul does not fit in a 32-bit unsigned long and
> truncates to 0. Therefore, the patch modifies SLICE_LOW_TOP to
> (0xfffffffful) and changes the tests to (addr <= SLICE_LOW_TOP), which
> is then always true on PPC32 since addr has type 'unsigned long', while
> leaving the PPC64 behaviour unchanged.
>
> This patch also moves the "slices" function prototypes from page_64.h to page.h.
>
> The high slices use bitmaps. As the bitmap functions are not prepared
> to handle bitmaps of size 0, the bitmap_xxx() calls are wrapped in
> slice_bitmap_xxx() macros which take care of the nbits == 0 case.
>
> Signed-off-by: Christophe Leroy <[email protected]>
> ---
> v2: First patch of the v1 series split in two parts; added slice_bitmap_xxx() macros.
>
> arch/powerpc/include/asm/page.h | 14 +++++++++
> arch/powerpc/include/asm/page_32.h | 19 ++++++++++++
> arch/powerpc/include/asm/page_64.h | 21 ++-----------
> arch/powerpc/mm/hash_utils_64.c | 2 +-
> arch/powerpc/mm/mmu_context_nohash.c | 7 +++++
> arch/powerpc/mm/slice.c | 60 ++++++++++++++++++++++++------------
> 6 files changed, 83 insertions(+), 40 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
> index 8da5d4c1cab2..d0384f9db9eb 100644
> --- a/arch/powerpc/include/asm/page.h
> +++ b/arch/powerpc/include/asm/page.h
> @@ -342,6 +342,20 @@ typedef struct page *pgtable_t;
> #endif
> #endif
>
> +#ifdef CONFIG_PPC_MM_SLICES
> +struct mm_struct;
> +
> +unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
> + unsigned long flags, unsigned int psize,
> + int topdown);
> +
> +unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr);
> +
> +void slice_set_user_psize(struct mm_struct *mm, unsigned int psize);
> +void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
> + unsigned long len, unsigned int psize);
> +#endif
> +

Should we do a slice.h, the way we have for other files? And then do

arch/powerpc/include/asm/book3s/64/slice.h that will carry
#define slice_bitmap_zero(dst, nbits) \
do { if (nbits) bitmap_zero(dst, nbits); } while (0)
#define slice_bitmap_set(dst, pos, nbits) \
do { if (nbits) bitmap_set(dst, pos, nbits); } while (0)
#define slice_bitmap_copy(dst, src, nbits) \
do { if (nbits) bitmap_copy(dst, src, nbits); } while (0)
#define slice_bitmap_and(dst, src1, src2, nbits) \
({ (nbits) ? bitmap_and(dst, src1, src2, nbits) : 0; })
#define slice_bitmap_or(dst, src1, src2, nbits) \
do { if (nbits) bitmap_or(dst, src1, src2, nbits); } while (0)
#define slice_bitmap_andnot(dst, src1, src2, nbits) \
({ (nbits) ? bitmap_andnot(dst, src1, src2, nbits) : 0; })
#define slice_bitmap_equal(src1, src2, nbits) \
({ (nbits) ? bitmap_equal(src1, src2, nbits) : 1; })
#define slice_bitmap_empty(src, nbits) \
({ (nbits) ? bitmap_empty(src, nbits) : 1; })

This, without the if (nbits) check, and as proper static inlines so that
we can do type checking.
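For instance, a sketch of what the static inline form could look like
(hypothetical, shown for the 64-bit header where nbits is never 0):

static inline void slice_bitmap_zero(unsigned long *dst, unsigned int nbits)
{
	bitmap_zero(dst, nbits);
}

static inline int slice_bitmap_and(unsigned long *dst,
				   const unsigned long *src1,
				   const unsigned long *src2,
				   unsigned int nbits)
{
	return bitmap_and(dst, src1, src2, nbits);
}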

also related definitions for
#define SLICE_LOW_SHIFT 28
#define SLICE_HIGH_SHIFT 0

#define SLICE_LOW_TOP (0xfffffffful)
#define SLICE_NUM_LOW ((SLICE_LOW_TOP >> SLICE_LOW_SHIFT) + 1)
#define SLICE_NUM_HIGH 0ul

Common stuff between 64 and 32 can go to
arch/powerpc/include/asm/slice.h?

It also gives an indication of which 32-bit version we are looking at
here. IIUC 8xx will go to arch/powerpc/include/asm/nohash/32/slice.h?

> #include <asm-generic/memory_model.h>
> #endif /* __ASSEMBLY__ */
>
> diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h
> index 5c378e9b78c8..f7d1bd1183c8 100644
> --- a/arch/powerpc/include/asm/page_32.h
> +++ b/arch/powerpc/include/asm/page_32.h
> @@ -60,4 +60,23 @@ extern void copy_page(void *to, void *from);
>
> #endif /* __ASSEMBLY__ */
>
> +#ifdef CONFIG_PPC_MM_SLICES
> +
> +#define SLICE_LOW_SHIFT 28
> +#define SLICE_HIGH_SHIFT 0
> +
> +#define SLICE_LOW_TOP (0xfffffffful)
> +#define SLICE_NUM_LOW ((SLICE_LOW_TOP >> SLICE_LOW_SHIFT) + 1)
> +#define SLICE_NUM_HIGH 0ul
> +
> +#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT)
> +#define GET_HIGH_SLICE_INDEX(addr) ((addr) & 0)
> +
> +#ifdef CONFIG_HUGETLB_PAGE
> +#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
> +#endif
> +#define HAVE_ARCH_UNMAPPED_AREA
> +#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
> +
> +#endif
> #endif /* _ASM_POWERPC_PAGE_32_H */
> diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
> index 56234c6fcd61..a7baef5bbe5f 100644
> --- a/arch/powerpc/include/asm/page_64.h
> +++ b/arch/powerpc/include/asm/page_64.h
> @@ -91,30 +91,13 @@ extern u64 ppc64_pft_size;
> #define SLICE_LOW_SHIFT 28
> #define SLICE_HIGH_SHIFT 40
>
> -#define SLICE_LOW_TOP (0x100000000ul)
> -#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
> +#define SLICE_LOW_TOP (0xfffffffful)
> +#define SLICE_NUM_LOW ((SLICE_LOW_TOP >> SLICE_LOW_SHIFT) + 1)
> #define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
>
> #define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT)
> #define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT)
>
> -#ifndef __ASSEMBLY__
> -struct mm_struct;
> -
> -extern unsigned long slice_get_unmapped_area(unsigned long addr,
> - unsigned long len,
> - unsigned long flags,
> - unsigned int psize,
> - int topdown);
> -
> -extern unsigned int get_slice_psize(struct mm_struct *mm,
> - unsigned long addr);
> -
> -extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize);
> -extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
> - unsigned long len, unsigned int psize);
> -
> -#endif /* __ASSEMBLY__ */
> #else
> #define slice_init()
> #ifdef CONFIG_PPC_BOOK3S_64
> diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
> index 655a5a9a183d..3266b3326088 100644
> --- a/arch/powerpc/mm/hash_utils_64.c
> +++ b/arch/powerpc/mm/hash_utils_64.c
> @@ -1101,7 +1101,7 @@ static unsigned int get_paca_psize(unsigned long addr)
> unsigned char *hpsizes;
> unsigned long index, mask_index;
>
> - if (addr < SLICE_LOW_TOP) {
> + if (addr <= SLICE_LOW_TOP) {
> lpsizes = get_paca()->mm_ctx_low_slices_psize;
> index = GET_LOW_SLICE_INDEX(addr);
> return (lpsizes >> (index * 4)) & 0xF;
> diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
> index 4554d6527682..42e02f5b6660 100644
> --- a/arch/powerpc/mm/mmu_context_nohash.c
> +++ b/arch/powerpc/mm/mmu_context_nohash.c
> @@ -331,6 +331,13 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm)
> {
> pr_hard("initing context for mm @%p\n", mm);
>
> +#ifdef CONFIG_PPC_MM_SLICES
> + if (!mm->context.slb_addr_limit)
> + mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW;
> + if (!mm->context.id)
> + slice_set_user_psize(mm, mmu_virtual_psize);
> +#endif
> +
> mm->context.id = MMU_NO_CONTEXT;
> mm->context.active = 0;
> return 0;
> diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
> index 23ec2c5e3b78..3f35a93afe13 100644
> --- a/arch/powerpc/mm/slice.c
> +++ b/arch/powerpc/mm/slice.c
> @@ -67,16 +67,33 @@ static void slice_print_mask(const char *label, struct slice_mask mask) {}
>
> #endif
>
> +#define slice_bitmap_zero(dst, nbits) \
> + do { if (nbits) bitmap_zero(dst, nbits); } while (0)
> +#define slice_bitmap_set(dst, pos, nbits) \
> + do { if (nbits) bitmap_set(dst, pos, nbits); } while (0)
> +#define slice_bitmap_copy(dst, src, nbits) \
> + do { if (nbits) bitmap_copy(dst, src, nbits); } while (0)
> +#define slice_bitmap_and(dst, src1, src2, nbits) \
> + ({ (nbits) ? bitmap_and(dst, src1, src2, nbits) : 0; })
> +#define slice_bitmap_or(dst, src1, src2, nbits) \
> + do { if (nbits) bitmap_or(dst, src1, src2, nbits); } while (0)
> +#define slice_bitmap_andnot(dst, src1, src2, nbits) \
> + ({ (nbits) ? bitmap_andnot(dst, src1, src2, nbits) : 0; })
> +#define slice_bitmap_equal(src1, src2, nbits) \
> + ({ (nbits) ? bitmap_equal(src1, src2, nbits) : 1; })
> +#define slice_bitmap_empty(src, nbits) \
> + ({ (nbits) ? bitmap_empty(src, nbits) : 1; })
> +
> static void slice_range_to_mask(unsigned long start, unsigned long len,
> struct slice_mask *ret)
> {
> unsigned long end = start + len - 1;
>
> ret->low_slices = 0;
> - bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
> + slice_bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
>
> - if (start < SLICE_LOW_TOP) {
> - unsigned long mend = min(end, (SLICE_LOW_TOP - 1));
> + if (start <= SLICE_LOW_TOP) {
> + unsigned long mend = min(end, SLICE_LOW_TOP);
>
> ret->low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
> - (1u << GET_LOW_SLICE_INDEX(start));
> @@ -87,7 +104,7 @@ static void slice_range_to_mask(unsigned long start, unsigned long len,
> unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT));
> unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index;
>
> - bitmap_set(ret->high_slices, start_index, count);
> + slice_bitmap_set(ret->high_slices, start_index, count);
> }
> }
>
> @@ -117,7 +134,7 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
> * of the high or low area bitmaps, the first high area starts
> * at 4GB, not 0 */
> if (start == 0)
> - start = SLICE_LOW_TOP;
> + start = SLICE_LOW_TOP + 1;
>
> return !slice_area_is_free(mm, start, end - start);
> }
> @@ -128,7 +145,7 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
> unsigned long i;
>
> ret->low_slices = 0;
> - bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
> + slice_bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
>
> for (i = 0; i < SLICE_NUM_LOW; i++)
> if (!slice_low_has_vma(mm, i))
> @@ -151,7 +168,7 @@ static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_ma
> u64 lpsizes;
>
> ret->low_slices = 0;
> - bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
> + slice_bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
>
> lpsizes = mm->context.low_slices_psize;
> for (i = 0; i < SLICE_NUM_LOW; i++)
> @@ -180,11 +197,11 @@ static int slice_check_fit(struct mm_struct *mm,
> */
> unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit);
>
> - bitmap_and(result, mask.high_slices,
> - available.high_slices, slice_count);
> + slice_bitmap_and(result, mask.high_slices,
> + available.high_slices, slice_count);
>
> return (mask.low_slices & available.low_slices) == mask.low_slices &&
> - bitmap_equal(result, mask.high_slices, slice_count);
> + slice_bitmap_equal(result, mask.high_slices, slice_count);
> }
>
> static void slice_flush_segments(void *parm)
> @@ -259,7 +276,7 @@ static bool slice_scan_available(unsigned long addr,
> unsigned long *boundary_addr)
> {
> unsigned long slice;
> - if (addr < SLICE_LOW_TOP) {
> + if (addr <= SLICE_LOW_TOP) {
> slice = GET_LOW_SLICE_INDEX(addr);
> *boundary_addr = (slice + end) << SLICE_LOW_SHIFT;
> return !!(available.low_slices & (1u << slice));
> @@ -391,8 +408,9 @@ static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src)
> DECLARE_BITMAP(result, SLICE_NUM_HIGH);
>
> dst->low_slices |= src->low_slices;
> - bitmap_or(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
> - bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
> + slice_bitmap_or(result, dst->high_slices, src->high_slices,
> + SLICE_NUM_HIGH);
> + slice_bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
> }
>
> static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask *src)
> @@ -401,8 +419,9 @@ static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask *
>
> dst->low_slices &= ~src->low_slices;
>
> - bitmap_andnot(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
> - bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
> + slice_bitmap_andnot(result, dst->high_slices, src->high_slices,
> + SLICE_NUM_HIGH);
> + slice_bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
> }
>
> #ifdef CONFIG_PPC_64K_PAGES
> @@ -450,14 +469,14 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
> * init different masks
> */
> mask.low_slices = 0;
> - bitmap_zero(mask.high_slices, SLICE_NUM_HIGH);
> + slice_bitmap_zero(mask.high_slices, SLICE_NUM_HIGH);
>
> /* silence stupid warning */;
> potential_mask.low_slices = 0;
> - bitmap_zero(potential_mask.high_slices, SLICE_NUM_HIGH);
> + slice_bitmap_zero(potential_mask.high_slices, SLICE_NUM_HIGH);
>
> compat_mask.low_slices = 0;
> - bitmap_zero(compat_mask.high_slices, SLICE_NUM_HIGH);
> + slice_bitmap_zero(compat_mask.high_slices, SLICE_NUM_HIGH);
>
> /* Sanity checks */
> BUG_ON(mm->task_size == 0);
> @@ -595,7 +614,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
> convert:
> slice_andnot_mask(&mask, &good_mask);
> slice_andnot_mask(&mask, &compat_mask);
> - if (mask.low_slices || !bitmap_empty(mask.high_slices, SLICE_NUM_HIGH)) {
> + if (mask.low_slices ||
> + !slice_bitmap_empty(mask.high_slices, SLICE_NUM_HIGH)) {
> slice_convert(mm, mask, psize);
> if (psize > MMU_PAGE_BASE)
> on_each_cpu(slice_flush_segments, mm, 1);
> @@ -640,7 +660,7 @@ unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
> return MMU_PAGE_4K;
> #endif
> }
> - if (addr < SLICE_LOW_TOP) {
> + if (addr <= SLICE_LOW_TOP) {
> u64 lpsizes;
> lpsizes = mm->context.low_slices_psize;
> index = GET_LOW_SLICE_INDEX(addr);
> --
> 2.13.3


2018-01-19 08:27:35

by Aneesh Kumar K.V

Subject: Re: [PATCH v2 2/5] powerpc/32: Fix hugepage allocation on 8xx at hint address

Christophe Leroy <[email protected]> writes:

> On the 8xx, the page size is set in the PMD entry and applies to
> all pages of the page table pointed to by that PMD entry.
>
> When an app has some regular pages allocated (e.g. see below) and tries
> to mmap() a huge page at a hint address covered by the same PMD entry,
> the kernel accepts the hint although the 8xx cannot handle different
> page sizes in the same PMD entry.
>
> 10000000-10001000 r-xp 00000000 00:0f 2597 /root/malloc
> 10010000-10011000 rwxp 00000000 00:0f 2597 /root/malloc
>
> mmap(0x10080000, 524288, PROT_READ|PROT_WRITE,
> MAP_PRIVATE|MAP_ANONYMOUS|0x40000, -1, 0) = 0x10080000
>
> This results in the app remaining forever in do_page_fault()/hugetlb_fault()
> and when interrupting that app, we get the following warning:
>
> [162980.035629] WARNING: CPU: 0 PID: 2777 at arch/powerpc/mm/hugetlbpage.c:354 hugetlb_free_pgd_range+0xc8/0x1e4
> [162980.035699] CPU: 0 PID: 2777 Comm: malloc Tainted: G W 4.14.6 #85
> [162980.035744] task: c67e2c00 task.stack: c668e000
> [162980.035783] NIP: c000fe18 LR: c00e1eec CTR: c00f90c0
> [162980.035830] REGS: c668fc20 TRAP: 0700 Tainted: G W (4.14.6)
> [162980.035854] MSR: 00029032 <EE,ME,IR,DR,RI> CR: 24044224 XER: 20000000
> [162980.036003]
> [162980.036003] GPR00: c00e1eec c668fcd0 c67e2c00 00000010 c6869410 10080000 00000000 77fb4000
> [162980.036003] GPR08: ffff0001 0683c001 00000000 ffffff80 44028228 10018a34 00004008 418004fc
> [162980.036003] GPR16: c668e000 00040100 c668e000 c06c0000 c668fe78 c668e000 c6835ba0 c668fd48
> [162980.036003] GPR24: 00000000 73ffffff 74000000 00000001 77fb4000 100fffff 10100000 10100000
> [162980.036743] NIP [c000fe18] hugetlb_free_pgd_range+0xc8/0x1e4
> [162980.036839] LR [c00e1eec] free_pgtables+0x12c/0x150
> [162980.036861] Call Trace:
> [162980.036939] [c668fcd0] [c00f0774] unlink_anon_vmas+0x1c4/0x214 (unreliable)
> [162980.037040] [c668fd10] [c00e1eec] free_pgtables+0x12c/0x150
> [162980.037118] [c668fd40] [c00eabac] exit_mmap+0xe8/0x1b4
> [162980.037210] [c668fda0] [c0019710] mmput.part.9+0x20/0xd8
> [162980.037301] [c668fdb0] [c001ecb0] do_exit+0x1f0/0x93c
> [162980.037386] [c668fe00] [c001f478] do_group_exit+0x40/0xcc
> [162980.037479] [c668fe10] [c002a76c] get_signal+0x47c/0x614
> [162980.037570] [c668fe70] [c0007840] do_signal+0x54/0x244
> [162980.037654] [c668ff30] [c0007ae8] do_notify_resume+0x34/0x88
> [162980.037744] [c668ff40] [c000dae8] do_user_signal+0x74/0xc4
> [162980.037781] Instruction dump:
> [162980.037821] 7fdff378 81370000 54a3463a 80890020 7d24182e 7c841a14 712a0004 4082ff94
> [162980.038014] 2f890000 419e0010 712a0ff0 408200e0 <0fe00000> 54a9000a 7f984840 419d0094
> [162980.038216] ---[ end trace c0ceeca8e7a5800a ]---
> [162980.038754] BUG: non-zero nr_ptes on freeing mm: 1
> [162985.363322] BUG: non-zero nr_ptes on freeing mm: -1
>
> In order to fix this, this patch uses the address space "slices"
> implemented for BOOK3S/64 and enhanced to support PPC32 by the
> preceding patch.
>
> This patch modifies the context.id on the 8xx to be in the range
> [1:16] instead of [0:15] in order to identify context.id == 0 as
> an uninitialised context, as done on BOOK3S.
>
> This patch activates CONFIG_PPC_MM_SLICES when CONFIG_HUGETLB_PAGE is
> selected for the 8xx.
>
> Although we could in theory have as many slices as PMD entries, the
> current slices implementation limits the number of low slices to 16.
> This limitation does not prevent us from fixing the initial issue,
> although it is suboptimal. It will be cured in a subsequent patch.
>
> Fixes: 4b91428699477 ("powerpc/8xx: Implement support of hugepages")
> Signed-off-by: Christophe Leroy <[email protected]>
> ---
> v2: First patch of the v1 series split in two parts
>
> arch/powerpc/include/asm/mmu-8xx.h | 6 ++++++
> arch/powerpc/kernel/setup-common.c | 2 ++
> arch/powerpc/mm/8xx_mmu.c | 2 +-
> arch/powerpc/mm/hugetlbpage.c | 2 ++
> arch/powerpc/mm/mmu_context_nohash.c | 4 ++--
> arch/powerpc/mm/slice.c | 2 ++
> arch/powerpc/platforms/Kconfig.cputype | 1 +
> 7 files changed, 16 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h
> index 5bb3dbede41a..5f89b6010453 100644
> --- a/arch/powerpc/include/asm/mmu-8xx.h
> +++ b/arch/powerpc/include/asm/mmu-8xx.h
> @@ -169,6 +169,12 @@ typedef struct {
> unsigned int id;
> unsigned int active;
> unsigned long vdso_base;
> +#ifdef CONFIG_PPC_MM_SLICES
> + u16 user_psize; /* page size index */
> + u64 low_slices_psize; /* page size encodings */
> + unsigned char high_slices_psize[0];
> + unsigned long slb_addr_limit;
> +#endif
> } mm_context_t;
>
> #define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000)
> diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
> index 9d213542a48b..67075a1cff36 100644
> --- a/arch/powerpc/kernel/setup-common.c
> +++ b/arch/powerpc/kernel/setup-common.c
> @@ -927,6 +927,8 @@ void __init setup_arch(char **cmdline_p)
> #ifdef CONFIG_PPC64
> if (!radix_enabled())
> init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
> +#elif defined(CONFIG_PPC_8xx)
> + init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW;
> #else
> #error "context.addr_limit not initialized."
> #endif
> diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
> index f29212e40f40..0be77709446c 100644
> --- a/arch/powerpc/mm/8xx_mmu.c
> +++ b/arch/powerpc/mm/8xx_mmu.c
> @@ -192,7 +192,7 @@ void set_context(unsigned long id, pgd_t *pgd)
> mtspr(SPRN_M_TW, __pa(pgd) - offset);
>
> /* Update context */
> - mtspr(SPRN_M_CASID, id);
> + mtspr(SPRN_M_CASID, id - 1);
> /* sync */
> mb();
> }
> diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
> index a9b9083c5e49..79e1378ee303 100644
> --- a/arch/powerpc/mm/hugetlbpage.c
> +++ b/arch/powerpc/mm/hugetlbpage.c
> @@ -553,9 +553,11 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
> struct hstate *hstate = hstate_file(file);
> int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
>
> +#ifdef CONFIG_PPC_RADIX_MMU
> if (radix_enabled())
> return radix__hugetlb_get_unmapped_area(file, addr, len,
> pgoff, flags);
> +#endif

Didn't the if (0) remove the following radix__hugetlb_get_unmapped_area
call for you?
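
As a minimal sketch of the behaviour in question (hypothetical example,
not kernel code): the branch under if (0) is eliminated, so the called
function never needs to be defined, but C still requires it to be
declared for the call to parse:

extern int maybe(void);	/* declaration needed even for a dead call */

int f(void)
{
	if (0)
		return maybe();	/* optimised away: maybe() need not exist
				 * at link time, but without the prototype
				 * above this is a build error under
				 * -Werror=implicit-function-declaration */
	return 1;
}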


> return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1);
> }
> #endif
> diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
> index 42e02f5b6660..c1e1bf186871 100644
> --- a/arch/powerpc/mm/mmu_context_nohash.c
> +++ b/arch/powerpc/mm/mmu_context_nohash.c
> @@ -435,8 +435,8 @@ void __init mmu_context_init(void)
> * -- BenH
> */
> if (mmu_has_feature(MMU_FTR_TYPE_8xx)) {
> - first_context = 0;
> - last_context = 15;
> + first_context = 1;
> + last_context = 16;
> no_selective_tlbil = true;
> } else if (mmu_has_feature(MMU_FTR_TYPE_47x)) {
> first_context = 1;
> diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
> index 3f35a93afe13..b617acf35836 100644
> --- a/arch/powerpc/mm/slice.c
> +++ b/arch/powerpc/mm/slice.c
> @@ -206,6 +206,7 @@ static int slice_check_fit(struct mm_struct *mm,
>
> static void slice_flush_segments(void *parm)
> {
> +#ifdef CONFIG_PPC_BOOK3S_64
> struct mm_struct *mm = parm;
> unsigned long flags;
>
> @@ -217,6 +218,7 @@ static void slice_flush_segments(void *parm)
> local_irq_save(flags);
> slb_flush_and_rebolt();
> local_irq_restore(flags);
> +#endif
> }
>
> static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize)
> diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
> index ae07470fde3c..73a7ea333e9e 100644
> --- a/arch/powerpc/platforms/Kconfig.cputype
> +++ b/arch/powerpc/platforms/Kconfig.cputype
> @@ -334,6 +334,7 @@ config PPC_BOOK3E_MMU
> config PPC_MM_SLICES
> bool
> default y if PPC_BOOK3S_64
> + default y if PPC_8xx && HUGETLB_PAGE
> default n
>
> config PPC_HAVE_PMU_SUPPORT
> --
> 2.13.3


2018-01-19 08:32:18

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [PATCH v2 3/5] powerpc/mm: Allow more than 16 low slices

Christophe Leroy <[email protected]> writes:

> While the implementation of the "slices" address space allows
> a significant number of high slices, it limits the number of
> low slices to 16 due to the use of a single u64 low_slices_psize
> element in struct mm_context_t.
>
> On the 8xx, the minimum slice size is the size of the area
> covered by a single PMD entry, i.e. 4M in 4K pages mode and 64M in
> 16K pages mode. This means we could have up to 1024 and 64 slices
> respectively.
>
> In order to overcome this limitation, this patch switches the
> handling of low_slices to BITMAPs as done already for high_slices.

Does it have a performance impact? When we switched high_slices,
that was one of the questions asked. Now with a topdown search we should
mostly be using the high_slices. But it will be good to get numbers for
ppc64 for this change.
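
For context, the operations whose cost is in question change shape
roughly like this (sketch based on the patch):

	/* before: the 16 low slices tracked in a single u64 */
	dst->low_slices |= src->low_slices;

	/* after: low slices tracked as a bitmap, like the high slices */
	slice_bitmap_or(dst->low_slices, dst->low_slices,
			src->low_slices, SLICE_NUM_LOW);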


>
> Signed-off-by: Christophe Leroy <[email protected]>
> ---
> v2: Using slice_bitmap_xxx() macros instead of bitmap_xxx() functions.
>
> arch/powerpc/include/asm/book3s/64/mmu.h | 2 +-
> arch/powerpc/include/asm/mmu-8xx.h | 2 +-
> arch/powerpc/include/asm/paca.h | 2 +-
> arch/powerpc/kernel/paca.c | 3 +-
> arch/powerpc/mm/hash_utils_64.c | 13 ++--
> arch/powerpc/mm/slb_low.S | 8 ++-
> arch/powerpc/mm/slice.c | 104 +++++++++++++++++--------------
> 7 files changed, 74 insertions(+), 60 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
> index c9448e19847a..27e7e9732ea1 100644
> --- a/arch/powerpc/include/asm/book3s/64/mmu.h
> +++ b/arch/powerpc/include/asm/book3s/64/mmu.h
> @@ -91,7 +91,7 @@ typedef struct {
> struct npu_context *npu_context;
>
> #ifdef CONFIG_PPC_MM_SLICES
> - u64 low_slices_psize; /* SLB page size encodings */
> + unsigned char low_slices_psize[8]; /* SLB page size encodings */

Can that 8 be a #define?
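
A hypothetical sketch of what that could look like (LOW_SLICE_ARRAY_SZ
is a made-up name for illustration):

#define LOW_SLICE_ARRAY_SZ	(BITS_PER_LONG / BITS_PER_BYTE)	/* 8 on 64-bit */

	unsigned char low_slices_psize[LOW_SLICE_ARRAY_SZ]; /* SLB page size encodings */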


> unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
> unsigned long slb_addr_limit;
> #else

-aneesh


2018-01-19 08:46:27

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH v2 1/5] powerpc/mm: Enhance 'slice' for supporting PPC32



On 19/01/2018 at 09:24, Aneesh Kumar K.V wrote:
> Christophe Leroy <[email protected]> writes:
>
>> In preparation for the following patch which will fix an issue on
>> the 8xx by re-using the 'slices', this patch enhances the
>> 'slices' implementation to support 32 bits CPUs.
>>
>> On PPC32, the address space is limited to 4Gbytes, hence only the low
>> slices will be used. As of today, the code uses
>> SLICE_LOW_TOP (0x100000000ul) and compares it with addr to determine
>> if addr refers to low or high space.
>> On PPC32, such a (addr < SLICE_LOW_TOP) test is always false because
>> 0x100000000ul degrades to 0. Therefore, the patch modifies
>> SLICE_LOW_TOP to (0xfffffffful) and modifies the tests to
>> (addr <= SLICE_LOW_TOP) which will then always be true on PPC32
>> as addr has type 'unsigned long' while not modifying the PPC64
>> behaviour.
>>
>> This patch moves "slices" functions prototypes from page64.h to page.h
>>
>> The high slices use bitmaps. As bitmap functions are not prepared to
>> handle bitmaps of size 0, the bitmap_xxx() calls are wrapped into
>> slice_bitmap_xxx() macros which will take care of the 0 nbits case.
>>
>> Signed-off-by: Christophe Leroy <[email protected]>
>> ---
>> v2: First patch of v1 series split in two parts; added slice_bitmap_xxx() macros.
>>
>> arch/powerpc/include/asm/page.h | 14 +++++++++
>> arch/powerpc/include/asm/page_32.h | 19 ++++++++++++
>> arch/powerpc/include/asm/page_64.h | 21 ++-----------
>> arch/powerpc/mm/hash_utils_64.c | 2 +-
>> arch/powerpc/mm/mmu_context_nohash.c | 7 +++++
>> arch/powerpc/mm/slice.c | 60 ++++++++++++++++++++++++------------
>> 6 files changed, 83 insertions(+), 40 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
>> index 8da5d4c1cab2..d0384f9db9eb 100644
>> --- a/arch/powerpc/include/asm/page.h
>> +++ b/arch/powerpc/include/asm/page.h
>> @@ -342,6 +342,20 @@ typedef struct page *pgtable_t;
>> #endif
>> #endif
>>
>> +#ifdef CONFIG_PPC_MM_SLICES
>> +struct mm_struct;
>> +
>> +unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
>> + unsigned long flags, unsigned int psize,
>> + int topdown);
>> +
>> +unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr);
>> +
>> +void slice_set_user_psize(struct mm_struct *mm, unsigned int psize);
>> +void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
>> + unsigned long len, unsigned int psize);
>> +#endif
>> +
>
> Should we do a slice.h, the way we have other files? And then do

Yes we could add a slice.h instead of using page.h for that, good idea.

>
> arch/powerpc/include/asm/book3s/64/slice.h that will carry
> #define slice_bitmap_zero(dst, nbits) \
> do { if (nbits) bitmap_zero(dst, nbits); } while (0)
> #define slice_bitmap_set(dst, pos, nbits) \
> do { if (nbits) bitmap_set(dst, pos, nbits); } while (0)
> #define slice_bitmap_copy(dst, src, nbits) \
> do { if (nbits) bitmap_copy(dst, src, nbits); } while (0)
> #define slice_bitmap_and(dst, src1, src2, nbits) \
> ({ (nbits) ? bitmap_and(dst, src1, src2, nbits) : 0; })
> #define slice_bitmap_or(dst, src1, src2, nbits) \
> do { if (nbits) bitmap_or(dst, src1, src2, nbits); } while (0)
> #define slice_bitmap_andnot(dst, src1, src2, nbits) \
> ({ (nbits) ? bitmap_andnot(dst, src1, src2, nbits) : 0; })
> #define slice_bitmap_equal(src1, src2, nbits) \
> ({ (nbits) ? bitmap_equal(src1, src2, nbits) : 1; })
> #define slice_bitmap_empty(src, nbits) \
> ({ (nbits) ? bitmap_empty(src, nbits) : 1; })
>
> This, without that if (nbits) check, and with a proper static inline so
> that we can do type checking.

Is it really worth duplicating that just for eliminating the 'if
(nbits)' in one case?

Only on book3s/64 will we be able to eliminate that; for nohash/32 we
need to keep the test due to the difference between low and high slices.

In any case, as the nbits we use in slice.c is a constant, the test is
eliminated at compile time, so I can't see the benefit of making
different slice_bitmap_xxxx() variants based on platform.
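
For reference, the per-platform variant being suggested could look like
this on book3s/64, where SLICE_NUM_HIGH is never zero (hypothetical
sketch, not part of the patch):

static inline void slice_bitmap_zero(unsigned long *dst, unsigned int nbits)
{
	bitmap_zero(dst, nbits);	/* no if (nbits): nbits > 0 here */
}

static inline int slice_bitmap_equal(const unsigned long *src1,
				     const unsigned long *src2,
				     unsigned int nbits)
{
	return bitmap_equal(src1, src2, nbits);
}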

Christophe

>
> also related definitions for
> #define SLICE_LOW_SHIFT 28
> #define SLICE_HIGH_SHIFT 0
>
> #define SLICE_LOW_TOP (0xfffffffful)
> #define SLICE_NUM_LOW ((SLICE_LOW_TOP >> SLICE_LOW_SHIFT) + 1)
> +#define SLICE_NUM_HIGH 0ul
>
> Common stuff between 64 and 32 can go to
> arch/powerpc/include/asm/slice.h?
>
> It also gives an indication of which 32-bit version we are looking at
> here. IIUC 8xx will go to arch/powerpc/include/asm/nohash/32/slice.h?
>
>> #include <asm-generic/memory_model.h>
>> #endif /* __ASSEMBLY__ */
>>
>> diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h
>> index 5c378e9b78c8..f7d1bd1183c8 100644
>> --- a/arch/powerpc/include/asm/page_32.h
>> +++ b/arch/powerpc/include/asm/page_32.h
>> @@ -60,4 +60,23 @@ extern void copy_page(void *to, void *from);
>>
>> #endif /* __ASSEMBLY__ */
>>
>> +#ifdef CONFIG_PPC_MM_SLICES
>> +
>> +#define SLICE_LOW_SHIFT 28
>> +#define SLICE_HIGH_SHIFT 0
>> +
>> +#define SLICE_LOW_TOP (0xfffffffful)
>> +#define SLICE_NUM_LOW ((SLICE_LOW_TOP >> SLICE_LOW_SHIFT) + 1)
>> +#define SLICE_NUM_HIGH 0ul
>> +
>> +#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT)
>> +#define GET_HIGH_SLICE_INDEX(addr) (addr & 0)
>> +
>> +#ifdef CONFIG_HUGETLB_PAGE
>> +#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
>> +#endif
>> +#define HAVE_ARCH_UNMAPPED_AREA
>> +#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
>> +
>> +#endif
>> #endif /* _ASM_POWERPC_PAGE_32_H */
>> diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
>> index 56234c6fcd61..a7baef5bbe5f 100644
>> --- a/arch/powerpc/include/asm/page_64.h
>> +++ b/arch/powerpc/include/asm/page_64.h
>> @@ -91,30 +91,13 @@ extern u64 ppc64_pft_size;
>> #define SLICE_LOW_SHIFT 28
>> #define SLICE_HIGH_SHIFT 40
>>
>> -#define SLICE_LOW_TOP (0x100000000ul)
>> -#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
>> +#define SLICE_LOW_TOP (0xfffffffful)
>> +#define SLICE_NUM_LOW ((SLICE_LOW_TOP >> SLICE_LOW_SHIFT) + 1)
>> #define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
>>
>> #define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT)
>> #define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT)
>>
>> -#ifndef __ASSEMBLY__
>> -struct mm_struct;
>> -
>> -extern unsigned long slice_get_unmapped_area(unsigned long addr,
>> - unsigned long len,
>> - unsigned long flags,
>> - unsigned int psize,
>> - int topdown);
>> -
>> -extern unsigned int get_slice_psize(struct mm_struct *mm,
>> - unsigned long addr);
>> -
>> -extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize);
>> -extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
>> - unsigned long len, unsigned int psize);
>> -
>> -#endif /* __ASSEMBLY__ */
>> #else
>> #define slice_init()
>> #ifdef CONFIG_PPC_BOOK3S_64
>> diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
>> index 655a5a9a183d..3266b3326088 100644
>> --- a/arch/powerpc/mm/hash_utils_64.c
>> +++ b/arch/powerpc/mm/hash_utils_64.c
>> @@ -1101,7 +1101,7 @@ static unsigned int get_paca_psize(unsigned long addr)
>> unsigned char *hpsizes;
>> unsigned long index, mask_index;
>>
>> - if (addr < SLICE_LOW_TOP) {
>> + if (addr <= SLICE_LOW_TOP) {
>> lpsizes = get_paca()->mm_ctx_low_slices_psize;
>> index = GET_LOW_SLICE_INDEX(addr);
>> return (lpsizes >> (index * 4)) & 0xF;
>> diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
>> index 4554d6527682..42e02f5b6660 100644
>> --- a/arch/powerpc/mm/mmu_context_nohash.c
>> +++ b/arch/powerpc/mm/mmu_context_nohash.c
>> @@ -331,6 +331,13 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm)
>> {
>> pr_hard("initing context for mm @%p\n", mm);
>>
>> +#ifdef CONFIG_PPC_MM_SLICES
>> + if (!mm->context.slb_addr_limit)
>> + mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW;
>> + if (!mm->context.id)
>> + slice_set_user_psize(mm, mmu_virtual_psize);
>> +#endif
>> +
>> mm->context.id = MMU_NO_CONTEXT;
>> mm->context.active = 0;
>> return 0;
>> diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
>> index 23ec2c5e3b78..3f35a93afe13 100644
>> --- a/arch/powerpc/mm/slice.c
>> +++ b/arch/powerpc/mm/slice.c
>> @@ -67,16 +67,33 @@ static void slice_print_mask(const char *label, struct slice_mask mask) {}
>>
>> #endif
>>
>> +#define slice_bitmap_zero(dst, nbits) \
>> + do { if (nbits) bitmap_zero(dst, nbits); } while (0)
>> +#define slice_bitmap_set(dst, pos, nbits) \
>> + do { if (nbits) bitmap_set(dst, pos, nbits); } while (0)
>> +#define slice_bitmap_copy(dst, src, nbits) \
>> + do { if (nbits) bitmap_copy(dst, src, nbits); } while (0)
>> +#define slice_bitmap_and(dst, src1, src2, nbits) \
>> + ({ (nbits) ? bitmap_and(dst, src1, src2, nbits) : 0; })
>> +#define slice_bitmap_or(dst, src1, src2, nbits) \
>> + do { if (nbits) bitmap_or(dst, src1, src2, nbits); } while (0)
>> +#define slice_bitmap_andnot(dst, src1, src2, nbits) \
>> + ({ (nbits) ? bitmap_andnot(dst, src1, src2, nbits) : 0; })
>> +#define slice_bitmap_equal(src1, src2, nbits) \
>> + ({ (nbits) ? bitmap_equal(src1, src2, nbits) : 1; })
>> +#define slice_bitmap_empty(src, nbits) \
>> + ({ (nbits) ? bitmap_empty(src, nbits) : 1; })
>> +
>> static void slice_range_to_mask(unsigned long start, unsigned long len,
>> struct slice_mask *ret)
>> {
>> unsigned long end = start + len - 1;
>>
>> ret->low_slices = 0;
>> - bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
>> + slice_bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
>>
>> - if (start < SLICE_LOW_TOP) {
>> - unsigned long mend = min(end, (SLICE_LOW_TOP - 1));
>> + if (start <= SLICE_LOW_TOP) {
>> + unsigned long mend = min(end, SLICE_LOW_TOP);
>>
>> ret->low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
>> - (1u << GET_LOW_SLICE_INDEX(start));
>> @@ -87,7 +104,7 @@ static void slice_range_to_mask(unsigned long start, unsigned long len,
>> unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT));
>> unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index;
>>
>> - bitmap_set(ret->high_slices, start_index, count);
>> + slice_bitmap_set(ret->high_slices, start_index, count);
>> }
>> }
>>
>> @@ -117,7 +134,7 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
>> * of the high or low area bitmaps, the first high area starts
>> * at 4GB, not 0 */
>> if (start == 0)
>> - start = SLICE_LOW_TOP;
>> + start = SLICE_LOW_TOP + 1;
>>
>> return !slice_area_is_free(mm, start, end - start);
>> }
>> @@ -128,7 +145,7 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
>> unsigned long i;
>>
>> ret->low_slices = 0;
>> - bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
>> + slice_bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
>>
>> for (i = 0; i < SLICE_NUM_LOW; i++)
>> if (!slice_low_has_vma(mm, i))
>> @@ -151,7 +168,7 @@ static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_ma
>> u64 lpsizes;
>>
>> ret->low_slices = 0;
>> - bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
>> + slice_bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
>>
>> lpsizes = mm->context.low_slices_psize;
>> for (i = 0; i < SLICE_NUM_LOW; i++)
>> @@ -180,11 +197,11 @@ static int slice_check_fit(struct mm_struct *mm,
>> */
>> unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit);
>>
>> - bitmap_and(result, mask.high_slices,
>> - available.high_slices, slice_count);
>> + slice_bitmap_and(result, mask.high_slices,
>> + available.high_slices, slice_count);
>>
>> return (mask.low_slices & available.low_slices) == mask.low_slices &&
>> - bitmap_equal(result, mask.high_slices, slice_count);
>> + slice_bitmap_equal(result, mask.high_slices, slice_count);
>> }
>>
>> static void slice_flush_segments(void *parm)
>> @@ -259,7 +276,7 @@ static bool slice_scan_available(unsigned long addr,
>> unsigned long *boundary_addr)
>> {
>> unsigned long slice;
>> - if (addr < SLICE_LOW_TOP) {
>> + if (addr <= SLICE_LOW_TOP) {
>> slice = GET_LOW_SLICE_INDEX(addr);
>> *boundary_addr = (slice + end) << SLICE_LOW_SHIFT;
>> return !!(available.low_slices & (1u << slice));
>> @@ -391,8 +408,9 @@ static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src)
>> DECLARE_BITMAP(result, SLICE_NUM_HIGH);
>>
>> dst->low_slices |= src->low_slices;
>> - bitmap_or(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
>> - bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
>> + slice_bitmap_or(result, dst->high_slices, src->high_slices,
>> + SLICE_NUM_HIGH);
>> + slice_bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
>> }
>>
>> static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask *src)
>> @@ -401,8 +419,9 @@ static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask *
>>
>> dst->low_slices &= ~src->low_slices;
>>
>> - bitmap_andnot(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
>> - bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
>> + slice_bitmap_andnot(result, dst->high_slices, src->high_slices,
>> + SLICE_NUM_HIGH);
>> + slice_bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
>> }
>>
>> #ifdef CONFIG_PPC_64K_PAGES
>> @@ -450,14 +469,14 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
>> * init different masks
>> */
>> mask.low_slices = 0;
>> - bitmap_zero(mask.high_slices, SLICE_NUM_HIGH);
>> + slice_bitmap_zero(mask.high_slices, SLICE_NUM_HIGH);
>>
>> /* silence stupid warning */;
>> potential_mask.low_slices = 0;
>> - bitmap_zero(potential_mask.high_slices, SLICE_NUM_HIGH);
>> + slice_bitmap_zero(potential_mask.high_slices, SLICE_NUM_HIGH);
>>
>> compat_mask.low_slices = 0;
>> - bitmap_zero(compat_mask.high_slices, SLICE_NUM_HIGH);
>> + slice_bitmap_zero(compat_mask.high_slices, SLICE_NUM_HIGH);
>>
>> /* Sanity checks */
>> BUG_ON(mm->task_size == 0);
>> @@ -595,7 +614,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
>> convert:
>> slice_andnot_mask(&mask, &good_mask);
>> slice_andnot_mask(&mask, &compat_mask);
>> - if (mask.low_slices || !bitmap_empty(mask.high_slices, SLICE_NUM_HIGH)) {
>> + if (mask.low_slices ||
>> + !slice_bitmap_empty(mask.high_slices, SLICE_NUM_HIGH)) {
>> slice_convert(mm, mask, psize);
>> if (psize > MMU_PAGE_BASE)
>> on_each_cpu(slice_flush_segments, mm, 1);
>> @@ -640,7 +660,7 @@ unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
>> return MMU_PAGE_4K;
>> #endif
>> }
>> - if (addr < SLICE_LOW_TOP) {
>> + if (addr <= SLICE_LOW_TOP) {
>> u64 lpsizes;
>> lpsizes = mm->context.low_slices_psize;
>> index = GET_LOW_SLICE_INDEX(addr);
>> --
>> 2.13.3

2018-01-19 08:50:30

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH v2 2/5] powerpc/32: Fix hugepage allocation on 8xx at hint address



On 19/01/2018 at 09:26, Aneesh Kumar K.V wrote:
> Christophe Leroy <[email protected]> writes:
>
>> On the 8xx, the page size is set in the PMD entry and applies to
>> all pages of the page table pointed by the said PMD entry.
>>
>> When an app has some regular pages allocated (e.g. see below) and tries
>> to mmap() a huge page at a hint address covered by the same PMD entry,
>> the kernel accepts the hint although the 8xx cannot handle different
>> page sizes in the same PMD entry.
>>
>> 10000000-10001000 r-xp 00000000 00:0f 2597 /root/malloc
>> 10010000-10011000 rwxp 00000000 00:0f 2597 /root/malloc
>>
>> mmap(0x10080000, 524288, PROT_READ|PROT_WRITE,
>> MAP_PRIVATE|MAP_ANONYMOUS|0x40000, -1, 0) = 0x10080000
>>
>> This results in the app remaining forever in do_page_fault()/hugetlb_fault()
>> and when interrupting that app, we get the following warning:
>>
>> [162980.035629] WARNING: CPU: 0 PID: 2777 at arch/powerpc/mm/hugetlbpage.c:354 hugetlb_free_pgd_range+0xc8/0x1e4
>> [162980.035699] CPU: 0 PID: 2777 Comm: malloc Tainted: G W 4.14.6 #85
>> [162980.035744] task: c67e2c00 task.stack: c668e000
>> [162980.035783] NIP: c000fe18 LR: c00e1eec CTR: c00f90c0
>> [162980.035830] REGS: c668fc20 TRAP: 0700 Tainted: G W (4.14.6)
>> [162980.035854] MSR: 00029032 <EE,ME,IR,DR,RI> CR: 24044224 XER: 20000000
>> [162980.036003]
>> [162980.036003] GPR00: c00e1eec c668fcd0 c67e2c00 00000010 c6869410 10080000 00000000 77fb4000
>> [162980.036003] GPR08: ffff0001 0683c001 00000000 ffffff80 44028228 10018a34 00004008 418004fc
>> [162980.036003] GPR16: c668e000 00040100 c668e000 c06c0000 c668fe78 c668e000 c6835ba0 c668fd48
>> [162980.036003] GPR24: 00000000 73ffffff 74000000 00000001 77fb4000 100fffff 10100000 10100000
>> [162980.036743] NIP [c000fe18] hugetlb_free_pgd_range+0xc8/0x1e4
>> [162980.036839] LR [c00e1eec] free_pgtables+0x12c/0x150
>> [162980.036861] Call Trace:
>> [162980.036939] [c668fcd0] [c00f0774] unlink_anon_vmas+0x1c4/0x214 (unreliable)
>> [162980.037040] [c668fd10] [c00e1eec] free_pgtables+0x12c/0x150
>> [162980.037118] [c668fd40] [c00eabac] exit_mmap+0xe8/0x1b4
>> [162980.037210] [c668fda0] [c0019710] mmput.part.9+0x20/0xd8
>> [162980.037301] [c668fdb0] [c001ecb0] do_exit+0x1f0/0x93c
>> [162980.037386] [c668fe00] [c001f478] do_group_exit+0x40/0xcc
>> [162980.037479] [c668fe10] [c002a76c] get_signal+0x47c/0x614
>> [162980.037570] [c668fe70] [c0007840] do_signal+0x54/0x244
>> [162980.037654] [c668ff30] [c0007ae8] do_notify_resume+0x34/0x88
>> [162980.037744] [c668ff40] [c000dae8] do_user_signal+0x74/0xc4
>> [162980.037781] Instruction dump:
>> [162980.037821] 7fdff378 81370000 54a3463a 80890020 7d24182e 7c841a14 712a0004 4082ff94
>> [162980.038014] 2f890000 419e0010 712a0ff0 408200e0 <0fe00000> 54a9000a 7f984840 419d0094
>> [162980.038216] ---[ end trace c0ceeca8e7a5800a ]---
>> [162980.038754] BUG: non-zero nr_ptes on freeing mm: 1
>> [162985.363322] BUG: non-zero nr_ptes on freeing mm: -1
>>
>> In order to fix this, this patch uses the address space "slices"
>> implemented for BOOK3S/64 and enhanced to support PPC32 by the
>> preceding patch.
>>
>> This patch modifies the context.id on the 8xx to be in the range
>> [1:16] instead of [0:15] in order to identify context.id == 0 as
>> an uninitialised context, as done on BOOK3S.
>>
>> This patch activates CONFIG_PPC_MM_SLICES when CONFIG_HUGETLB_PAGE is
>> selected for the 8xx.
>>
>> Although we could in theory have as many slices as PMD entries, the
>> current slices implementation limits the number of low slices to 16.
>> This limitation does not prevent us from fixing the initial issue,
>> although it is suboptimal. It will be cured in a subsequent patch.
>>
>> Fixes: 4b91428699477 ("powerpc/8xx: Implement support of hugepages")
>> Signed-off-by: Christophe Leroy <[email protected]>
>> ---
>> v2: First patch of v1 series split in two parts
>>
>> arch/powerpc/include/asm/mmu-8xx.h | 6 ++++++
>> arch/powerpc/kernel/setup-common.c | 2 ++
>> arch/powerpc/mm/8xx_mmu.c | 2 +-
>> arch/powerpc/mm/hugetlbpage.c | 2 ++
>> arch/powerpc/mm/mmu_context_nohash.c | 4 ++--
>> arch/powerpc/mm/slice.c | 2 ++
>> arch/powerpc/platforms/Kconfig.cputype | 1 +
>> 7 files changed, 16 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h
>> index 5bb3dbede41a..5f89b6010453 100644
>> --- a/arch/powerpc/include/asm/mmu-8xx.h
>> +++ b/arch/powerpc/include/asm/mmu-8xx.h
>> @@ -169,6 +169,12 @@ typedef struct {
>> unsigned int id;
>> unsigned int active;
>> unsigned long vdso_base;
>> +#ifdef CONFIG_PPC_MM_SLICES
>> + u16 user_psize; /* page size index */
>> + u64 low_slices_psize; /* page size encodings */
>> + unsigned char high_slices_psize[0];
>> + unsigned long slb_addr_limit;
>> +#endif
>> } mm_context_t;
>>
>> #define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000)
>> diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
>> index 9d213542a48b..67075a1cff36 100644
>> --- a/arch/powerpc/kernel/setup-common.c
>> +++ b/arch/powerpc/kernel/setup-common.c
>> @@ -927,6 +927,8 @@ void __init setup_arch(char **cmdline_p)
>> #ifdef CONFIG_PPC64
>> if (!radix_enabled())
>> init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
>> +#elif defined(CONFIG_PPC_8xx)
>> + init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW;
>> #else
>> #error "context.addr_limit not initialized."
>> #endif
>> diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
>> index f29212e40f40..0be77709446c 100644
>> --- a/arch/powerpc/mm/8xx_mmu.c
>> +++ b/arch/powerpc/mm/8xx_mmu.c
>> @@ -192,7 +192,7 @@ void set_context(unsigned long id, pgd_t *pgd)
>> mtspr(SPRN_M_TW, __pa(pgd) - offset);
>>
>> /* Update context */
>> - mtspr(SPRN_M_CASID, id);
>> + mtspr(SPRN_M_CASID, id - 1);
>> /* sync */
>> mb();
>> }
>> diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
>> index a9b9083c5e49..79e1378ee303 100644
>> --- a/arch/powerpc/mm/hugetlbpage.c
>> +++ b/arch/powerpc/mm/hugetlbpage.c
>> @@ -553,9 +553,11 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
>> struct hstate *hstate = hstate_file(file);
>> int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
>>
>> +#ifdef CONFIG_PPC_RADIX_MMU
>> if (radix_enabled())
>> return radix__hugetlb_get_unmapped_area(file, addr, len,
>> pgoff, flags);
>> +#endif
>
> Didn't the if (0) remove the following radix__hugetlb_get_unmapped_area
> call for you?
>

No

CC arch/powerpc/mm/hugetlbpage.o
arch/powerpc/mm/hugetlbpage.c: In function ‘hugetlb_get_unmapped_area’:
arch/powerpc/mm/hugetlbpage.c:558:10: error: implicit declaration of
function ‘radix__hugetlb_get_unmapped_area’
[-Werror=implicit-function-declaration]
return radix__hugetlb_get_unmapped_area(file, addr, len,
^
cc1: all warnings being treated as errors
make[1]: *** [arch/powerpc/mm/hugetlbpage.o] Error 1
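
(The usual way to keep the if (radix_enabled()) form without the #ifdef
is to make sure a declaration, or a stub, is always visible; a
hypothetical sketch, not the actual fix:)

#ifdef CONFIG_PPC_RADIX_MMU
unsigned long radix__hugetlb_get_unmapped_area(struct file *file,
					       unsigned long addr,
					       unsigned long len,
					       unsigned long pgoff,
					       unsigned long flags);
#else
static inline unsigned long
radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
				 unsigned long len, unsigned long pgoff,
				 unsigned long flags)
{
	return 0;	/* never reached: radix_enabled() is 0 here */
}
#endif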


Christophe

>
>> return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1);
>> }
>> #endif
>> diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
>> index 42e02f5b6660..c1e1bf186871 100644
>> --- a/arch/powerpc/mm/mmu_context_nohash.c
>> +++ b/arch/powerpc/mm/mmu_context_nohash.c
>> @@ -435,8 +435,8 @@ void __init mmu_context_init(void)
>> * -- BenH
>> */
>> if (mmu_has_feature(MMU_FTR_TYPE_8xx)) {
>> - first_context = 0;
>> - last_context = 15;
>> + first_context = 1;
>> + last_context = 16;
>> no_selective_tlbil = true;
>> } else if (mmu_has_feature(MMU_FTR_TYPE_47x)) {
>> first_context = 1;
>> diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
>> index 3f35a93afe13..b617acf35836 100644
>> --- a/arch/powerpc/mm/slice.c
>> +++ b/arch/powerpc/mm/slice.c
>> @@ -206,6 +206,7 @@ static int slice_check_fit(struct mm_struct *mm,
>>
>> static void slice_flush_segments(void *parm)
>> {
>> +#ifdef CONFIG_PPC_BOOK3S_64
>> struct mm_struct *mm = parm;
>> unsigned long flags;
>>
>> @@ -217,6 +218,7 @@ static void slice_flush_segments(void *parm)
>> local_irq_save(flags);
>> slb_flush_and_rebolt();
>> local_irq_restore(flags);
>> +#endif
>> }
>>
>> static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize)
>> diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
>> index ae07470fde3c..73a7ea333e9e 100644
>> --- a/arch/powerpc/platforms/Kconfig.cputype
>> +++ b/arch/powerpc/platforms/Kconfig.cputype
>> @@ -334,6 +334,7 @@ config PPC_BOOK3E_MMU
>> config PPC_MM_SLICES
>> bool
>> default y if PPC_BOOK3S_64
>> + default y if PPC_8xx && HUGETLB_PAGE
>> default n
>>
>> config PPC_HAVE_PMU_SUPPORT
>> --
>> 2.13.3

2018-01-19 09:00:31

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH v2 3/5] powerpc/mm: Allow more than 16 low slices



On 19/01/2018 at 09:30, Aneesh Kumar K.V wrote:
> Christophe Leroy <[email protected]> writes:
>
>> While the implementation of the "slices" address space allows
>> a significant number of high slices, it limits the number of
>> low slices to 16 due to the use of a single u64 low_slices_psize
>> element in struct mm_context_t.
>>
>> On the 8xx, the minimum slice size is the size of the area
>> covered by a single PMD entry, i.e. 4M in 4K pages mode and 64M in
>> 16K pages mode. This means we could have up to 1024 and 64 slices
>> respectively.
>>
>> In order to overcome this limitation, this patch switches the
>> handling of low_slices to BITMAPs as done already for high_slices.
>
> Does it have a performance impact? When we switched high_slices,
> that was one of the questions asked. Now with a topdown search we should
> mostly be using the high_slices. But it will be good to get numbers for
> ppc64 for this change.

It should have almost no performance impact at all, because all bitmap
functions use a simplified path when the number of bits is small and
constant:

- ret->low_slices = 0;
+ slice_bitmap_zero(ret->low_slices, SLICE_NUM_LOW);


static inline void bitmap_zero(unsigned long *dst, unsigned int nbits)
{
	if (small_const_nbits(nbits))
		*dst = 0UL;
	else {
		unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
		memset(dst, 0, len);
	}
}



- dst->low_slices |= src->low_slices;
+ slice_bitmap_or(dst->low_slices, dst->low_slices, src->low_slices,
+ SLICE_NUM_LOW);


static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
			     const unsigned long *src2, unsigned int nbits)
{
	if (small_const_nbits(nbits))
		*dst = *src1 | *src2;
	else
		__bitmap_or(dst, src1, src2, nbits);
}
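
Both rely on small_const_nbits(); as a rough sketch of how that helper
is defined in include/linux/bitmap.h of that era:

#define small_const_nbits(nbits) \
	(__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG)

/* i.e. the size is a compile-time constant that fits in one long, so
 * the bitmap operation degenerates to a single-word operation */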


>
>
>>
>> Signed-off-by: Christophe Leroy <[email protected]>
>> ---
>> v2: Using slice_bitmap_xxx() macros instead of bitmap_xxx() functions.
>>
>> arch/powerpc/include/asm/book3s/64/mmu.h | 2 +-
>> arch/powerpc/include/asm/mmu-8xx.h | 2 +-
>> arch/powerpc/include/asm/paca.h | 2 +-
>> arch/powerpc/kernel/paca.c | 3 +-
>> arch/powerpc/mm/hash_utils_64.c | 13 ++--
>> arch/powerpc/mm/slb_low.S | 8 ++-
>> arch/powerpc/mm/slice.c | 104 +++++++++++++++++--------------
>> 7 files changed, 74 insertions(+), 60 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
>> index c9448e19847a..27e7e9732ea1 100644
>> --- a/arch/powerpc/include/asm/book3s/64/mmu.h
>> +++ b/arch/powerpc/include/asm/book3s/64/mmu.h
>> @@ -91,7 +91,7 @@ typedef struct {
>> struct npu_context *npu_context;
>>
>> #ifdef CONFIG_PPC_MM_SLICES
>> - u64 low_slices_psize; /* SLB page size encodings */
>> + unsigned char low_slices_psize[8]; /* SLB page size encodings */
>
> Can that 8 be a #define?

Sure

>
>
>> unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
>> unsigned long slb_addr_limit;
>> #else
>
> -aneesh
>

Christophe

2018-01-19 09:04:59

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [PATCH v2 1/5] powerpc/mm: Enhance 'slice' for supporting PPC32



On 01/19/2018 02:14 PM, Christophe LEROY wrote:
>
>
> On 19/01/2018 at 09:24, Aneesh Kumar K.V wrote:
>> Christophe Leroy <[email protected]> writes:
>>
>>> In preparation for the following patch which will fix an issue on
>>> the 8xx by re-using the 'slices', this patch enhances the
>>> 'slices' implementation to support 32 bits CPUs.
>>>
>>> On PPC32, the address space is limited to 4Gbytes, hence only the low
>>> slices will be used. As of today, the code uses
>>> SLICE_LOW_TOP (0x100000000ul) and compares it with addr to determine
>>> if addr refers to low or high space.
>>> On PPC32, such a (addr < SLICE_LOW_TOP) test is always false because
>>> 0x100000000ul degrades to 0. Therefore, the patch modifies
>>> SLICE_LOW_TOP to (0xfffffffful) and modifies the tests to
>>> (addr <= SLICE_LOW_TOP) which will then always be true on PPC32
>>> as addr has type 'unsigned long' while not modifying the PPC64
>>> behaviour.
>>>
>>> This patch moves "slices" functions prototypes from page64.h to page.h
>>>
>>> The high slices use bitmaps. As bitmap functions are not prepared to
>>> handle bitmaps of size 0, the bitmap_xxx() calls are wrapped into
>>> slice_bitmap_xxx() macros which will take care of the 0 nbits case.
>>>
>>> Signed-off-by: Christophe Leroy <[email protected]>
>>> ---
>>>   v2: First patch of v1 series split in two parts; added
>>> slice_bitmap_xxx() macros.
>>>
>>>   arch/powerpc/include/asm/page.h      | 14 +++++++++
>>>   arch/powerpc/include/asm/page_32.h   | 19 ++++++++++++
>>>   arch/powerpc/include/asm/page_64.h   | 21 ++-----------
>>>   arch/powerpc/mm/hash_utils_64.c      |  2 +-
>>>   arch/powerpc/mm/mmu_context_nohash.c |  7 +++++
>>>   arch/powerpc/mm/slice.c              | 60
>>> ++++++++++++++++++++++++------------
>>>   6 files changed, 83 insertions(+), 40 deletions(-)
>>>
>>> diff --git a/arch/powerpc/include/asm/page.h
>>> b/arch/powerpc/include/asm/page.h
>>> index 8da5d4c1cab2..d0384f9db9eb 100644
>>> --- a/arch/powerpc/include/asm/page.h
>>> +++ b/arch/powerpc/include/asm/page.h
>>> @@ -342,6 +342,20 @@ typedef struct page *pgtable_t;
>>>   #endif
>>>   #endif
>>> +#ifdef CONFIG_PPC_MM_SLICES
>>> +struct mm_struct;
>>> +
>>> +unsigned long slice_get_unmapped_area(unsigned long addr, unsigned
>>> long len,
>>> +                      unsigned long flags, unsigned int psize,
>>> +                      int topdown);
>>> +
>>> +unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr);
>>> +
>>> +void slice_set_user_psize(struct mm_struct *mm, unsigned int psize);
>>> +void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
>>> +               unsigned long len, unsigned int psize);
>>> +#endif
>>> +
>>
>> Should we do a slice.h, the way we have other files? And then do
>
> Yes we could add a slice.h instead of using page.h for that, good idea.
>
>>
>> arch/powerpc/include/asm/book3s/64/slice.h that will carry
>> #define slice_bitmap_zero(dst, nbits) \
>>     do { if (nbits) bitmap_zero(dst, nbits); } while (0)
>> #define slice_bitmap_set(dst, pos, nbits) \
>> do { if (nbits) bitmap_set(dst, pos, nbits); } while (0)
>> #define slice_bitmap_copy(dst, src, nbits) \
>> do { if (nbits) bitmap_copy(dst, src, nbits); } while (0)
>> #define slice_bitmap_and(dst, src1, src2, nbits) \
>>     ({ (nbits) ? bitmap_and(dst, src1, src2, nbits) : 0; })
>> #define slice_bitmap_or(dst, src1, src2, nbits) \
>>     do { if (nbits) bitmap_or(dst, src1, src2, nbits); } while (0)
>> #define slice_bitmap_andnot(dst, src1, src2, nbits) \
>>     ({ (nbits) ? bitmap_andnot(dst, src1, src2, nbits) : 0; })
>> #define slice_bitmap_equal(src1, src2, nbits) \
>>     ({ (nbits) ? bitmap_equal(src1, src2, nbits) : 1; })
>> #define slice_bitmap_empty(src, nbits) \
>>     ({ (nbits) ? bitmap_empty(src, nbits) : 1; })
>>
>> This, without that if (nbits) check, and with a proper static inline so
>> that we can do type checking.
>
> Is it really worth duplicating that just for eliminating the 'if
> (nbits)' in one case?
>
> Only on book3s/64 will we be able to eliminate that; for nohash/32 we
> need to keep the test due to the difference between low and high slices.

The other advantage is that we move the SLICE_LOW_SHIFT to the right
location. IMHO the mm subsystem is really complex with these really
overloaded headers. If we can keep it separate we should, with minimal
code duplication.
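
Concretely, the header layout being suggested could look like this
(hypothetical sketch):

/* arch/powerpc/include/asm/slice.h */
#ifndef _ASM_POWERPC_SLICE_H
#define _ASM_POWERPC_SLICE_H

#ifdef CONFIG_PPC_BOOK3S_64
#include <asm/book3s/64/slice.h>
#elif defined(CONFIG_PPC_8xx)
#include <asm/nohash/32/slice.h>
#endif

/* common prototypes currently in page.h would then live here */

#endif /* _ASM_POWERPC_SLICE_H */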
>
> In any case, as the nbits we use in slice.c is a constant, the test is
> eliminated at compile time, so I can't see the benefit of making

-aneesh


2018-01-19 09:08:39

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH v2 1/5] powerpc/mm: Enhance 'slice' for supporting PPC32



On 19/01/2018 at 10:02, Aneesh Kumar K.V wrote:
>
>
> On 01/19/2018 02:14 PM, Christophe LEROY wrote:
>>
>>
>>> On 19/01/2018 at 09:24, Aneesh Kumar K.V wrote:
>>> Christophe Leroy <[email protected]> writes:
>>>
>>>> In preparation for the following patch which will fix an issue on
>>>> the 8xx by re-using the 'slices', this patch enhances the
>>>> 'slices' implementation to support 32 bits CPUs.
>>>>
>>>> On PPC32, the address space is limited to 4Gbytes, hence only the low
>>>> slices will be used. As of today, the code uses
>>>> SLICE_LOW_TOP (0x100000000ul) and compares it with addr to determine
>>>> if addr refers to low or high space.
>>>> On PPC32, such a (addr < SLICE_LOW_TOP) test is always false because
>>>> 0x100000000ul degrades to 0. Therefore, the patch modifies
>>>> SLICE_LOW_TOP to (0xfffffffful) and modifies the tests to
>>>> (addr <= SLICE_LOW_TOP) which will then always be true on PPC32
>>>> as addr has type 'unsigned long' while not modifying the PPC64
>>>> behaviour.
>>>>
>>>> This patch moves "slices" functions prototypes from page64.h to page.h
>>>>
>>>> The high slices use bitmaps. As bitmap functions are not prepared to
>>>> handle bitmaps of size 0, the bitmap_xxx() calls are wrapped into
>>>> slice_bitmap_xxx() macros which will take care of the 0 nbits case.
>>>>
>>>> Signed-off-by: Christophe Leroy <[email protected]>
>>>> ---
>>>>   v2: First patch of v1 series split in two parts; added
>>>> slice_bitmap_xxx() macros.
>>>>
>>>>   arch/powerpc/include/asm/page.h      | 14 +++++++++
>>>>   arch/powerpc/include/asm/page_32.h   | 19 ++++++++++++
>>>>   arch/powerpc/include/asm/page_64.h   | 21 ++-----------
>>>>   arch/powerpc/mm/hash_utils_64.c      |  2 +-
>>>>   arch/powerpc/mm/mmu_context_nohash.c |  7 +++++
>>>>   arch/powerpc/mm/slice.c              | 60
>>>> ++++++++++++++++++++++++------------
>>>>   6 files changed, 83 insertions(+), 40 deletions(-)
>>>>
>>>> diff --git a/arch/powerpc/include/asm/page.h
>>>> b/arch/powerpc/include/asm/page.h
>>>> index 8da5d4c1cab2..d0384f9db9eb 100644
>>>> --- a/arch/powerpc/include/asm/page.h
>>>> +++ b/arch/powerpc/include/asm/page.h
>>>> @@ -342,6 +342,20 @@ typedef struct page *pgtable_t;
>>>>   #endif
>>>>   #endif
>>>> +#ifdef CONFIG_PPC_MM_SLICES
>>>> +struct mm_struct;
>>>> +
>>>> +unsigned long slice_get_unmapped_area(unsigned long addr, unsigned
>>>> long len,
>>>> +                      unsigned long flags, unsigned int psize,
>>>> +                      int topdown);
>>>> +
>>>> +unsigned int get_slice_psize(struct mm_struct *mm, unsigned long
>>>> addr);
>>>> +
>>>> +void slice_set_user_psize(struct mm_struct *mm, unsigned int psize);
>>>> +void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
>>>> +               unsigned long len, unsigned int psize);
>>>> +#endif
>>>> +
>>>
>>> Should we do a slice.h, the way we have other files? And then do
>>
>> Yes we could add a slice.h instead of using page.h for that, good idea.
>>
>>>
>>> arch/powerpc/include/asm/book3s/64/slice.h that will carry
>>> #define slice_bitmap_zero(dst, nbits) \
>>>     do { if (nbits) bitmap_zero(dst, nbits); } while (0)
>>> #define slice_bitmap_set(dst, pos, nbits) \
>>> do { if (nbits) bitmap_set(dst, pos, nbits); } while (0)
>>> #define slice_bitmap_copy(dst, src, nbits) \
>>> do { if (nbits) bitmap_copy(dst, src, nbits); } while (0)
>>> #define slice_bitmap_and(dst, src1, src2, nbits) \
>>>     ({ (nbits) ? bitmap_and(dst, src1, src2, nbits) : 0; })
>>> #define slice_bitmap_or(dst, src1, src2, nbits) \
>>>     do { if (nbits) bitmap_or(dst, src1, src2, nbits); } while (0)
>>> #define slice_bitmap_andnot(dst, src1, src2, nbits) \
>>>     ({ (nbits) ? bitmap_andnot(dst, src1, src2, nbits) : 0; })
>>> #define slice_bitmap_equal(src1, src2, nbits) \
>>>     ({ (nbits) ? bitmap_equal(src1, src2, nbits) : 1; })
>>> #define slice_bitmap_empty(src, nbits) \
>>>     ({ (nbits) ? bitmap_empty(src, nbits) : 1; })
>>>
>>> This, without that if (nbits) check, and with a proper static inline so
>>> that we can do type checking.
>>
>> Is it really worth duplicating that just for eliminating the 'if
>> (nbits)' in one case?
>>
>> Only on book3s/64 will we be able to eliminate that; for nohash/32 we
>> need to keep the test due to the difference between low and high slices.
>
> The other advantage is that we move the SLICE_LOW_SHIFT to the right
> location. IMHO the mm subsystem is really complex with these really
> overloaded headers. If we can keep it separate we should, with minimal
> code duplication.

For the constants I fully agree with your proposal and I will do it. I
was only questioning the benefit of moving the slice_bitmap_xxxx()
stuff, taking into account that the 'if (nbits)' test is already
eliminated by the compiler.

Christophe

>>
>> In any case, as the nbits we use in slice.c is a constant, the test is
>> eliminated at compile time, so I can't see the benefit of making
>
> -aneesh

2018-01-19 09:09:23

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [PATCH v2 3/5] powerpc/mm: Allow more than 16 low slices



On 01/19/2018 02:29 PM, Christophe LEROY wrote:
>
>
> On 19/01/2018 at 09:30, Aneesh Kumar K.V wrote:
>> Christophe Leroy <[email protected]> writes:
>>
>>> While the implementation of the "slices" address space allows
>>> a significant number of high slices, it limits the number of
>>> low slices to 16 due to the use of a single u64 low_slices_psize
>>> element in struct mm_context_t.
>>>
>>> On the 8xx, the minimum slice size is the size of the area
>>> covered by a single PMD entry, i.e. 4M in 4K pages mode and 64M in
>>> 16K pages mode. This means we could have up to 1024 and 64 slices
>>> respectively.
>>>
>>> In order to overcome this limitation, this patch switches the
>>> handling of low_slices to BITMAPs as done already for high_slices.
>>
>> Does it have a performance impact? When we switched high_slices,
>> that was one of the questions asked. Now with a topdown search we should
>> mostly be using the high_slices. But it will be good to get numbers for
>> ppc64 for this change.
>
> It should have almost no performance impact at all, because all bitmap
> functions use a simplified path when the number of bits is small and
> constant:
>
> -    ret->low_slices = 0;
> +    slice_bitmap_zero(ret->low_slices, SLICE_NUM_LOW);
>
>
> static inline void bitmap_zero(unsigned long *dst, unsigned int nbits)
> {
>     if (small_const_nbits(nbits))
>         *dst = 0UL;
>     else {
>         unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
>         memset(dst, 0, len);
>     }
> }
>
>
>
> -    dst->low_slices |= src->low_slices;
> +    slice_bitmap_or(dst->low_slices, dst->low_slices, src->low_slices,
> +            SLICE_NUM_LOW);
>
>
> static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
>             const unsigned long *src2, unsigned int nbits)
> {
>     if (small_const_nbits(nbits))
>         *dst = *src1 | *src2;
>     else
>         __bitmap_or(dst, src1, src2, nbits);
> }
>
>
>

Maybe capture that in the commit message, saying that since we are
64-bit on ppc64 there is no impact there?

-aneesh


2018-01-19 09:14:05

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [PATCH v2 1/5] powerpc/mm: Enhance 'slice' for supporting PPC32



On 01/19/2018 02:37 PM, Christophe LEROY wrote:
>
>
> On 19/01/2018 at 10:02, Aneesh Kumar K.V wrote:
>>
>>
>> On 01/19/2018 02:14 PM, Christophe LEROY wrote:
>>>
>>>
>>> On 19/01/2018 at 09:24, Aneesh Kumar K.V wrote:
>>>> Christophe Leroy <[email protected]> writes:
>>>>
>>>>> In preparation for the following patch which will fix an issue on
>>>>> the 8xx by re-using the 'slices', this patch enhances the
>>>>> 'slices' implementation to support 32 bits CPUs.
>>>>>
>>>>> On PPC32, the address space is limited to 4Gbytes, hence only the low
>>>>> slices will be used. As of today, the code uses
>>>>> SLICE_LOW_TOP (0x100000000ul) and compares it with addr to determine
>>>>> if addr refers to low or high space.
>>>>> On PPC32, such a (addr < SLICE_LOW_TOP) test is always false because
>>>>> 0x100000000ul degrades to 0. Therefore, the patch modifies
>>>>> SLICE_LOW_TOP to (0xfffffffful) and modifies the tests to
>>>>> (addr <= SLICE_LOW_TOP) which will then always be true on PPC32
>>>>> as addr has type 'unsigned long' while not modifying the PPC64
>>>>> behaviour.
>>>>>
>>>>> This patch moves "slices" functions prototypes from page64.h to page.h
>>>>>
>>>>> The high slices use bitmaps. As bitmap functions are not prepared to
>>>>> handle bitmaps of size 0, the bitmap_xxx() calls are wrapped into
>>>>> slice_bitmap_xxx() macros which will take care of the 0 nbits case.
>>>>>
>>>>> Signed-off-by: Christophe Leroy <[email protected]>
>>>>> ---
>>>>>   v2: First patch of v1 series split in two parts; added
>>>>> slice_bitmap_xxx() macros.
>>>>>
>>>>>   arch/powerpc/include/asm/page.h      | 14 +++++++++
>>>>>   arch/powerpc/include/asm/page_32.h   | 19 ++++++++++++
>>>>>   arch/powerpc/include/asm/page_64.h   | 21 ++-----------
>>>>>   arch/powerpc/mm/hash_utils_64.c      |  2 +-
>>>>>   arch/powerpc/mm/mmu_context_nohash.c |  7 +++++
>>>>>   arch/powerpc/mm/slice.c              | 60
>>>>> ++++++++++++++++++++++++------------
>>>>>   6 files changed, 83 insertions(+), 40 deletions(-)
>>>>>
>>>>> diff --git a/arch/powerpc/include/asm/page.h
>>>>> b/arch/powerpc/include/asm/page.h
>>>>> index 8da5d4c1cab2..d0384f9db9eb 100644
>>>>> --- a/arch/powerpc/include/asm/page.h
>>>>> +++ b/arch/powerpc/include/asm/page.h
>>>>> @@ -342,6 +342,20 @@ typedef struct page *pgtable_t;
>>>>>   #endif
>>>>>   #endif
>>>>> +#ifdef CONFIG_PPC_MM_SLICES
>>>>> +struct mm_struct;
>>>>> +
>>>>> +unsigned long slice_get_unmapped_area(unsigned long addr, unsigned
>>>>> long len,
>>>>> +                      unsigned long flags, unsigned int psize,
>>>>> +                      int topdown);
>>>>> +
>>>>> +unsigned int get_slice_psize(struct mm_struct *mm, unsigned long
>>>>> addr);
>>>>> +
>>>>> +void slice_set_user_psize(struct mm_struct *mm, unsigned int psize);
>>>>> +void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
>>>>> +               unsigned long len, unsigned int psize);
>>>>> +#endif
>>>>> +
>>>>
>>>> Should we do a slice.h, the way we have other files? And then do
>>>
>>> Yes we could add a slice.h instead of using page.h for that, good idea.
>>>
>>>>
>>>> arch/powerpc/include/asm/book3s/64/slice.h that will carry
>>>> #define slice_bitmap_zero(dst, nbits) \
>>>>     do { if (nbits) bitmap_zero(dst, nbits); } while (0)
>>>> #define slice_bitmap_set(dst, pos, nbits) \
>>>> do { if (nbits) bitmap_set(dst, pos, nbits); } while (0)
>>>> #define slice_bitmap_copy(dst, src, nbits) \
>>>> do { if (nbits) bitmap_copy(dst, src, nbits); } while (0)
>>>> #define slice_bitmap_and(dst, src1, src2, nbits) \
>>>>     ({ (nbits) ? bitmap_and(dst, src1, src2, nbits) : 0; })
>>>> #define slice_bitmap_or(dst, src1, src2, nbits) \
>>>>     do { if (nbits) bitmap_or(dst, src1, src2, nbits); } while (0)
>>>> #define slice_bitmap_andnot(dst, src1, src2, nbits) \
>>>>     ({ (nbits) ? bitmap_andnot(dst, src1, src2, nbits) : 0; })
>>>> #define slice_bitmap_equal(src1, src2, nbits) \
>>>>     ({ (nbits) ? bitmap_equal(src1, src2, nbits) : 1; })
>>>> #define slice_bitmap_empty(src, nbits) \
>>>>     ({ (nbits) ? bitmap_empty(src, nbits) : 1; })
>>>>
>>>> This, without that if (nbits) check, and with a proper static inline so
>>>> that we can do type checking.
>>>
>>> Is it really worth duplicating that just for eliminating the 'if
>>> (nbits)' in one case?
>>>
>>> Only on book3s/64 will we be able to eliminate that; for nohash/32
>>> we need to keep the test due to the difference between low and high slices.
>>
>> The other advantage is that we move the SLICE_LOW_SHIFT to the right
>> location. IMHO the mm subsystem is really complex with these really
>> overloaded headers. If we can keep it separate we should, with minimal
>> code duplication.
>
> For the constants I fully agree with your proposal and I will do it. I
> was only questioning the benefit of moving the slice_bitmap_xxxx()
> stuff, taking into account that the 'if (nbits)' test is already
> eliminated by the compiler.
>

That is compiler dependent, as you are finding with the other patch
where the if (0) didn't get compiled out.

-aneesh


2018-01-19 09:46:42

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH v2 1/5] powerpc/mm: Enhance 'slice' for supporting PPC32



On 19/01/2018 at 10:13, Aneesh Kumar K.V wrote:
>
>
> On 01/19/2018 02:37 PM, Christophe LEROY wrote:
>>
>>
>> On 19/01/2018 at 10:02, Aneesh Kumar K.V wrote:
>>>
>>>
>>> On 01/19/2018 02:14 PM, Christophe LEROY wrote:
>>>>
>>>>
>>>> On 19/01/2018 at 09:24, Aneesh Kumar K.V wrote:
>>>>> Christophe Leroy <[email protected]> writes:
>>>>>
>>>>>> In preparation for the following patch which will fix an issue on
>>>>>> the 8xx by re-using the 'slices', this patch enhances the
>>>>>> 'slices' implementation to support 32 bits CPUs.
>>>>>>
>>>>>> On PPC32, the address space is limited to 4Gbytes, hence only the low
>>>>>> slices will be used. As of today, the code uses
>>>>>> SLICE_LOW_TOP (0x100000000ul) and compares it with addr to determine
>>>>>> if addr refers to low or high space.
>>>>>> On PPC32, such a (addr < SLICE_LOW_TOP) test is always false because
>>>>>> 0x100000000ul degrades to 0. Therefore, the patch modifies
>>>>>> SLICE_LOW_TOP to (0xfffffffful) and modifies the tests to
>>>>>> (addr <= SLICE_LOW_TOP) which will then always be true on PPC32
>>>>>> as addr has type 'unsigned long' while not modifying the PPC64
>>>>>> behaviour.
>>>>>>
>>>>>> This patch moves "slices" functions prototypes from page64.h to
>>>>>> page.h
>>>>>>
>>>>>> The high slices use bitmaps. As bitmap functions are not prepared to
>>>>>> handle bitmaps of size 0, the bitmap_xxx() calls are wrapped into
>>>>>> slice_bitmap_xxx() macros which will take care of the 0 nbits case.
>>>>>>
>>>>>> Signed-off-by: Christophe Leroy <[email protected]>
>>>>>> ---
>>>>>>   v2: First patch of v1 series split in two parts; added
>>>>>> slice_bitmap_xxx() macros.
>>>>>>
>>>>>>   arch/powerpc/include/asm/page.h      | 14 +++++++++
>>>>>>   arch/powerpc/include/asm/page_32.h   | 19 ++++++++++++
>>>>>>   arch/powerpc/include/asm/page_64.h   | 21 ++-----------
>>>>>>   arch/powerpc/mm/hash_utils_64.c      |  2 +-
>>>>>>   arch/powerpc/mm/mmu_context_nohash.c |  7 +++++
>>>>>>   arch/powerpc/mm/slice.c              | 60
>>>>>> ++++++++++++++++++++++++------------
>>>>>>   6 files changed, 83 insertions(+), 40 deletions(-)
>>>>>>
>>>>>> diff --git a/arch/powerpc/include/asm/page.h
>>>>>> b/arch/powerpc/include/asm/page.h
>>>>>> index 8da5d4c1cab2..d0384f9db9eb 100644
>>>>>> --- a/arch/powerpc/include/asm/page.h
>>>>>> +++ b/arch/powerpc/include/asm/page.h
>>>>>> @@ -342,6 +342,20 @@ typedef struct page *pgtable_t;
>>>>>>   #endif
>>>>>>   #endif
>>>>>> +#ifdef CONFIG_PPC_MM_SLICES
>>>>>> +struct mm_struct;
>>>>>> +
>>>>>> +unsigned long slice_get_unmapped_area(unsigned long addr,
>>>>>> unsigned long len,
>>>>>> +                      unsigned long flags, unsigned int psize,
>>>>>> +                      int topdown);
>>>>>> +
>>>>>> +unsigned int get_slice_psize(struct mm_struct *mm, unsigned long
>>>>>> addr);
>>>>>> +
>>>>>> +void slice_set_user_psize(struct mm_struct *mm, unsigned int psize);
>>>>>> +void slice_set_range_psize(struct mm_struct *mm, unsigned long
>>>>>> start,
>>>>>> +               unsigned long len, unsigned int psize);
>>>>>> +#endif
>>>>>> +
>>>>>
>>>>> Should we do a slice.h, the way we have other files? And then do
>>>>
>>>> Yes we could add a slice.h instead of using page.h for that, good idea.
>>>>
>>>>>
>>>>> arch/powerpc/include/asm/book3s/64/slice.h that will carry
>>>>> #define slice_bitmap_zero(dst, nbits) \
>>>>>     do { if (nbits) bitmap_zero(dst, nbits); } while (0)
>>>>> #define slice_bitmap_set(dst, pos, nbits) \
>>>>> do { if (nbits) bitmap_set(dst, pos, nbits); } while (0)
>>>>> #define slice_bitmap_copy(dst, src, nbits) \
>>>>> do { if (nbits) bitmap_copy(dst, src, nbits); } while (0)
>>>>> #define slice_bitmap_and(dst, src1, src2, nbits) \
>>>>>     ({ (nbits) ? bitmap_and(dst, src1, src2, nbits) : 0; })
>>>>> #define slice_bitmap_or(dst, src1, src2, nbits) \
>>>>>     do { if (nbits) bitmap_or(dst, src1, src2, nbits); } while (0)
>>>>> #define slice_bitmap_andnot(dst, src1, src2, nbits) \
>>>>>     ({ (nbits) ? bitmap_andnot(dst, src1, src2, nbits) : 0; })
>>>>> #define slice_bitmap_equal(src1, src2, nbits) \
>>>>>     ({ (nbits) ? bitmap_equal(src1, src2, nbits) : 1; })
>>>>> #define slice_bitmap_empty(src, nbits) \
>>>>>     ({ (nbits) ? bitmap_empty(src, nbits) : 1; })
>>>>>
>>>>> This, without that if (nbits) check, and with a proper static inline
>>>>> so that we can do type checking.
>>>>
>>>> Is it really worth duplicating that just for eliminating the 'if
>>>> (nbits)' in one case?
>>>>
>>>> Only on book3s/64 will we be able to eliminate that; for nohash/32
>>>> we need to keep the test due to the difference between low and high
>>>> slices.
>>>
>>> The other advantage is that we move the SLICE_LOW_SHIFT to the right
>>> location. IMHO the mm subsystem is really complex with these really
>>> overloaded headers. If we can keep it separate we should, with
>>> minimal code duplication.
>>
>> For the constants I fully agree with your proposal and I will do it. I
>> was only questioning the benefit of moving the slice_bitmap_xxxx()
>> stuff, taking into account that the 'if (nbits)' test is already
>> eliminated by the compiler.
>>
>
> That is compiler dependent, as you are finding with the other patch
> where the if (0) didn't get compiled out.

I don't think so. When I had the missing prototype, the compilation went
OK, including the final link, which means that in the end the code is not
included since radix_enabled() evaluates to 0.

Many many parts of the kernel are based on this assumption.
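
A familiar example of that pattern is the standard IS_ENABLED() helper
(sketch; do_radix_work() is a made-up name):

	if (IS_ENABLED(CONFIG_PPC_RADIX_MMU))
		do_radix_work();	/* compiled out when the option is
					 * off, but still parsed, so a
					 * declaration must be visible */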

Christophe

>
> -aneesh

2018-01-20 08:25:18

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH v2 1/5] powerpc/mm: Enhance 'slice' for supporting PPC32

Hi Segher,

On 19/01/2018 at 10:45, Christophe LEROY wrote:
>
>
> On 19/01/2018 at 10:13, Aneesh Kumar K.V wrote:
>>
>>
>> On 01/19/2018 02:37 PM, Christophe LEROY wrote:
>>>
>>>
>>> On 19/01/2018 at 10:02, Aneesh Kumar K.V wrote:
>>>>
>>>>
>>>> On 01/19/2018 02:14 PM, Christophe LEROY wrote:
>>>>>
>>>>>
>>>>> On 19/01/2018 at 09:24, Aneesh Kumar K.V wrote:
>>>>>> Christophe Leroy <[email protected]> writes:
>>>>>>
>>>>>>> In preparation for the following patch which will fix an issue on
>>>>>>> the 8xx by re-using the 'slices', this patch enhances the
>>>>>>> 'slices' implementation to support 32 bits CPUs.
>>>>>>>
>>>>>>> On PPC32, the address space is limited to 4Gbytes, hence only the
>>>>>>> low
>>>>>>> slices will be used. As of today, the code uses
>>>>>>> SLICE_LOW_TOP (0x100000000ul) and compares it with addr to determine
>>>>>>> if addr refers to low or high space.
>>>>>>> On PPC32, such a (addr < SLICE_LOW_TOP) test is always false because
>>>>>>> 0x100000000ul degrades to 0. Therefore, the patch modifies
>>>>>>> SLICE_LOW_TOP to (0xfffffffful) and modifies the tests to
>>>>>>> (addr <= SLICE_LOW_TOP) which will then always be true on PPC32
>>>>>>> as addr has type 'unsigned long' while not modifying the PPC64
>>>>>>> behaviour.
>>>>>>>
>>>>>>> This patch moves "slices" functions prototypes from page64.h to
>>>>>>> page.h
>>>>>>>
>>>>>>> The high slices use bitmaps. As bitmap functions are not prepared to
>>>>>>> handle bitmaps of size 0, the bitmap_xxx() calls are wrapped into
>>>>>>> slice_bitmap_xxx() macros which will take care of the 0 nbits case.
>>>>>>>
>>>>>>> Signed-off-by: Christophe Leroy <[email protected]>
>>>>>>> ---
>>>>>>>   v2: First patch of v1 series split in two parts; added
>>>>>>> slice_bitmap_xxx() macros.
>>>>>>>
>>>>>>>   arch/powerpc/include/asm/page.h      | 14 +++++++++
>>>>>>>   arch/powerpc/include/asm/page_32.h   | 19 ++++++++++++
>>>>>>>   arch/powerpc/include/asm/page_64.h   | 21 ++-----------
>>>>>>>   arch/powerpc/mm/hash_utils_64.c      |  2 +-
>>>>>>>   arch/powerpc/mm/mmu_context_nohash.c |  7 +++++
>>>>>>>   arch/powerpc/mm/slice.c              | 60
>>>>>>> ++++++++++++++++++++++++------------
>>>>>>>   6 files changed, 83 insertions(+), 40 deletions(-)
>>>>>>>
>>>>>>> diff --git a/arch/powerpc/include/asm/page.h
>>>>>>> b/arch/powerpc/include/asm/page.h
>>>>>>> index 8da5d4c1cab2..d0384f9db9eb 100644
>>>>>>> --- a/arch/powerpc/include/asm/page.h
>>>>>>> +++ b/arch/powerpc/include/asm/page.h
>>>>>>> @@ -342,6 +342,20 @@ typedef struct page *pgtable_t;
>>>>>>>   #endif
>>>>>>>   #endif
>>>>>>> +#ifdef CONFIG_PPC_MM_SLICES
>>>>>>> +struct mm_struct;
>>>>>>> +
>>>>>>> +unsigned long slice_get_unmapped_area(unsigned long addr,
>>>>>>> unsigned long len,
>>>>>>> +                      unsigned long flags, unsigned int psize,
>>>>>>> +                      int topdown);
>>>>>>> +
>>>>>>> +unsigned int get_slice_psize(struct mm_struct *mm, unsigned long
>>>>>>> addr);
>>>>>>> +
>>>>>>> +void slice_set_user_psize(struct mm_struct *mm, unsigned int
>>>>>>> psize);
>>>>>>> +void slice_set_range_psize(struct mm_struct *mm, unsigned long
>>>>>>> start,
>>>>>>> +               unsigned long len, unsigned int psize);
>>>>>>> +#endif
>>>>>>> +
>>>>>>
>>>>>> Should we do a slice.h, the way we have other files? And then do
>>>>>
>>>>> Yes we could add a slice.h instead of using page.h for that, good
>>>>> idea.
>>>>>
>>>>>>
>>>>>> arch/powerpc/include/asm/book3s/64/slice.h that will carry
>>>>>> #define slice_bitmap_zero(dst, nbits) \
>>>>>>     do { if (nbits) bitmap_zero(dst, nbits); } while (0)
>>>>>> #define slice_bitmap_set(dst, pos, nbits) \
>>>>>> do { if (nbits) bitmap_set(dst, pos, nbits); } while (0)
>>>>>> #define slice_bitmap_copy(dst, src, nbits) \
>>>>>> do { if (nbits) bitmap_copy(dst, src, nbits); } while (0)
>>>>>> #define slice_bitmap_and(dst, src1, src2, nbits) \
>>>>>>     ({ (nbits) ? bitmap_and(dst, src1, src2, nbits) : 0; })
>>>>>> #define slice_bitmap_or(dst, src1, src2, nbits) \
>>>>>>     do { if (nbits) bitmap_or(dst, src1, src2, nbits); } while (0)
>>>>>> #define slice_bitmap_andnot(dst, src1, src2, nbits) \
>>>>>>     ({ (nbits) ? bitmap_andnot(dst, src1, src2, nbits) : 0; })
>>>>>> #define slice_bitmap_equal(src1, src2, nbits) \
>>>>>>     ({ (nbits) ? bitmap_equal(src1, src2, nbits) : 1; })
>>>>>> #define slice_bitmap_empty(src, nbits) \
>>>>>>     ({ (nbits) ? bitmap_empty(src, nbits) : 1; })
>>>>>>
>>>>>> This, without that if (nbits) check, and with a proper static
>>>>>> inline so that we can do type checking.
>>>>>
>>>>> Is it really worth duplicating that just for eliminating the 'if
>>>>> (nbits)' test in one case?
>>>>>
>>>>> Only on book3s/64 will we be able to eliminate that; for nohash/32
>>>>> we need to keep the test due to the difference between low and high
>>>>> slices.
>>>>
>>>> The other advantage is that we move SLICE_LOW_SHIFT to the right
>>>> location. IMHO the mm subsystem is really complex with these really
>>>> overloaded headers. If we can keep it separate with minimal code
>>>> duplication, we should.
>>>
>>> For the constants I fully agree with your proposal and I will do it.
>>> I was only questioning the benefit of moving the slice_bitmap_xxxx()
>>> stuff, taking into account that the 'if (nbits)' test is already
>>> eliminated by the compiler.
>>>
>>
>> That is compiler-dependent, as you are finding with the other patch,
>> where if (0) didn't get compiled out.
>
> I don't think so. When I had the missing prototype, the compilation went
> OK, including the final link. Which means that, in the end, the code is
> not included, since radix_enabled() evaluates to 0.
>
> Many, many parts of the kernel are based on this assumption.
>


Segher, what is your opinion on the above? Can we consider that an 'if
(nbits)' test will always be compiled out when nbits is a #define'd
constant, or should we duplicate the macros as suggested, in order to
avoid an unnecessary 'if' test on platforms where 'nbits' is never zero
by definition?

Patch is at https://patchwork.ozlabs.org/patch/862117/

Christophe


2018-01-20 17:59:30

by Segher Boessenkool

[permalink] [raw]
Subject: Re: [PATCH v2 1/5] powerpc/mm: Enhance 'slice' for supporting PPC32

Hi!

On Sat, Jan 20, 2018 at 09:22:50AM +0100, christophe leroy wrote:
> >>>>>>>On PPC32, the address space is limited to 4Gbytes, hence only the
> >>>>>>>low
> >>>>>>>slices will be used. As of today, the code uses
> >>>>>>>SLICE_LOW_TOP (0x100000000ul) and compares it with addr to determine
> >>>>>>>if addr refers to low or high space.
> >>>>>>>On PPC32, such a (addr < SLICE_LOW_TOP) test is always false because
> >>>>>>>0x100000000ul degrades to 0. Therefore, the patch modifies
> >>>>>>>SLICE_LOW_TOP to (0xfffffffful) and modifies the tests to
> >>>>>>>(addr <= SLICE_LOW_TOP) which will then always be true on PPC32
> >>>>>>>as addr has type 'unsigned long' while not modifying the PPC64
> >>>>>>>behaviour.

It should work to define SLICE_LOW_TOP as 0x100000000ull and keep
everything else the same, no?

> >I don't think so. When I had the missing prototype, the compilation went
> >OK, including the final link. Which means that, in the end, the code is
> >not included, since radix_enabled() evaluates to 0.
> >
> >Many, many parts of the kernel are based on this assumption.
>
> Segher, what is your opinion on the above? Can we consider that an 'if
> (nbits)' test will always be compiled out when nbits is a #define'd
> constant, or should we duplicate the macros as suggested, in order to
> avoid an unnecessary 'if' test on platforms where 'nbits' is never zero
> by definition?

Doing things like

if (nbits)
	some_undeclared_function();

will likely work in practice if the condition evaluates to false at
compile time, but a) it will warn; b) it is just yuck; and c) it will
not always work (for example, you get the wrong prototype in this case,
not lethal here with most ABIs, but ugh).

Just make sure to declare all functions, or define it to some empty
thing, or #ifdeffery if you have to. There are many options, it is
not hard, and if it means you have to pull code further apart that is
not so bad: you get cleaner, clearer code.
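
(A minimal sketch of the 'declare it, or define it to some empty thing'
option, with hypothetical names, assuming a header shared by both
configurations:

#ifdef CONFIG_FOO
unsigned long foo_get_unmapped_area(unsigned long addr, unsigned long len);
#else
static inline unsigned long foo_get_unmapped_area(unsigned long addr,
						  unsigned long len)
{
	return 0;	/* never reached: call sites are guarded by a
			 * constant-false test */
}
#endif

Either way, every call site sees the correct prototype, and the build
neither warns nor needs the definition at link time.)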


Segher

2018-01-22 07:53:35

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH v2 1/5] powerpc/mm: Enhance 'slice' for supporting PPC32



Le 20/01/2018 à 18:56, Segher Boessenkool a écrit :
> Hi!
>
> On Sat, Jan 20, 2018 at 09:22:50AM +0100, christophe leroy wrote:
>>>>>>>>> On PPC32, the address space is limited to 4Gbytes, hence only the
>>>>>>>>> low
>>>>>>>>> slices will be used. As of today, the code uses
>>>>>>>>> SLICE_LOW_TOP (0x100000000ul) and compares it with addr to determine
>>>>>>>>> if addr refers to low or high space.
>>>>>>>>> On PPC32, such a (addr < SLICE_LOW_TOP) test is always false because
>>>>>>>>> 0x100000000ul degrades to 0. Therefore, the patch modifies
>>>>>>>>> SLICE_LOW_TOP to (0xfffffffful) and modifies the tests to
>>>>>>>>> (addr <= SLICE_LOW_TOP) which will then always be true on PPC32
>>>>>>>>> as addr has type 'unsigned long' while not modifying the PPC64
>>>>>>>>> behaviour.
>
> It should work to define SLICE_LOW_TOP as 0x100000000ull and keep
> everything else the same, no?

Great, yes, it works indeed.

>
>>> I don't think so. When I had the missing prototype, the compilation went
>>> OK, including the final link. Which means that, in the end, the code is
>>> not included, since radix_enabled() evaluates to 0.
>>>
>>> Many, many parts of the kernel are based on this assumption.
>>
>> Segher, what is your opinion on the above? Can we consider that an 'if
>> (nbits)' test will always be compiled out when nbits is a #define'd
>> constant, or should we duplicate the macros as suggested, in order to
>> avoid an unnecessary 'if' test on platforms where 'nbits' is never zero
>> by definition?
>
> Doing things like
>
> if (nbits)
> 	some_undeclared_function();
>
> will likely work in practice if the condition evaluates to false at
> compile time, but a) it will warn; b) it is just yuck; and c) it will
> not always work (for example, you get the wrong prototype in this case,
> not lethal here with most ABIs, but ugh).
>
> Just make sure to declare all functions, or define it to some empty
> thing, or #ifdeffery if you have to. There are many options, it is
> not hard, and if it means you have to pull code further apart that is
> not so bad: you get cleaner, clearer code.

OK, if I understand correctly, your comment does indeed apply to the
following, so you confirm the #ifdef is necessary.

--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -553,9 +553,11 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	struct hstate *hstate = hstate_file(file);
 	int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
 
+#ifdef CONFIG_PPC_RADIX_MMU
 	if (radix_enabled())
 		return radix__hugetlb_get_unmapped_area(file, addr, len,
 							pgoff, flags);
+#endif
 	return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1);
 }
 #endif


However, my question was related to another part of the current
patchset, where the functions are always defined:


On PPC32 we set:

+#define SLICE_LOW_SHIFT 28
+#define SLICE_HIGH_SHIFT 0

On PPC64 we set:

#define SLICE_LOW_SHIFT 28
#define SLICE_HIGH_SHIFT 40

We define:

+#define slice_bitmap_zero(dst, nbits) \
+ do { if (nbits) bitmap_zero(dst, nbits); } while (0)


We have a function with:
{
	slice_bitmap_zero(ret->low_slices, SLICE_NUM_LOW);
	slice_bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
}

So the question is to find the best approach. Is the above approach
correct, including performance-wise?
Or should we define two sets of the slice_bitmap_zero() macro, one for
CONFIG_PPC32 with the 'if (nbits)' test and one for CONFIG_PPC64 without
the unnecessary test?

Or should we avoid this macro entirely and instead do something like:

{
	bitmap_zero(ret->low_slices, SLICE_NUM_LOW);
#if SLICE_NUM_HIGH != 0
	bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
#endif
}

And if we say the 'macro' approach is OK, would it be better to use a
static inline function instead?
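
(For concreteness, a minimal sketch of that static inline variant,
assuming <linux/bitmap.h> is included; this is not in the patch as
posted:

static inline void slice_bitmap_zero(unsigned long *dst, unsigned int nbits)
{
	/* nbits is SLICE_NUM_LOW or SLICE_NUM_HIGH, i.e. a compile-time
	 * constant at every call site, so after inlining the test should
	 * fold away just like in the macro version, with type checking
	 * as a bonus.
	 */
	if (nbits)
		bitmap_zero(dst, nbits);
}
)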

Thanks,
Christophe

2018-01-23 21:49:17

by Segher Boessenkool

[permalink] [raw]
Subject: Re: [PATCH v2 1/5] powerpc/mm: Enhance 'slice' for supporting PPC32

On Mon, Jan 22, 2018 at 08:52:53AM +0100, Christophe LEROY wrote:
> >Just make sure to declare all functions, or define it to some empty
> >thing, or #ifdeffery if you have to. There are many options, it is
> >not hard, and if it means you have to pull code further apart that is
> >not so bad: you get cleaner, clearer code.
>
> OK, if I understand correctly, your comment does indeed apply to the
> following, so you confirm the #ifdef is necessary.

As I said, not necessary, but it might be the easiest or even the
cleanest here. Something for you and the maintainers to fight about,
I'll stay out of it :-)

> However, my question was related to another part of the current
> patchset, where the functions are always defined:
>
>
> On PPC32 we set:
>
> +#define SLICE_LOW_SHIFT 28
> +#define SLICE_HIGH_SHIFT 0
>
> On PPC64 we set:
>
> #define SLICE_LOW_SHIFT 28
> #define SLICE_HIGH_SHIFT 40
>
> We define:
>
> +#define slice_bitmap_zero(dst, nbits) \
> + do { if (nbits) bitmap_zero(dst, nbits); } while (0)
>
>
> We have a function with:
> {
> slice_bitmap_zero(ret->low_slices, SLICE_NUM_LOW);
> slice_bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
> }

SLICE_NUM_xx is not the same as SLICE_xx_SHIFT; I don't see how any of
those shift values give nbits == 0.

> So the question is to find the best approach. Is the above approach
> correct, including performance-wise?

If slice_bitmap_zero is inlined (or partially inlined) it is fine. Is it?


Segher

2018-01-27 09:38:28

by Michael Ellerman

[permalink] [raw]
Subject: Re: [PATCH v2 2/5] powerpc/32: Fix hugepage allocation on 8xx at hint address

"Aneesh Kumar K.V" <[email protected]> writes:

> Christophe Leroy <[email protected]> writes:
>
>> On the 8xx, the page size is set in the PMD entry and applies to
>> all pages of the page table pointed by the said PMD entry.
>>
...
>> diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
>> index a9b9083c5e49..79e1378ee303 100644
>> --- a/arch/powerpc/mm/hugetlbpage.c
>> +++ b/arch/powerpc/mm/hugetlbpage.c
>> @@ -553,9 +553,11 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
>> struct hstate *hstate = hstate_file(file);
>> int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
>>
>> +#ifdef CONFIG_PPC_RADIX_MMU
>> if (radix_enabled())
>> return radix__hugetlb_get_unmapped_area(file, addr, len,
>> pgoff, flags);
>> +#endif
>
> if (0) didn't remove the following radix__hugetlb_get_unmapped_area for
> you?

It will remove the call, but you still need at least a prototype, or an
empty static inline.
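
(For example, something along these lines in the relevant header when
CONFIG_PPC_RADIX_MMU is not set, a sketch rather than the actual fix:

static inline unsigned long
radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
				 unsigned long len, unsigned long pgoff,
				 unsigned long flags)
{
	return 0;	/* unreachable: radix_enabled() is constant false here */
}

That keeps the call site compilable with a correct prototype while the
guarded branch is discarded.)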

cheers