2018-01-24 09:06:42

by Christophe Leroy

Subject: [PATCH v3 1/5] powerpc/mm: Remove intermediate bitmap copy in 'slices'

bitmap_or() and bitmap_andnot() can work properly with dst identical
to src1 or src2. There is no need for an intermediate result bitmap
that is copied back to dst in a second step.
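
This is safe because these helpers operate word by word, so each
destination word depends only on the same source word. A simplified
model of bitmap_or() (not the kernel implementation, which also
special-cases small constant sizes) makes the in-place property
obvious:

	static inline void bitmap_or_model(unsigned long *dst,
					   const unsigned long *src1,
					   const unsigned long *src2,
					   unsigned int nbits)
	{
		unsigned int k, lim = BITS_TO_LONGS(nbits);

		for (k = 0; k < lim; k++)
			dst[k] = src1[k] | src2[k]; /* word k reads only word k */
	}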

Signed-off-by: Christophe Leroy <[email protected]>
---
v2: New in v2
v3: patch moved up front of the series to avoid an ephemeral slice_bitmap_copy() function in the following patch

arch/powerpc/mm/slice.c | 12 ++++--------
1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 23ec2c5e3b78..98b53d48968f 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -388,21 +388,17 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,

static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src)
{
- DECLARE_BITMAP(result, SLICE_NUM_HIGH);
-
dst->low_slices |= src->low_slices;
- bitmap_or(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
- bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
+ bitmap_or(dst->high_slices, dst->high_slices, src->high_slices,
+ SLICE_NUM_HIGH);
}

static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask *src)
{
- DECLARE_BITMAP(result, SLICE_NUM_HIGH);
-
dst->low_slices &= ~src->low_slices;

- bitmap_andnot(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
- bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
+ bitmap_andnot(dst->high_slices, dst->high_slices, src->high_slices,
+ SLICE_NUM_HIGH);
}

#ifdef CONFIG_PPC_64K_PAGES
--
2.13.3



2018-01-24 09:03:36

by Christophe Leroy

Subject: [PATCH v3 5/5] powerpc/mm: Fix growth direction for hugepages mmaps with slice

An application running with libhugetlbfs fails to allocate
additional pages to the heap because the huge mapping is done
unconditionally as a topdown mapping:

mmap(0x10080000, 1572864, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|0x40000, -1, 0) = 0x73e80000
[...]
mmap(0x74000000, 1048576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|0x40000, -1, 0x180000) = 0x73d80000
munmap(0x73d80000, 1048576) = 0
[...]
mmap(0x74000000, 1572864, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|0x40000, -1, 0x180000) = 0x73d00000
munmap(0x73d00000, 1572864) = 0
[...]
mmap(0x74000000, 1572864, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|0x40000, -1, 0x180000) = 0x73d00000
munmap(0x73d00000, 1572864) = 0
[...]

As one can see from the above strace log, mmap() allocates further
pages below the initial one because no space is available above it.

This patch fixes it by requesting a bottomup mapping, as the
non-generic hugetlb_get_unmapped_area() does.
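
For reference, a condensed sketch of how that last argument is consumed
inside the slice code (simplified from slice_find_area() in
arch/powerpc/mm/slice.c; the high_limit plumbing is omitted here):

	static unsigned long slice_find_area(struct mm_struct *mm,
					     unsigned long len,
					     struct slice_mask mask,
					     int psize, int topdown)
	{
		if (topdown)
			return slice_find_area_topdown(mm, len, mask, psize);
		else
			return slice_find_area_bottomup(mm, len, mask, psize);
	}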

Fixes: d0f13e3c20b6f ("[POWERPC] Introduce address space "slices" ")
Signed-off-by: Christophe Leroy <[email protected]>
---
v3: Was a standalone patch before, but conflicts with this series.

arch/powerpc/mm/hugetlbpage.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 79e1378ee303..368ea6b248ad 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -558,7 +558,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
return radix__hugetlb_get_unmapped_area(file, addr, len,
pgoff, flags);
#endif
- return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1);
+ return slice_get_unmapped_area(addr, len, flags, mmu_psize, 0);
}
#endif

--
2.13.3


2018-01-24 09:04:17

by Christophe Leroy

Subject: [PATCH v3 4/5] powerpc/mm: Allow up to 64 low slices

While the implementation of the "slices" address space allows
a significant number of high slices, it limits the number of
low slices to 16 due to the use of a single u64 low_slices_psize
element in struct mm_context_t.

On the 8xx, the minimum slice size is the size of the area
covered by a single PMD entry, i.e. 4M in 4K pages mode and 64M in
16K pages mode. This means we could have at least 64 slices
(4G / 64M = 64 in 16K pages mode, and even more in 4K pages mode).

In order to overcome this limitation, this patch switches the
handling of low_slices_psize to a char array, as already done for
high_slices_psize. This makes it possible to increase the number of
low slices to 64 on the 8xx.
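
For reference, a minimal sketch of the 4-bit packing this implies, using
a hypothetical helper name (two slices per byte, matching the lookups
used throughout the diff below):

	/* Page-size index of low slice i, packed two per byte. */
	static inline unsigned int lpsize_of(const unsigned char *lpsizes,
					     unsigned long i)
	{
		return (lpsizes[i >> 1] >> ((i & 0x1) * 4)) & 0xf;
	}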

Signed-off-by: Christophe Leroy <[email protected]>
---
v2: Using slice_bitmap_xxx() macros instead of bitmap_xxx() functions.
v3: keep low_slices as a u64; this allows 64 slices, which is enough.

arch/powerpc/include/asm/book3s/64/mmu.h | 3 +-
arch/powerpc/include/asm/mmu-8xx.h | 7 +++-
arch/powerpc/include/asm/paca.h | 2 +-
arch/powerpc/include/asm/slice.h | 1 -
arch/powerpc/include/asm/slice_32.h | 2 ++
arch/powerpc/include/asm/slice_64.h | 2 ++
arch/powerpc/kernel/paca.c | 3 +-
arch/powerpc/mm/hash_utils_64.c | 13 ++++----
arch/powerpc/mm/slb_low.S | 8 +++--
arch/powerpc/mm/slice.c | 57 +++++++++++++++++---------------
10 files changed, 56 insertions(+), 42 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index c9448e19847a..b076a2d74c69 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -91,7 +91,8 @@ typedef struct {
struct npu_context *npu_context;

#ifdef CONFIG_PPC_MM_SLICES
- u64 low_slices_psize; /* SLB page size encodings */
+ /* SLB page size encodings*/
+ unsigned char low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
unsigned long slb_addr_limit;
#else
diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h
index 5f89b6010453..5f37ba06b56c 100644
--- a/arch/powerpc/include/asm/mmu-8xx.h
+++ b/arch/powerpc/include/asm/mmu-8xx.h
@@ -164,6 +164,11 @@
*/
#define SPRN_M_TW 799

+#ifdef CONFIG_PPC_MM_SLICES
+#include <asm/slice_32.h>
+#define SLICE_ARRAY_SIZE (1 << (32 - SLICE_LOW_SHIFT - 1))
+#endif
+
#ifndef __ASSEMBLY__
typedef struct {
unsigned int id;
@@ -171,7 +176,7 @@ typedef struct {
unsigned long vdso_base;
#ifdef CONFIG_PPC_MM_SLICES
u16 user_psize; /* page size index */
- u64 low_slices_psize; /* page size encodings */
+ unsigned char low_slices_psize[SLICE_ARRAY_SIZE];
unsigned char high_slices_psize[0];
unsigned long slb_addr_limit;
#endif
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 23ac7fc0af23..a3e531fe9ac7 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -141,7 +141,7 @@ struct paca_struct {
#ifdef CONFIG_PPC_BOOK3S
mm_context_id_t mm_ctx_id;
#ifdef CONFIG_PPC_MM_SLICES
- u64 mm_ctx_low_slices_psize;
+ unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
unsigned long mm_ctx_slb_addr_limit;
#else
diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h
index 2b4b70de7e71..b67ba8faa507 100644
--- a/arch/powerpc/include/asm/slice.h
+++ b/arch/powerpc/include/asm/slice.h
@@ -16,7 +16,6 @@
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN

-#define SLICE_LOW_SHIFT 28
#define SLICE_LOW_TOP (0x100000000ull)
#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT)
diff --git a/arch/powerpc/include/asm/slice_32.h b/arch/powerpc/include/asm/slice_32.h
index 7e27c0dfb913..349187c20100 100644
--- a/arch/powerpc/include/asm/slice_32.h
+++ b/arch/powerpc/include/asm/slice_32.h
@@ -2,6 +2,8 @@
#ifndef _ASM_POWERPC_SLICE_32_H
#define _ASM_POWERPC_SLICE_32_H

+#define SLICE_LOW_SHIFT 26 /* 64 slices */
+
#define SLICE_HIGH_SHIFT 0
#define SLICE_NUM_HIGH 0ul
#define GET_HIGH_SLICE_INDEX(addr) (addr & 0)
diff --git a/arch/powerpc/include/asm/slice_64.h b/arch/powerpc/include/asm/slice_64.h
index 9d1c97b83010..0959475239c6 100644
--- a/arch/powerpc/include/asm/slice_64.h
+++ b/arch/powerpc/include/asm/slice_64.h
@@ -2,6 +2,8 @@
#ifndef _ASM_POWERPC_SLICE_64_H
#define _ASM_POWERPC_SLICE_64_H

+#define SLICE_LOW_SHIFT 28
+
#define SLICE_HIGH_SHIFT 40
#define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
#define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT)
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index d6597038931d..8e1566bf82b8 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -264,7 +264,8 @@ void copy_mm_to_paca(struct mm_struct *mm)
#ifdef CONFIG_PPC_MM_SLICES
VM_BUG_ON(!mm->context.slb_addr_limit);
get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit;
- get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize;
+ memcpy(&get_paca()->mm_ctx_low_slices_psize,
+ &context->low_slices_psize, sizeof(context->low_slices_psize));
memcpy(&get_paca()->mm_ctx_high_slices_psize,
&context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm));
#else /* CONFIG_PPC_MM_SLICES */
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 655a5a9a183d..da696565b969 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1097,19 +1097,18 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
#ifdef CONFIG_PPC_MM_SLICES
static unsigned int get_paca_psize(unsigned long addr)
{
- u64 lpsizes;
- unsigned char *hpsizes;
+ unsigned char *psizes;
unsigned long index, mask_index;

if (addr < SLICE_LOW_TOP) {
- lpsizes = get_paca()->mm_ctx_low_slices_psize;
+ psizes = get_paca()->mm_ctx_low_slices_psize;
index = GET_LOW_SLICE_INDEX(addr);
- return (lpsizes >> (index * 4)) & 0xF;
+ } else {
+ psizes = get_paca()->mm_ctx_high_slices_psize;
+ index = GET_HIGH_SLICE_INDEX(addr);
}
- hpsizes = get_paca()->mm_ctx_high_slices_psize;
- index = GET_HIGH_SLICE_INDEX(addr);
mask_index = index & 0x1;
- return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF;
+ return (psizes[index >> 1] >> (mask_index * 4)) & 0xF;
}

#else
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 2cf5ef3fc50d..2c7c717fd2ea 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -200,10 +200,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
5:
/*
* Handle lpsizes
- * r9 is get_paca()->context.low_slices_psize, r11 is index
+ * r9 is get_paca()->context.low_slices_psize[index], r11 is mask_index
*/
- ld r9,PACALOWSLICESPSIZE(r13)
- mr r11,r10
+ srdi r11,r10,1 /* index */
+ addi r9,r11,PACALOWSLICESPSIZE
+ lbzx r9,r13,r9 /* r9 is lpsizes[r11] */
+ rldicl r11,r10,0,63 /* r11 = r10 & 0x1 */
6:
sldi r11,r11,2 /* index * 4 */
/* Extract the psize and multiply to get an array offset */
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 549704dfa777..3d573a038d42 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -148,18 +148,20 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret,
unsigned long high_limit)
{
- unsigned char *hpsizes;
+ unsigned char *hpsizes, *lpsizes;
int index, mask_index;
unsigned long i;
- u64 lpsizes;

ret->low_slices = 0;
slice_bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);

lpsizes = mm->context.low_slices_psize;
- for (i = 0; i < SLICE_NUM_LOW; i++)
- if (((lpsizes >> (i * 4)) & 0xf) == psize)
+ for (i = 0; i < SLICE_NUM_LOW; i++) {
+ mask_index = i & 0x1;
+ index = i >> 1;
+ if (((lpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
ret->low_slices |= 1u << i;
+ }

if (high_limit <= SLICE_LOW_TOP)
return;
@@ -211,8 +213,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
{
int index, mask_index;
/* Write the new slice psize bits */
- unsigned char *hpsizes;
- u64 lpsizes;
+ unsigned char *hpsizes, *lpsizes;
unsigned long i, flags;

slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
@@ -225,12 +226,13 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz

lpsizes = mm->context.low_slices_psize;
for (i = 0; i < SLICE_NUM_LOW; i++)
- if (mask.low_slices & (1u << i))
- lpsizes = (lpsizes & ~(0xful << (i * 4))) |
- (((unsigned long)psize) << (i * 4));
-
- /* Assign the value back */
- mm->context.low_slices_psize = lpsizes;
+ if (mask.low_slices & (1u << i)) {
+ mask_index = i & 0x1;
+ index = i >> 1;
+ lpsizes[index] = (lpsizes[index] &
+ ~(0xf << (mask_index * 4))) |
+ (((unsigned long)psize) << (mask_index * 4));
+ }

hpsizes = mm->context.high_slices_psize;
for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) {
@@ -629,7 +631,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,

unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
{
- unsigned char *hpsizes;
+ unsigned char *psizes;
int index, mask_index;

/*
@@ -643,15 +645,14 @@ unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
#endif
}
if (addr < SLICE_LOW_TOP) {
- u64 lpsizes;
- lpsizes = mm->context.low_slices_psize;
+ psizes = mm->context.low_slices_psize;
index = GET_LOW_SLICE_INDEX(addr);
- return (lpsizes >> (index * 4)) & 0xf;
+ } else {
+ psizes = mm->context.high_slices_psize;
+ index = GET_HIGH_SLICE_INDEX(addr);
}
- hpsizes = mm->context.high_slices_psize;
- index = GET_HIGH_SLICE_INDEX(addr);
mask_index = index & 0x1;
- return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xf;
+ return (psizes[index >> 1] >> (mask_index * 4)) & 0xf;
}
EXPORT_SYMBOL_GPL(get_slice_psize);

@@ -672,8 +673,8 @@ EXPORT_SYMBOL_GPL(get_slice_psize);
void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
{
int index, mask_index;
- unsigned char *hpsizes;
- unsigned long flags, lpsizes;
+ unsigned char *hpsizes, *lpsizes;
+ unsigned long flags;
unsigned int old_psize;
int i;

@@ -691,12 +692,14 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
wmb();

lpsizes = mm->context.low_slices_psize;
- for (i = 0; i < SLICE_NUM_LOW; i++)
- if (((lpsizes >> (i * 4)) & 0xf) == old_psize)
- lpsizes = (lpsizes & ~(0xful << (i * 4))) |
- (((unsigned long)psize) << (i * 4));
- /* Assign the value back */
- mm->context.low_slices_psize = lpsizes;
+ for (i = 0; i < SLICE_NUM_LOW; i++) {
+ mask_index = i & 0x1;
+ index = i >> 1;
+ if (((lpsizes[index] >> (mask_index * 4)) & 0xf) == old_psize)
+ lpsizes[index] = (lpsizes[index] &
+ ~(0xf << (mask_index * 4))) |
+ (((unsigned long)psize) << (mask_index * 4));
+ }

hpsizes = mm->context.high_slices_psize;
for (i = 0; i < SLICE_NUM_HIGH; i++) {
--
2.13.3


2018-01-24 09:06:33

by Christophe Leroy

Subject: [PATCH v3 3/5] powerpc/32: Fix hugepage allocation on 8xx at hint address

On the 8xx, the page size is set in the PMD entry and applies to
all pages of the page table pointed to by that PMD entry.

When an app has some regular pages allocated (e.g. see below) and tries
to mmap() a huge page at a hint address covered by the same PMD entry,
the kernel accepts the hint although the 8xx cannot handle different
page sizes in the same PMD entry.

10000000-10001000 r-xp 00000000 00:0f 2597 /root/malloc
10010000-10011000 rwxp 00000000 00:0f 2597 /root/malloc

mmap(0x10080000, 524288, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS|0x40000, -1, 0) = 0x10080000

This results in the app remaining forever in do_page_fault()/hugetlb_fault(),
and when interrupting that app, we get the following warning:

[162980.035629] WARNING: CPU: 0 PID: 2777 at arch/powerpc/mm/hugetlbpage.c:354 hugetlb_free_pgd_range+0xc8/0x1e4
[162980.035699] CPU: 0 PID: 2777 Comm: malloc Tainted: G W 4.14.6 #85
[162980.035744] task: c67e2c00 task.stack: c668e000
[162980.035783] NIP: c000fe18 LR: c00e1eec CTR: c00f90c0
[162980.035830] REGS: c668fc20 TRAP: 0700 Tainted: G W (4.14.6)
[162980.035854] MSR: 00029032 <EE,ME,IR,DR,RI> CR: 24044224 XER: 20000000
[162980.036003]
[162980.036003] GPR00: c00e1eec c668fcd0 c67e2c00 00000010 c6869410 10080000 00000000 77fb4000
[162980.036003] GPR08: ffff0001 0683c001 00000000 ffffff80 44028228 10018a34 00004008 418004fc
[162980.036003] GPR16: c668e000 00040100 c668e000 c06c0000 c668fe78 c668e000 c6835ba0 c668fd48
[162980.036003] GPR24: 00000000 73ffffff 74000000 00000001 77fb4000 100fffff 10100000 10100000
[162980.036743] NIP [c000fe18] hugetlb_free_pgd_range+0xc8/0x1e4
[162980.036839] LR [c00e1eec] free_pgtables+0x12c/0x150
[162980.036861] Call Trace:
[162980.036939] [c668fcd0] [c00f0774] unlink_anon_vmas+0x1c4/0x214 (unreliable)
[162980.037040] [c668fd10] [c00e1eec] free_pgtables+0x12c/0x150
[162980.037118] [c668fd40] [c00eabac] exit_mmap+0xe8/0x1b4
[162980.037210] [c668fda0] [c0019710] mmput.part.9+0x20/0xd8
[162980.037301] [c668fdb0] [c001ecb0] do_exit+0x1f0/0x93c
[162980.037386] [c668fe00] [c001f478] do_group_exit+0x40/0xcc
[162980.037479] [c668fe10] [c002a76c] get_signal+0x47c/0x614
[162980.037570] [c668fe70] [c0007840] do_signal+0x54/0x244
[162980.037654] [c668ff30] [c0007ae8] do_notify_resume+0x34/0x88
[162980.037744] [c668ff40] [c000dae8] do_user_signal+0x74/0xc4
[162980.037781] Instruction dump:
[162980.037821] 7fdff378 81370000 54a3463a 80890020 7d24182e 7c841a14 712a0004 4082ff94
[162980.038014] 2f890000 419e0010 712a0ff0 408200e0 <0fe00000> 54a9000a 7f984840 419d0094
[162980.038216] ---[ end trace c0ceeca8e7a5800a ]---
[162980.038754] BUG: non-zero nr_ptes on freeing mm: 1
[162985.363322] BUG: non-zero nr_ptes on freeing mm: -1

In order to fix this, this patch uses the address space "slices"
implemented for BOOK3S/64 and enhanced to support PPC32 by the
preceding patch.

This patch modifies the context.id on the 8xx to be in the range
[1:16] instead of [0:15] so that context.id == 0 identifies a
not-yet-initialised context, as done on BOOK3S.
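
Condensed from the diff below, the resulting id handling is:

	/* ids handed out by the allocator are now 1..16 ... */
	first_context = 1;
	last_context = 16;

	/* ... while the hardware CASID field still takes 0..15 */
	mtspr(SPRN_M_CASID, id - 1);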

This patch activates CONFIG_PPC_MM_SLICES when CONFIG_HUGETLB_PAGE is
selected for the 8xx.

Although we could in theory have as many slices as PMD entries, the
current slices implementation limits the number of low slices to 16.
This limitation does not prevent us from fixing the initial issue,
although it is suboptimal. It will be addressed in a subsequent patch.

Fixes: 4b91428699477 ("powerpc/8xx: Implement support of hugepages")
Signed-off-by: Christophe Leroy <[email protected]>
---
v2: First patch of the v1 series split in two parts
v3: No changes

arch/powerpc/include/asm/mmu-8xx.h | 6 ++++++
arch/powerpc/kernel/setup-common.c | 2 ++
arch/powerpc/mm/8xx_mmu.c | 2 +-
arch/powerpc/mm/hugetlbpage.c | 2 ++
arch/powerpc/mm/mmu_context_nohash.c | 18 ++++++++++++++++--
arch/powerpc/platforms/Kconfig.cputype | 1 +
6 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h
index 5bb3dbede41a..5f89b6010453 100644
--- a/arch/powerpc/include/asm/mmu-8xx.h
+++ b/arch/powerpc/include/asm/mmu-8xx.h
@@ -169,6 +169,12 @@ typedef struct {
unsigned int id;
unsigned int active;
unsigned long vdso_base;
+#ifdef CONFIG_PPC_MM_SLICES
+ u16 user_psize; /* page size index */
+ u64 low_slices_psize; /* page size encodings */
+ unsigned char high_slices_psize[0];
+ unsigned long slb_addr_limit;
+#endif
} mm_context_t;

#define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000)
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 8fd3a70047f1..edf98ea92035 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -916,6 +916,8 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_PPC64
if (!radix_enabled())
init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
+#elif defined(CONFIG_PPC_8xx)
+ init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW;
#else
#error "context.addr_limit not initialized."
#endif
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
index f29212e40f40..0be77709446c 100644
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/8xx_mmu.c
@@ -192,7 +192,7 @@ void set_context(unsigned long id, pgd_t *pgd)
mtspr(SPRN_M_TW, __pa(pgd) - offset);

/* Update context */
- mtspr(SPRN_M_CASID, id);
+ mtspr(SPRN_M_CASID, id - 1);
/* sync */
mb();
}
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index a9b9083c5e49..79e1378ee303 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -553,9 +553,11 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
struct hstate *hstate = hstate_file(file);
int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));

+#ifdef CONFIG_PPC_RADIX_MMU
if (radix_enabled())
return radix__hugetlb_get_unmapped_area(file, addr, len,
pgoff, flags);
+#endif
return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1);
}
#endif
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 4554d6527682..d98f7e5c141b 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -331,6 +331,20 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm)
{
pr_hard("initing context for mm @%p\n", mm);

+#ifdef CONFIG_PPC_MM_SLICES
+ if (!mm->context.slb_addr_limit)
+ mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW;
+
+ /*
+ * We have MMU_NO_CONTEXT set to be ~0. Hence check
+ * explicitly against context.id == 0. This ensures that we properly
+ * initialize context slice details for newly allocated mm's (which will
+ * have id == 0) and don't alter context slice inherited via fork (which
+ * will have id != 0).
+ */
+ if (mm->context.id == 0)
+ slice_set_user_psize(mm, mmu_virtual_psize);
+#endif
mm->context.id = MMU_NO_CONTEXT;
mm->context.active = 0;
return 0;
@@ -428,8 +442,8 @@ void __init mmu_context_init(void)
* -- BenH
*/
if (mmu_has_feature(MMU_FTR_TYPE_8xx)) {
- first_context = 0;
- last_context = 15;
+ first_context = 1;
+ last_context = 16;
no_selective_tlbil = true;
} else if (mmu_has_feature(MMU_FTR_TYPE_47x)) {
first_context = 1;
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index ae07470fde3c..73a7ea333e9e 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -334,6 +334,7 @@ config PPC_BOOK3E_MMU
config PPC_MM_SLICES
bool
default y if PPC_BOOK3S_64
+ default y if PPC_8xx && HUGETLB_PAGE
default n

config PPC_HAVE_PMU_SUPPORT
--
2.13.3


2018-01-24 09:06:50

by Christophe Leroy

Subject: [PATCH v3 2/5] powerpc/mm: Enhance 'slice' for supporting PPC32

In preparation for the following patch, which will fix an issue on
the 8xx by re-using the 'slices', this patch enhances the
'slices' implementation to support 32-bit CPUs.

On PPC32, the address space is limited to 4Gbytes, hence only the low
slices will be used.

This patch moves the "slices" function prototypes from page_64.h to slice.h.

The high slices use bitmaps. As the bitmap functions are not prepared
to handle bitmaps of size 0, the bitmap_xxx() calls are wrapped into
slice_bitmap_xxx() functions, which are no-ops on PPC32.
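
A minimal sketch of the wrapper pattern (the complete set is in the diff
below); on PPC32 the body is empty, so the compiler elides the call:

	/* slice_32.h: SLICE_NUM_HIGH is 0, nothing to do */
	static inline void slice_bitmap_zero(unsigned long *dst, unsigned int nbits)
	{
	}

	/* slice_64.h: forward to the generic bitmap helper */
	static inline void slice_bitmap_zero(unsigned long *dst, unsigned int nbits)
	{
		bitmap_zero(dst, nbits);
	}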

Signed-off-by: Christophe Leroy <[email protected]>
---
v2: First patch of the v1 series split in two parts; added slice_bitmap_xxx() macros.
v3: Moved slice related stuff into slice.h and slice_32/64.h
slice_bitmap_xxx() are now static inline functions and platform dependent
SLICE_LOW_TOP declared ull on PPC32; with correct casts this allows keeping it at 0x100000000
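
The SLICE_LOW_TOP point is visible in the slice.c hunk below: 0x100000000ull
does not fit in a 32-bit unsigned long, but SLICE_LOW_TOP - 1 (0xffffffff)
does, so call sites cast after subtracting:

	unsigned long mend = min(end, (unsigned long)(SLICE_LOW_TOP - 1));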

arch/powerpc/include/asm/page.h | 1 +
arch/powerpc/include/asm/page_64.h | 59 ----------------------------------
arch/powerpc/include/asm/slice.h | 63 +++++++++++++++++++++++++++++++++++++
arch/powerpc/include/asm/slice_32.h | 56 +++++++++++++++++++++++++++++++++
arch/powerpc/include/asm/slice_64.h | 61 +++++++++++++++++++++++++++++++++++
arch/powerpc/mm/slice.c | 38 ++++++++++++----------
6 files changed, 203 insertions(+), 75 deletions(-)
create mode 100644 arch/powerpc/include/asm/slice.h
create mode 100644 arch/powerpc/include/asm/slice_32.h
create mode 100644 arch/powerpc/include/asm/slice_64.h

diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 8da5d4c1cab2..d5f1c41b7dba 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -344,5 +344,6 @@ typedef struct page *pgtable_t;

#include <asm-generic/memory_model.h>
#endif /* __ASSEMBLY__ */
+#include <asm/slice.h>

#endif /* _ASM_POWERPC_PAGE_H */
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index 56234c6fcd61..af04acdb873f 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -86,65 +86,6 @@ extern u64 ppc64_pft_size;

#endif /* __ASSEMBLY__ */

-#ifdef CONFIG_PPC_MM_SLICES
-
-#define SLICE_LOW_SHIFT 28
-#define SLICE_HIGH_SHIFT 40
-
-#define SLICE_LOW_TOP (0x100000000ul)
-#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
-#define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
-
-#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT)
-#define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT)
-
-#ifndef __ASSEMBLY__
-struct mm_struct;
-
-extern unsigned long slice_get_unmapped_area(unsigned long addr,
- unsigned long len,
- unsigned long flags,
- unsigned int psize,
- int topdown);
-
-extern unsigned int get_slice_psize(struct mm_struct *mm,
- unsigned long addr);
-
-extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize);
-extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
- unsigned long len, unsigned int psize);
-
-#endif /* __ASSEMBLY__ */
-#else
-#define slice_init()
-#ifdef CONFIG_PPC_BOOK3S_64
-#define get_slice_psize(mm, addr) ((mm)->context.user_psize)
-#define slice_set_user_psize(mm, psize) \
-do { \
- (mm)->context.user_psize = (psize); \
- (mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \
-} while (0)
-#else /* !CONFIG_PPC_BOOK3S_64 */
-#ifdef CONFIG_PPC_64K_PAGES
-#define get_slice_psize(mm, addr) MMU_PAGE_64K
-#else /* CONFIG_PPC_64K_PAGES */
-#define get_slice_psize(mm, addr) MMU_PAGE_4K
-#endif /* !CONFIG_PPC_64K_PAGES */
-#define slice_set_user_psize(mm, psize) do { BUG(); } while(0)
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
-#define slice_set_range_psize(mm, start, len, psize) \
- slice_set_user_psize((mm), (psize))
-#endif /* CONFIG_PPC_MM_SLICES */
-
-#ifdef CONFIG_HUGETLB_PAGE
-
-#ifdef CONFIG_PPC_MM_SLICES
-#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
-#endif
-
-#endif /* !CONFIG_HUGETLB_PAGE */
-
#define VM_DATA_DEFAULT_FLAGS \
(is_32bit_task() ? \
VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64)
diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h
new file mode 100644
index 000000000000..2b4b70de7e71
--- /dev/null
+++ b/arch/powerpc/include/asm/slice.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_SLICE_H
+#define _ASM_POWERPC_SLICE_H
+
+#ifdef CONFIG_PPC_MM_SLICES
+
+#ifdef CONFIG_PPC64
+#include <asm/slice_64.h>
+#else
+#include <asm/slice_32.h>
+#endif
+
+#ifdef CONFIG_HUGETLB_PAGE
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#endif
+#define HAVE_ARCH_UNMAPPED_AREA
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+
+#define SLICE_LOW_SHIFT 28
+#define SLICE_LOW_TOP (0x100000000ull)
+#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
+#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT)
+
+#ifndef __ASSEMBLY__
+
+struct mm_struct;
+
+unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
+ unsigned long flags, unsigned int psize,
+ int topdown);
+
+unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr);
+
+void slice_set_user_psize(struct mm_struct *mm, unsigned int psize);
+void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long len, unsigned int psize);
+
+#else
+#define slice_init()
+#ifdef CONFIG_PPC_BOOK3S_64
+#define get_slice_psize(mm, addr) ((mm)->context.user_psize)
+#define slice_set_user_psize(mm, psize) \
+do { \
+ (mm)->context.user_psize = (psize); \
+ (mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \
+} while (0)
+#else /* !CONFIG_PPC_BOOK3S_64 */
+#ifdef CONFIG_PPC_64K_PAGES
+#define get_slice_psize(mm, addr) MMU_PAGE_64K
+#else /* CONFIG_PPC_64K_PAGES */
+#define get_slice_psize(mm, addr) MMU_PAGE_4K
+#endif /* !CONFIG_PPC_64K_PAGES */
+#define slice_set_user_psize(mm, psize) do { BUG(); } while(0)
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#define slice_set_range_psize(mm, start, len, psize) \
+ slice_set_user_psize((mm), (psize))
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* CONFIG_PPC_MM_SLICES */
+
+#endif /* _ASM_POWERPC_SLICE_H */
diff --git a/arch/powerpc/include/asm/slice_32.h b/arch/powerpc/include/asm/slice_32.h
new file mode 100644
index 000000000000..7e27c0dfb913
--- /dev/null
+++ b/arch/powerpc/include/asm/slice_32.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_SLICE_32_H
+#define _ASM_POWERPC_SLICE_32_H
+
+#define SLICE_HIGH_SHIFT 0
+#define SLICE_NUM_HIGH 0ul
+#define GET_HIGH_SLICE_INDEX(addr) (addr & 0)
+
+#ifndef __ASSEMBLY__
+
+static inline void slice_bitmap_zero(unsigned long *dst, unsigned int nbits)
+{
+}
+
+static inline int slice_bitmap_and(unsigned long *dst,
+ const unsigned long *src1,
+ const unsigned long *src2,
+ unsigned int nbits)
+{
+ return 0;
+}
+
+static inline void slice_bitmap_or(unsigned long *dst,
+ const unsigned long *src1,
+ const unsigned long *src2,
+ unsigned int nbits)
+{
+}
+
+static inline int slice_bitmap_andnot(unsigned long *dst,
+ const unsigned long *src1,
+ const unsigned long *src2,
+ unsigned int nbits)
+{
+ return 0;
+}
+
+static inline int slice_bitmap_equal(const unsigned long *src1,
+ const unsigned long *src2,
+ unsigned int nbits)
+{
+ return 1;
+}
+
+static inline int slice_bitmap_empty(const unsigned long *src, unsigned nbits)
+{
+ return 1;
+}
+
+static inline void slice_bitmap_set(unsigned long *map, unsigned int start,
+ unsigned int nbits)
+{
+}
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_SLICE_32_H */
diff --git a/arch/powerpc/include/asm/slice_64.h b/arch/powerpc/include/asm/slice_64.h
new file mode 100644
index 000000000000..9d1c97b83010
--- /dev/null
+++ b/arch/powerpc/include/asm/slice_64.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_SLICE_64_H
+#define _ASM_POWERPC_SLICE_64_H
+
+#define SLICE_HIGH_SHIFT 40
+#define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
+#define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT)
+
+#ifndef __ASSEMBLY__
+
+#include <linux/bitmap.h>
+
+static inline void slice_bitmap_zero(unsigned long *dst, unsigned int nbits)
+{
+ bitmap_zero(dst, nbits);
+}
+
+static inline int slice_bitmap_and(unsigned long *dst,
+ const unsigned long *src1,
+ const unsigned long *src2,
+ unsigned int nbits)
+{
+ return bitmap_and(dst, src1, src2, nbits);
+}
+
+static inline void slice_bitmap_or(unsigned long *dst,
+ const unsigned long *src1,
+ const unsigned long *src2,
+ unsigned int nbits)
+{
+ bitmap_or(dst, src1, src2, nbits);
+}
+
+static inline int slice_bitmap_andnot(unsigned long *dst,
+ const unsigned long *src1,
+ const unsigned long *src2,
+ unsigned int nbits)
+{
+ return bitmap_andnot(dst, src1, src2, nbits);
+}
+
+static inline int slice_bitmap_equal(const unsigned long *src1,
+ const unsigned long *src2,
+ unsigned int nbits)
+{
+ return bitmap_equal(src1, src2, nbits);
+}
+
+static inline int slice_bitmap_empty(const unsigned long *src, unsigned nbits)
+{
+ return bitmap_empty(src, nbits);
+}
+
+static inline void slice_bitmap_set(unsigned long *map, unsigned int start,
+ unsigned int nbits)
+{
+ bitmap_set(map, start, nbits);
+}
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_SLICE_64_H */
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 98b53d48968f..549704dfa777 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -73,10 +73,11 @@ static void slice_range_to_mask(unsigned long start, unsigned long len,
unsigned long end = start + len - 1;

ret->low_slices = 0;
- bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
+ slice_bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);

if (start < SLICE_LOW_TOP) {
- unsigned long mend = min(end, (SLICE_LOW_TOP - 1));
+ unsigned long mend = min(end,
+ (unsigned long)(SLICE_LOW_TOP - 1));

ret->low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
- (1u << GET_LOW_SLICE_INDEX(start));
@@ -87,7 +88,7 @@ static void slice_range_to_mask(unsigned long start, unsigned long len,
unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT));
unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index;

- bitmap_set(ret->high_slices, start_index, count);
+ slice_bitmap_set(ret->high_slices, start_index, count);
}
}

@@ -113,11 +114,13 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
unsigned long start = slice << SLICE_HIGH_SHIFT;
unsigned long end = start + (1ul << SLICE_HIGH_SHIFT);

+#ifdef CONFIG_PPC64
/* Hack, so that each addresses is controlled by exactly one
* of the high or low area bitmaps, the first high area starts
* at 4GB, not 0 */
if (start == 0)
start = SLICE_LOW_TOP;
+#endif

return !slice_area_is_free(mm, start, end - start);
}
@@ -128,7 +131,7 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
unsigned long i;

ret->low_slices = 0;
- bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
+ slice_bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);

for (i = 0; i < SLICE_NUM_LOW; i++)
if (!slice_low_has_vma(mm, i))
@@ -151,7 +154,7 @@ static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_ma
u64 lpsizes;

ret->low_slices = 0;
- bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
+ slice_bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);

lpsizes = mm->context.low_slices_psize;
for (i = 0; i < SLICE_NUM_LOW; i++)
@@ -180,15 +183,16 @@ static int slice_check_fit(struct mm_struct *mm,
*/
unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit);

- bitmap_and(result, mask.high_slices,
- available.high_slices, slice_count);
+ slice_bitmap_and(result, mask.high_slices, available.high_slices,
+ slice_count);

return (mask.low_slices & available.low_slices) == mask.low_slices &&
- bitmap_equal(result, mask.high_slices, slice_count);
+ slice_bitmap_equal(result, mask.high_slices, slice_count);
}

static void slice_flush_segments(void *parm)
{
+#ifdef CONFIG_PPC64
struct mm_struct *mm = parm;
unsigned long flags;

@@ -200,6 +204,7 @@ static void slice_flush_segments(void *parm)
local_irq_save(flags);
slb_flush_and_rebolt();
local_irq_restore(flags);
+#endif
}

static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize)
@@ -389,16 +394,16 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src)
{
dst->low_slices |= src->low_slices;
- bitmap_or(dst->high_slices, dst->high_slices, src->high_slices,
- SLICE_NUM_HIGH);
+ slice_bitmap_or(dst->high_slices, dst->high_slices, src->high_slices,
+ SLICE_NUM_HIGH);
}

static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask *src)
{
dst->low_slices &= ~src->low_slices;

- bitmap_andnot(dst->high_slices, dst->high_slices, src->high_slices,
- SLICE_NUM_HIGH);
+ slice_bitmap_andnot(dst->high_slices, dst->high_slices,
+ src->high_slices, SLICE_NUM_HIGH);
}

#ifdef CONFIG_PPC_64K_PAGES
@@ -446,14 +451,14 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
* init different masks
*/
mask.low_slices = 0;
- bitmap_zero(mask.high_slices, SLICE_NUM_HIGH);
+ slice_bitmap_zero(mask.high_slices, SLICE_NUM_HIGH);

/* silence stupid warning */;
potential_mask.low_slices = 0;
- bitmap_zero(potential_mask.high_slices, SLICE_NUM_HIGH);
+ slice_bitmap_zero(potential_mask.high_slices, SLICE_NUM_HIGH);

compat_mask.low_slices = 0;
- bitmap_zero(compat_mask.high_slices, SLICE_NUM_HIGH);
+ slice_bitmap_zero(compat_mask.high_slices, SLICE_NUM_HIGH);

/* Sanity checks */
BUG_ON(mm->task_size == 0);
@@ -591,7 +596,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
convert:
slice_andnot_mask(&mask, &good_mask);
slice_andnot_mask(&mask, &compat_mask);
- if (mask.low_slices || !bitmap_empty(mask.high_slices, SLICE_NUM_HIGH)) {
+ if (mask.low_slices ||
+ !slice_bitmap_empty(mask.high_slices, SLICE_NUM_HIGH)) {
slice_convert(mm, mask, psize);
if (psize > MMU_PAGE_BASE)
on_each_cpu(slice_flush_segments, mm, 1);
--
2.13.3


2018-01-24 09:16:19

by Aneesh Kumar K.V

Subject: Re: [PATCH v3 5/5] powerpc/mm: Fix growth direction for hugepages mmaps with slice



On 01/24/2018 02:32 PM, Christophe Leroy wrote:
> An application running with libhugetlbfs fails to allocate
> additional pages to HEAP due to the hugemap being done
> inconditionally as topdown mapping:
>
> mmap(0x10080000, 1572864, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|0x40000, -1, 0) = 0x73e80000
> [...]
> mmap(0x74000000, 1048576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|0x40000, -1, 0x180000) = 0x73d80000
> munmap(0x73d80000, 1048576) = 0
> [...]
> mmap(0x74000000, 1572864, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|0x40000, -1, 0x180000) = 0x73d00000
> munmap(0x73d00000, 1572864) = 0
> [...]
> mmap(0x74000000, 1572864, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|0x40000, -1, 0x180000) = 0x73d00000
> munmap(0x73d00000, 1572864) = 0
> [...]
>
> As one can see from the above strace log, mmap() allocates further
> pages below the initial one because no space is available on top of it.
>
> This patch fixes it by requesting bottomup mapping as the non
> generic hugetlb_get_unmapped_area() does
>
> Fixes: d0f13e3c20b6f ("[POWERPC] Introduce address space "slices" ")
> Signed-off-by: Christophe Leroy <[email protected]>
> ---
> v3: Was a standalone patch before, but conflicts with this serie.
>
> arch/powerpc/mm/hugetlbpage.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
> index 79e1378ee303..368ea6b248ad 100644
> --- a/arch/powerpc/mm/hugetlbpage.c
> +++ b/arch/powerpc/mm/hugetlbpage.c
> @@ -558,7 +558,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
> return radix__hugetlb_get_unmapped_area(file, addr, len,
> pgoff, flags);
> #endif
> - return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1);
> + return slice_get_unmapped_area(addr, len, flags, mmu_psize, 0);
> }
> #endif

Why make this change also for PPC64? Can you do this #ifdef 8xx? You can
ideally move hugetlb_get_unmapped_area to slice.h and then make this
much simpler for the 8xx?

-aneesh


2018-01-24 09:28:25

by Christophe Leroy

Subject: Re: [PATCH v3 5/5] powerpc/mm: Fix growth direction for hugepages mmaps with slice



On 24/01/2018 at 10:15, Aneesh Kumar K.V wrote:
>
>
> On 01/24/2018 02:32 PM, Christophe Leroy wrote:
>> An application running with libhugetlbfs fails to allocate
>> additional pages to HEAP due to the hugemap being done
>> inconditionally as topdown mapping:
>>
>> mmap(0x10080000, 1572864, PROT_READ|PROT_WRITE,
>> MAP_PRIVATE|MAP_ANONYMOUS|0x40000, -1, 0) = 0x73e80000
>> [...]
>> mmap(0x74000000, 1048576, PROT_READ|PROT_WRITE,
>> MAP_PRIVATE|MAP_ANONYMOUS|0x40000, -1, 0x180000) = 0x73d80000
>> munmap(0x73d80000, 1048576)             = 0
>> [...]
>> mmap(0x74000000, 1572864, PROT_READ|PROT_WRITE,
>> MAP_PRIVATE|MAP_ANONYMOUS|0x40000, -1, 0x180000) = 0x73d00000
>> munmap(0x73d00000, 1572864)             = 0
>> [...]
>> mmap(0x74000000, 1572864, PROT_READ|PROT_WRITE,
>> MAP_PRIVATE|MAP_ANONYMOUS|0x40000, -1, 0x180000) = 0x73d00000
>> munmap(0x73d00000, 1572864)             = 0
>> [...]
>>
>> As one can see from the above strace log, mmap() allocates further
>> pages below the initial one because no space is available on top of it.
>>
>> This patch fixes it by requesting bottomup mapping as the non
>> generic hugetlb_get_unmapped_area() does
>>
>> Fixes: d0f13e3c20b6f ("[POWERPC] Introduce address space "slices" ")
>> Signed-off-by: Christophe Leroy <[email protected]>
>> ---
>>   v3: Was a standalone patch before, but conflicts with this serie.
>>
>>   arch/powerpc/mm/hugetlbpage.c | 2 +-
>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/arch/powerpc/mm/hugetlbpage.c
>> b/arch/powerpc/mm/hugetlbpage.c
>> index 79e1378ee303..368ea6b248ad 100644
>> --- a/arch/powerpc/mm/hugetlbpage.c
>> +++ b/arch/powerpc/mm/hugetlbpage.c
>> @@ -558,7 +558,7 @@ unsigned long hugetlb_get_unmapped_area(struct
>> file *file, unsigned long addr,
>>           return radix__hugetlb_get_unmapped_area(file, addr, len,
>>                                  pgoff, flags);
>>   #endif
>> -    return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1);
>> +    return slice_get_unmapped_area(addr, len, flags, mmu_psize, 0);
>>   }
>>   #endif
>
> Why make this change also for PPC64? Can you do this #ifdef 8xx?.You can
> ideally move hugetlb_get_unmapped_area to slice.h and then make this
> much simpler for 8xxx?
>

Did you try with HUGETLB_MORECORE_HEAPBASE=0x11000000 on PPC64 as I
suggested in my last email on this subject (22/01/2018 9:22)?

Before doing anything specific to the PPC32/8xx, I'd like to be sure the
issue is definitely only on PPC32.

Thanks,
Christophe


2018-01-24 09:36:22

by Aneesh Kumar K.V

Subject: Re: [PATCH v3 5/5] powerpc/mm: Fix growth direction for hugepages mmaps with slice



On 01/24/2018 02:57 PM, Christophe LEROY wrote:
>
>
> On 24/01/2018 at 10:15, Aneesh Kumar K.V wrote:
>>
>>
>> On 01/24/2018 02:32 PM, Christophe Leroy wrote:
>>> An application running with libhugetlbfs fails to allocate
>>> additional pages to HEAP due to the hugemap being done
>>> inconditionally as topdown mapping:
>>>
>>> mmap(0x10080000, 1572864, PROT_READ|PROT_WRITE,
>>> MAP_PRIVATE|MAP_ANONYMOUS|0x40000, -1, 0) = 0x73e80000
>>> [...]
>>> mmap(0x74000000, 1048576, PROT_READ|PROT_WRITE,
>>> MAP_PRIVATE|MAP_ANONYMOUS|0x40000, -1, 0x180000) = 0x73d80000
>>> munmap(0x73d80000, 1048576)             = 0
>>> [...]
>>> mmap(0x74000000, 1572864, PROT_READ|PROT_WRITE,
>>> MAP_PRIVATE|MAP_ANONYMOUS|0x40000, -1, 0x180000) = 0x73d00000
>>> munmap(0x73d00000, 1572864)             = 0
>>> [...]
>>> mmap(0x74000000, 1572864, PROT_READ|PROT_WRITE,
>>> MAP_PRIVATE|MAP_ANONYMOUS|0x40000, -1, 0x180000) = 0x73d00000
>>> munmap(0x73d00000, 1572864)             = 0
>>> [...]
>>>
>>> As one can see from the above strace log, mmap() allocates further
>>> pages below the initial one because no space is available on top of it.
>>>
>>> This patch fixes it by requesting bottomup mapping as the non
>>> generic hugetlb_get_unmapped_area() does
>>>
>>> Fixes: d0f13e3c20b6f ("[POWERPC] Introduce address space "slices" ")
>>> Signed-off-by: Christophe Leroy <[email protected]>
>>> ---
>>>   v3: Was a standalone patch before, but conflicts with this serie.
>>>
>>>   arch/powerpc/mm/hugetlbpage.c | 2 +-
>>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>>
>>> diff --git a/arch/powerpc/mm/hugetlbpage.c
>>> b/arch/powerpc/mm/hugetlbpage.c
>>> index 79e1378ee303..368ea6b248ad 100644
>>> --- a/arch/powerpc/mm/hugetlbpage.c
>>> +++ b/arch/powerpc/mm/hugetlbpage.c
>>> @@ -558,7 +558,7 @@ unsigned long hugetlb_get_unmapped_area(struct
>>> file *file, unsigned long addr,
>>>           return radix__hugetlb_get_unmapped_area(file, addr, len,
>>>                                  pgoff, flags);
>>>   #endif
>>> -    return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1);
>>> +    return slice_get_unmapped_area(addr, len, flags, mmu_psize, 0);
>>>   }
>>>   #endif
>>
>> Why make this change also for PPC64? Can you do this #ifdef 8xx?.You
>> can ideally move hugetlb_get_unmapped_area to slice.h and then make
>> this much simpler for 8xxx?
>>
>
> Did you try with HUGETLB_MORECORE_HEAPBASE=0x11000000 on PPC64 as I
> suggested in my last email on this subject (22/01/2018 9:22) ?


yes. The test ran fine for me

kvaneesh@ltctulc6a-p1:[~]$ HUGETLB_MORECORE=yes
HUGETLB_MORECORE_HEAPBASE=0x30000000 ./a.out
10000000-10010000 r-xp 00000000 fc:00 9044312
/home/kvaneesh/a.out
10010000-10020000 r--p 00000000 fc:00 9044312
/home/kvaneesh/a.out
10020000-10030000 rw-p 00010000 fc:00 9044312
/home/kvaneesh/a.out
30000000-33000000 rw-p 00000000 00:0d 1062697
/anon_hugepage (deleted)
33000000-35000000 rw-p 03000000 00:0d 1062698
/anon_hugepage (deleted)
35000000-37000000 rw-p 05000000 00:0d 1062699
/anon_hugepage (deleted)
7ffff7d60000-7ffff7f10000 r-xp 00000000 fc:00 9250090
/lib/powerpc64le-linux-gnu/libc-2.23.so
7ffff7f10000-7ffff7f20000 r--p 001a0000 fc:00 9250090
/lib/powerpc64le-linux-gnu/libc-2.23.so
7ffff7f20000-7ffff7f30000 rw-p 001b0000 fc:00 9250090
/lib/powerpc64le-linux-gnu/libc-2.23.so
7ffff7f40000-7ffff7f60000 r-xp 00000000 fc:00 10754812
/usr/lib/libhugetlbfs.so.0
7ffff7f60000-7ffff7f70000 r--p 00010000 fc:00 10754812
/usr/lib/libhugetlbfs.so.0
7ffff7f70000-7ffff7f80000 rw-p 00020000 fc:00 10754812
/usr/lib/libhugetlbfs.so.0
7ffff7f80000-7ffff7fa0000 r-xp 00000000 00:00 0
[vdso]
7ffff7fa0000-7ffff7fe0000 r-xp 00000000 fc:00 9250107
/lib/powerpc64le-linux-gnu/ld-2.23.so
7ffff7fe0000-7ffff7ff0000 r--p 00030000 fc:00 9250107
/lib/powerpc64le-linux-gnu/ld-2.23.so
7ffff7ff0000-7ffff8000000 rw-p 00040000 fc:00 9250107
/lib/powerpc64le-linux-gnu/ld-2.23.so
7ffffffd0000-800000000000 rw-p 00000000 00:00 0
[stack]


>
> Before doing anything specific to the PPC32/8xx, I'd like to be sure the
> issue is definitly only on PPC32.
>

I am not sure I understand the problem correctly. If there is free
space in the required range, both topdown and bottomup searches should be
able to find it, unless topdown found another free area suitable for hugetlb
allocation above. My take is we should not change from topdown to
bottomup without really understanding the failure scenarios.

-aneesh


2018-01-24 09:39:54

by Christophe Leroy

Subject: Re: [PATCH v3 5/5] powerpc/mm: Fix growth direction for hugepages mmaps with slice



On 24/01/2018 at 10:35, Aneesh Kumar K.V wrote:
>
>
> On 01/24/2018 02:57 PM, Christophe LEROY wrote:
>>
>>
>> On 24/01/2018 at 10:15, Aneesh Kumar K.V wrote:
>>>
>>>
>>> On 01/24/2018 02:32 PM, Christophe Leroy wrote:
>>>> An application running with libhugetlbfs fails to allocate
>>>> additional pages to HEAP due to the hugemap being done
>>>> inconditionally as topdown mapping:
>>>>
>>>> mmap(0x10080000, 1572864, PROT_READ|PROT_WRITE,
>>>> MAP_PRIVATE|MAP_ANONYMOUS|0x40000, -1, 0) = 0x73e80000
>>>> [...]
>>>> mmap(0x74000000, 1048576, PROT_READ|PROT_WRITE,
>>>> MAP_PRIVATE|MAP_ANONYMOUS|0x40000, -1, 0x180000) = 0x73d80000
>>>> munmap(0x73d80000, 1048576)             = 0
>>>> [...]
>>>> mmap(0x74000000, 1572864, PROT_READ|PROT_WRITE,
>>>> MAP_PRIVATE|MAP_ANONYMOUS|0x40000, -1, 0x180000) = 0x73d00000
>>>> munmap(0x73d00000, 1572864)             = 0
>>>> [...]
>>>> mmap(0x74000000, 1572864, PROT_READ|PROT_WRITE,
>>>> MAP_PRIVATE|MAP_ANONYMOUS|0x40000, -1, 0x180000) = 0x73d00000
>>>> munmap(0x73d00000, 1572864)             = 0
>>>> [...]
>>>>
>>>> As one can see from the above strace log, mmap() allocates further
>>>> pages below the initial one because no space is available on top of it.
>>>>
>>>> This patch fixes it by requesting bottomup mapping as the non
>>>> generic hugetlb_get_unmapped_area() does
>>>>
>>>> Fixes: d0f13e3c20b6f ("[POWERPC] Introduce address space "slices" ")
>>>> Signed-off-by: Christophe Leroy <[email protected]>
>>>> ---
>>>>   v3: Was a standalone patch before, but conflicts with this serie.
>>>>
>>>>   arch/powerpc/mm/hugetlbpage.c | 2 +-
>>>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>>>
>>>> diff --git a/arch/powerpc/mm/hugetlbpage.c
>>>> b/arch/powerpc/mm/hugetlbpage.c
>>>> index 79e1378ee303..368ea6b248ad 100644
>>>> --- a/arch/powerpc/mm/hugetlbpage.c
>>>> +++ b/arch/powerpc/mm/hugetlbpage.c
>>>> @@ -558,7 +558,7 @@ unsigned long hugetlb_get_unmapped_area(struct
>>>> file *file, unsigned long addr,
>>>>           return radix__hugetlb_get_unmapped_area(file, addr, len,
>>>>                                  pgoff, flags);
>>>>   #endif
>>>> -    return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1);
>>>> +    return slice_get_unmapped_area(addr, len, flags, mmu_psize, 0);
>>>>   }
>>>>   #endif
>>>
>>> Why make this change also for PPC64? Can you do this #ifdef 8xx?.You
>>> can ideally move hugetlb_get_unmapped_area to slice.h and then make
>>> this much simpler for 8xxx?
>>>
>>
>> Did you try with HUGETLB_MORECORE_HEAPBASE=0x11000000 on PPC64 as I
>> suggested in my last email on this subject (22/01/2018 9:22) ?
>
>
> yes. The test ran fine for me

You tried with 0x30000000; that works as well on PPC32.

I'd really like you to try with 0x11000000, which is in the same slice as
the 10020000-10030000 range.

Christophe

>
> kvaneesh@ltctulc6a-p1:[~]$  HUGETLB_MORECORE=yes
> HUGETLB_MORECORE_HEAPBASE=0x30000000 ./a.out
> 10000000-10010000 r-xp 00000000 fc:00 9044312 /home/kvaneesh/a.out
> 10010000-10020000 r--p 00000000 fc:00 9044312 /home/kvaneesh/a.out
> 10020000-10030000 rw-p 00010000 fc:00 9044312 /home/kvaneesh/a.out
> 30000000-33000000 rw-p 00000000 00:0d 1062697 /anon_hugepage (deleted)
> 33000000-35000000 rw-p 03000000 00:0d 1062698 /anon_hugepage (deleted)
> 35000000-37000000 rw-p 05000000 00:0d 1062699 /anon_hugepage (deleted)
> 7ffff7d60000-7ffff7f10000 r-xp 00000000 fc:00 9250090
> /lib/powerpc64le-linux-gnu/libc-2.23.so
> 7ffff7f10000-7ffff7f20000 r--p 001a0000 fc:00 9250090
> /lib/powerpc64le-linux-gnu/libc-2.23.so
> 7ffff7f20000-7ffff7f30000 rw-p 001b0000 fc:00 9250090
> /lib/powerpc64le-linux-gnu/libc-2.23.so
> 7ffff7f40000-7ffff7f60000 r-xp 00000000 fc:00 10754812
> /usr/lib/libhugetlbfs.so.0
> 7ffff7f60000-7ffff7f70000 r--p 00010000 fc:00 10754812
> /usr/lib/libhugetlbfs.so.0
> 7ffff7f70000-7ffff7f80000 rw-p 00020000 fc:00 10754812
> /usr/lib/libhugetlbfs.so.0
> 7ffff7f80000-7ffff7fa0000 r-xp 00000000 00:00 0 [vdso]
> 7ffff7fa0000-7ffff7fe0000 r-xp 00000000 fc:00 9250107
> /lib/powerpc64le-linux-gnu/ld-2.23.so
> 7ffff7fe0000-7ffff7ff0000 r--p 00030000 fc:00 9250107
> /lib/powerpc64le-linux-gnu/ld-2.23.so
> 7ffff7ff0000-7ffff8000000 rw-p 00040000 fc:00 9250107
> /lib/powerpc64le-linux-gnu/ld-2.23.so
> 7ffffffd0000-800000000000 rw-p 00000000 00:00 0 [stack]
>
>
>>
>> Before doing anything specific to the PPC32/8xx, I'd like to be sure
>> the issue is definitly only on PPC32.
>>
>
> I am not sure I understand the problem correctly. If there is a free
> space in the required range, both topdown/bottomup search should be able
> to find it. Unless topdown found another free area suitable for hugetlb
> allocation above. My take is we should not change the topdown to
> bottomup without really understanding the failure scenarios.
>
> -aneesh

2018-01-24 09:52:50

by Aneesh Kumar K.V

Subject: Re: [PATCH v3 5/5] powerpc/mm: Fix growth direction for hugepages mmaps with slice



On 01/24/2018 03:09 PM, Christophe LEROY wrote:
>
>
> On 24/01/2018 at 10:35, Aneesh Kumar K.V wrote:
>>

>>> Did you try with HUGETLB_MORECORE_HEAPBASE=0x11000000 on PPC64 as I
>>> suggested in my last email on this subject (22/01/2018 9:22) ?
>>
>>
>> yes. The test ran fine for me
>
> You tried with 0x30000000, it works as well on PPC32.
>
> I'd really like you to try with 0x11000000 which is in the same slice as
> the 10020000-10030000 range.
>
>

Now that explains it better. But then the requested HEAPBASE was not
free, and hence the topdown search got an address in the below range.

7efffd000000-7f0000000000 rw-p 00000000 00:0d 1082770
/anon_hugepage (deleted)


The new range allocated is such that there is no scope for expansion of
the heap if we do a topdown search. But why should that require us to
change from topdown to bottomup search?


10000000-10010000 r-xp 00000000 fc:00 9044312
/home/kvaneesh/a.out
10010000-10020000 r--p 00000000 fc:00 9044312
/home/kvaneesh/a.out
10020000-10030000 rw-p 00010000 fc:00 9044312
/home/kvaneesh/a.out
7efffd000000-7f0000000000 rw-p 00000000 00:0d 1082770
/anon_hugepage (deleted)
7ffff2d40000-7ffff7d60000 rw-p 00000000 00:00 0
7ffff7d60000-7ffff7f10000 r-xp 00000000 fc:00 9250090
/lib/powerpc64le-linux-gnu/libc-2.23.so
7ffff7f10000-7ffff7f20000 r--p 001a0000 fc:00 9250090
/lib/powerpc64le-linux-gnu/libc-2.23.so
7ffff7f20000-7ffff7f30000 rw-p 001b0000 fc:00 9250090
/lib/powerpc64le-linux-gnu/libc-2.23.so
7ffff7f40000-7ffff7f60000 r-xp 00000000 fc:00 10754812
/usr/lib/libhugetlbfs.so.0
7ffff7f60000-7ffff7f70000 r--p 00010000 fc:00 10754812
/usr/lib/libhugetlbfs.so.0
7ffff7f70000-7ffff7f80000 rw-p 00020000 fc:00 10754812
/usr/lib/libhugetlbfs.so.0
7ffff7f80000-7ffff7fa0000 r-xp 00000000 00:00 0
[vdso]
7ffff7fa0000-7ffff7fe0000 r-xp 00000000 fc:00 9250107
/lib/powerpc64le-linux-gnu/ld-2.23.so
7ffff7fe0000-7ffff7ff0000 r--p 00030000 fc:00 9250107
/lib/powerpc64le-linux-gnu/ld-2.23.so
7ffff7ff0000-7ffff8000000 rw-p 00040000 fc:00 9250107
/lib/powerpc64le-linux-gnu/ld-2.23.so
7ffffffd0000-800000000000 rw-p 00000000 00:00 0
[stack]


For the specific test, one should pass a HEAPBASE value such that it
can be expanded if required, isn't it?

-aneesh


2018-01-24 10:04:25

by Christophe Leroy

Subject: Re: [PATCH v3 5/5] powerpc/mm: Fix growth direction for hugepages mmaps with slice



On 24/01/2018 at 10:51, Aneesh Kumar K.V wrote:
>
>
> On 01/24/2018 03:09 PM, Christophe LEROY wrote:
>>
>>
>> On 24/01/2018 at 10:35, Aneesh Kumar K.V wrote:
>>>
>
>>>> Did you try with HUGETLB_MORECORE_HEAPBASE=0x11000000 on PPC64 as I
>>>> suggested in my last email on this subject (22/01/2018 9:22) ?
>>>
>>>
>>> yes. The test ran fine for me
>>
>> You tried with 0x30000000, it works as well on PPC32.
>>
>> I'd really like you to try with 0x11000000 which is in the same slice
>> as the 10020000-10030000 range.
>>
>>
>
> Now that explains is better. But then the requested HEAPBASE was not
> free and hence topdown search got an address in the below range.
>
> 7efffd000000-7f0000000000 rw-p 00000000 00:0d 1082770 /anon_hugepage
> (deleted)
>
>
> The new range allocated is such that there is no scope for expansion of
> heap if we do a topdown search. But why should that require us to change
> from topdown/bottomup search?
>
>
> 10000000-10010000 r-xp 00000000 fc:00 9044312 /home/kvaneesh/a.out
> 10010000-10020000 r--p 00000000 fc:00 9044312 /home/kvaneesh/a.out
> 10020000-10030000 rw-p 00010000 fc:00 9044312 /home/kvaneesh/a.out
> 7efffd000000-7f0000000000 rw-p 00000000 00:0d 1082770 /anon_hugepage
> (deleted)
> 7ffff2d40000-7ffff7d60000 rw-p 00000000 00:00 0
> 7ffff7d60000-7ffff7f10000 r-xp 00000000 fc:00 9250090
> /lib/powerpc64le-linux-gnu/libc-2.23.so
> 7ffff7f10000-7ffff7f20000 r--p 001a0000 fc:00 9250090
> /lib/powerpc64le-linux-gnu/libc-2.23.so
> 7ffff7f20000-7ffff7f30000 rw-p 001b0000 fc:00 9250090
> /lib/powerpc64le-linux-gnu/libc-2.23.so
> 7ffff7f40000-7ffff7f60000 r-xp 00000000 fc:00 10754812
> /usr/lib/libhugetlbfs.so.0
> 7ffff7f60000-7ffff7f70000 r--p 00010000 fc:00 10754812
> /usr/lib/libhugetlbfs.so.0
> 7ffff7f70000-7ffff7f80000 rw-p 00020000 fc:00 10754812
> /usr/lib/libhugetlbfs.so.0
> 7ffff7f80000-7ffff7fa0000 r-xp 00000000 00:00 0 [vdso]
> 7ffff7fa0000-7ffff7fe0000 r-xp 00000000 fc:00 9250107
> /lib/powerpc64le-linux-gnu/ld-2.23.so
> 7ffff7fe0000-7ffff7ff0000 r--p 00030000 fc:00 9250107
> /lib/powerpc64le-linux-gnu/ld-2.23.so
> 7ffff7ff0000-7ffff8000000 rw-p 00040000 fc:00 9250107
> /lib/powerpc64le-linux-gnu/ld-2.23.so
> 7ffffffd0000-800000000000 rw-p 00000000 00:00 0 [stack]
>
>
> For the specific test, one should pass the HEAPBASE value such that it
> can be expanded if required isn't it ?

For the test, yes, it is dumb to pass an unusable HEAPBASE, but what
happens in real life:
* PPC32: No HEAPBASE; hugetlbfs defines a HEAPBASE at sbrk(0) +
PAGE_SIZE = 0x10800000 ==> this is in the same slice as the already
allocated range ==> the kernel behaves as if mmap() had been called with
no hint address and allocates something unusable instead.
* PPC64: No HEAPBASE; hugetlbfs seems to define a HEAPBASE at
100000000000, which doesn't conflict with an already allocated mapping
==> it works.

Now, in the generic case, i.e. when slices are not activated, calling
mmap() without a hint address allocates a suitable address because the
search is bottom-up. Why do it differently with slices?

Christophe

2018-01-24 10:09:51

by Aneesh Kumar K.V

Subject: Re: [PATCH v3 5/5] powerpc/mm: Fix growth direction for hugepages mmaps with slice



On 01/24/2018 03:33 PM, Christophe LEROY wrote:
>
>
> On 24/01/2018 at 10:51, Aneesh Kumar K.V wrote:
>>
>>
>> On 01/24/2018 03:09 PM, Christophe LEROY wrote:
>>>
>>>
>>> On 24/01/2018 at 10:35, Aneesh Kumar K.V wrote:
>>>>
>>
>>>>> Did you try with HUGETLB_MORECORE_HEAPBASE=0x11000000 on PPC64 as I
>>>>> suggested in my last email on this subject (22/01/2018 9:22) ?
>>>>
>>>>
>>>> yes. The test ran fine for me
>>>
>>> You tried with 0x30000000, it works as well on PPC32.
>>>
>>> I'd really like you to try with 0x11000000 which is in the same slice
>>> as the 10020000-10030000 range.
>>>
>>>
>>
>> Now that explains is better. But then the requested HEAPBASE was not
>> free and hence topdown search got an address in the below range.
>>
>> 7efffd000000-7f0000000000 rw-p 00000000 00:0d 1082770 /anon_hugepage
>> (deleted)
>>
>>
>> The new range allocated is such that there is no scope for expansion
>> of heap if we do a topdown search. But why should that require us to
>> change from topdown/bottomup search?
>>
>>
>> 10000000-10010000 r-xp 00000000 fc:00 9044312 /home/kvaneesh/a.out
>> 10010000-10020000 r--p 00000000 fc:00 9044312 /home/kvaneesh/a.out
>> 10020000-10030000 rw-p 00010000 fc:00 9044312 /home/kvaneesh/a.out
>> 7efffd000000-7f0000000000 rw-p 00000000 00:0d 1082770 /anon_hugepage
>> (deleted)
>> 7ffff2d40000-7ffff7d60000 rw-p 00000000 00:00 0
>> 7ffff7d60000-7ffff7f10000 r-xp 00000000 fc:00 9250090
>> /lib/powerpc64le-linux-gnu/libc-2.23.so
>> 7ffff7f10000-7ffff7f20000 r--p 001a0000 fc:00 9250090
>> /lib/powerpc64le-linux-gnu/libc-2.23.so
>> 7ffff7f20000-7ffff7f30000 rw-p 001b0000 fc:00 9250090
>> /lib/powerpc64le-linux-gnu/libc-2.23.so
>> 7ffff7f40000-7ffff7f60000 r-xp 00000000 fc:00 10754812
>> /usr/lib/libhugetlbfs.so.0
>> 7ffff7f60000-7ffff7f70000 r--p 00010000 fc:00 10754812
>> /usr/lib/libhugetlbfs.so.0
>> 7ffff7f70000-7ffff7f80000 rw-p 00020000 fc:00 10754812
>> /usr/lib/libhugetlbfs.so.0
>> 7ffff7f80000-7ffff7fa0000 r-xp 00000000 00:00 0 [vdso]
>> 7ffff7fa0000-7ffff7fe0000 r-xp 00000000 fc:00 9250107
>> /lib/powerpc64le-linux-gnu/ld-2.23.so
>> 7ffff7fe0000-7ffff7ff0000 r--p 00030000 fc:00 9250107
>> /lib/powerpc64le-linux-gnu/ld-2.23.so
>> 7ffff7ff0000-7ffff8000000 rw-p 00040000 fc:00 9250107
>> /lib/powerpc64le-linux-gnu/ld-2.23.so
>> 7ffffffd0000-800000000000 rw-p 00000000 00:00 0 [stack]
>>
>>
>> For the specific test, one should pass the HEAPBASE value such that it
>> can be expanded if required isn't it ?
>
> For the test, yes, it is dumb to pass an unusable HEAPBASE, but what
> happens in real life:
> * PPC32: No HEAPBASE; hugetlbfs defines a HEAPBASE at sbrk(0) +
> PAGE_SIZE = 0x10800000 ==> this is in the same slice as the already
> allocated mappings ==> the kernel behaves as if mmap() had been called
> with no hint address and allocates something unusable instead.
> * PPC64: No HEAPBASE; hugetlbfs seems to define a HEAPBASE at
> 100000000000, which doesn't conflict with an already allocated mapping
> ==> it works.
>
> Now take the generic case, ie when slices are not activated: when you
> call mmap() without a hint address, it allocates a suitable address
> because the search is bottom-up. Why do it differently with slices?
>

IIUC that is largely arch dependent; PPC64 has always done a topdown
search, even for regular non-hugetlb mmap. If you select the legacy
mmap layout, we use the bottom-up approach; see
arch_pick_mmap_layout() for more details. Now x86 is slightly
different: for the default search, if we can't find a mapping address
it will try a bottomup search. Having said that, if you think
libhugetlbfs made assumptions with respect to the 8xx and you don't
want to break it, make the 8xx unmapped-area search bottomup.

-aneesh
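
For reference, a simplified sketch of the selection logic in
arch_pick_mmap_layout() around the time of this thread (randomization
and the radix variants are elided here):

void arch_pick_mmap_layout(struct mm_struct *mm)
{
	if (mmap_is_legacy()) {
		mm->mmap_base = TASK_UNMAPPED_BASE;
		mm->get_unmapped_area = arch_get_unmapped_area;		/* bottom-up */
	} else {
		mm->mmap_base = mmap_base();
		mm->get_unmapped_area = arch_get_unmapped_area_topdown;	/* top-down */
	}
}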


2018-01-24 10:21:11

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH v3 5/5] powerpc/mm: Fix growth direction for hugepages mmaps with slice



Le 24/01/2018 à 11:08, Aneesh Kumar K.V a écrit :
>
> [...]
>
> IIUC that is largely arch dependent; PPC64 has always done a topdown
> search, even for regular non-hugetlb mmap. If you select the legacy
> mmap layout, we use the bottom-up approach; see
> arch_pick_mmap_layout() for more details. Now x86 is slightly
> different: for the default search, if we can't find a mapping address
> it will try a bottomup search. Having said that, if you think
> libhugetlbfs made assumptions with respect to the 8xx and you don't
> want to break it, make the 8xx unmapped-area search bottomup.
>

Or would there be a way to make libhugetlbfs aware of the slice
constraints, so that it chooses a suitable hint address on the first try?

Christophe
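
Purely as an illustration of what such awareness could look like, a
hypothetical user-space helper (this is not existing libhugetlbfs code,
and SLICE_SIZE is an assumption): round the default heap base up to the
next slice boundary so the hint never lands in a slice already used for
small pages.

#include <unistd.h>

#define SLICE_SIZE	(1UL << 26)	/* assumed 64M low slices (8xx) */

static unsigned long pick_heapbase(void)
{
	unsigned long base = (unsigned long)sbrk(0) + getpagesize();

	/* Round up to the next slice boundary. */
	return (base + SLICE_SIZE - 1) & ~(SLICE_SIZE - 1);
}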

2018-01-29 06:18:45

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [PATCH v3 1/5] powerpc/mm: Remove intermediate bitmap copy in 'slices'

Christophe Leroy <[email protected]> writes:

> bitmap_or() and bitmap_andnot() can work properly with dst identical
> to src1 or src2. There is no need of an intermediate result bitmap
> that is copied back to dst in a second step.
>
Reviewed-by: Aneesh Kumar K.V <[email protected]>
> Signed-off-by: Christophe Leroy <[email protected]>
> ---
> v2: New in v2
> v3: patch moved up front of the serie to avoid ephemeral slice_bitmap_copy() function in following patch
>
> arch/powerpc/mm/slice.c | 12 ++++--------
> 1 file changed, 4 insertions(+), 8 deletions(-)
>
> diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
> index 23ec2c5e3b78..98b53d48968f 100644
> --- a/arch/powerpc/mm/slice.c
> +++ b/arch/powerpc/mm/slice.c
> @@ -388,21 +388,17 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
>
> static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src)
> {
> - DECLARE_BITMAP(result, SLICE_NUM_HIGH);
> -
> dst->low_slices |= src->low_slices;
> - bitmap_or(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
> - bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
> + bitmap_or(dst->high_slices, dst->high_slices, src->high_slices,
> + SLICE_NUM_HIGH);
> }
>
> static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask *src)
> {
> - DECLARE_BITMAP(result, SLICE_NUM_HIGH);
> -
> dst->low_slices &= ~src->low_slices;
>
> - bitmap_andnot(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
> - bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
> + bitmap_andnot(dst->high_slices, dst->high_slices, src->high_slices,
> + SLICE_NUM_HIGH);
> }
>
> #ifdef CONFIG_PPC_64K_PAGES
> --
> 2.13.3
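
For anyone wondering why the intermediate copy was safe to drop:
bitmap_or() and bitmap_andnot() read the source words before writing the
corresponding destination word, so dst may alias src1 (or src2). A
minimal illustrative helper (not from the patch):

#include <linux/bitmap.h>

/* mask |= set, then mask &= ~clear, both in place: dst == src1. */
static void slice_mask_update(unsigned long *mask, const unsigned long *set,
			      const unsigned long *clear, unsigned int nbits)
{
	bitmap_or(mask, mask, set, nbits);
	bitmap_andnot(mask, mask, clear, nbits);
}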


2018-01-29 06:30:28

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [PATCH v3 4/5] powerpc/mm: Allow up to 64 low slices

Christophe Leroy <[email protected]> writes:

> While the implementation of the "slices" address space allows
> a significant number of high slices, it limits the number of
> low slices to 16 due to the use of a single u64 low_slices_psize
> element in struct mm_context_t.
>
> On the 8xx, the minimum slice size is the size of the area
> covered by a single PMD entry, ie 4M in 4K pages mode and 64M in
> 16K pages mode. This means we could have at least 64 slices.
>
> In order to overcome this limitation, this patch switches the
> handling of low_slices_psize to a char array, as already done for
> high_slices_psize. This allows increasing the number of low
> slices to 64 on the 8xx.
>

Maybe update the subject to "make low slice also a bitmap". Also
indicate that the bitmap functions optimize the operation if the
bitmap size is <= the size of a long?

Also switch the 8xx to the higher value in another patch?

> Signed-off-by: Christophe Leroy <[email protected]>
> ---
> v2: Using slice_bitmap_xxx() macros instead of bitmap_xxx() functions.
> v3: keep low_slices as a u64, this allows 64 slices which is enough.
>
> arch/powerpc/include/asm/book3s/64/mmu.h | 3 +-
> arch/powerpc/include/asm/mmu-8xx.h | 7 +++-
> arch/powerpc/include/asm/paca.h | 2 +-
> arch/powerpc/include/asm/slice.h | 1 -
> arch/powerpc/include/asm/slice_32.h | 2 ++
> arch/powerpc/include/asm/slice_64.h | 2 ++
> arch/powerpc/kernel/paca.c | 3 +-
> arch/powerpc/mm/hash_utils_64.c | 13 ++++----
> arch/powerpc/mm/slb_low.S | 8 +++--
> arch/powerpc/mm/slice.c | 57 +++++++++++++++++---------------
> 10 files changed, 56 insertions(+), 42 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
> index c9448e19847a..b076a2d74c69 100644
> --- a/arch/powerpc/include/asm/book3s/64/mmu.h
> +++ b/arch/powerpc/include/asm/book3s/64/mmu.h
> @@ -91,7 +91,8 @@ typedef struct {
> struct npu_context *npu_context;
>
> #ifdef CONFIG_PPC_MM_SLICES
> - u64 low_slices_psize; /* SLB page size encodings */
> + /* SLB page size encodings*/
> + unsigned char low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
> unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
> unsigned long slb_addr_limit;
> #else
> diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h
> index 5f89b6010453..5f37ba06b56c 100644
> --- a/arch/powerpc/include/asm/mmu-8xx.h
> +++ b/arch/powerpc/include/asm/mmu-8xx.h
> @@ -164,6 +164,11 @@
> */
> #define SPRN_M_TW 799
>
> +#ifdef CONFIG_PPC_MM_SLICES
> +#include <asm/slice_32.h>
> +#define SLICE_ARRAY_SIZE (1 << (32 - SLICE_LOW_SHIFT - 1))
> +#endif
> +
> #ifndef __ASSEMBLY__
> typedef struct {
> unsigned int id;
> @@ -171,7 +176,7 @@ typedef struct {
> unsigned long vdso_base;
> #ifdef CONFIG_PPC_MM_SLICES
> u16 user_psize; /* page size index */
> - u64 low_slices_psize; /* page size encodings */
> + unsigned char low_slices_psize[SLICE_ARRAY_SIZE];
> unsigned char high_slices_psize[0];
> unsigned long slb_addr_limit;
> #endif
> diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
> index 23ac7fc0af23..a3e531fe9ac7 100644
> --- a/arch/powerpc/include/asm/paca.h
> +++ b/arch/powerpc/include/asm/paca.h
> @@ -141,7 +141,7 @@ struct paca_struct {
> #ifdef CONFIG_PPC_BOOK3S
> mm_context_id_t mm_ctx_id;
> #ifdef CONFIG_PPC_MM_SLICES
> - u64 mm_ctx_low_slices_psize;
> + unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
> unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
> unsigned long mm_ctx_slb_addr_limit;
> #else
> diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h
> index 2b4b70de7e71..b67ba8faa507 100644
> --- a/arch/powerpc/include/asm/slice.h
> +++ b/arch/powerpc/include/asm/slice.h
> @@ -16,7 +16,6 @@
> #define HAVE_ARCH_UNMAPPED_AREA
> #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
>
> -#define SLICE_LOW_SHIFT 28
> #define SLICE_LOW_TOP (0x100000000ull)
> #define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
> #define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT)
> diff --git a/arch/powerpc/include/asm/slice_32.h b/arch/powerpc/include/asm/slice_32.h
> index 7e27c0dfb913..349187c20100 100644
> --- a/arch/powerpc/include/asm/slice_32.h
> +++ b/arch/powerpc/include/asm/slice_32.h
> @@ -2,6 +2,8 @@
> #ifndef _ASM_POWERPC_SLICE_32_H
> #define _ASM_POWERPC_SLICE_32_H
>
> +#define SLICE_LOW_SHIFT 26 /* 64 slices */
> +
> #define SLICE_HIGH_SHIFT 0
> #define SLICE_NUM_HIGH 0ul
> #define GET_HIGH_SLICE_INDEX(addr) (addr & 0)
> diff --git a/arch/powerpc/include/asm/slice_64.h b/arch/powerpc/include/asm/slice_64.h
> index 9d1c97b83010..0959475239c6 100644
> --- a/arch/powerpc/include/asm/slice_64.h
> +++ b/arch/powerpc/include/asm/slice_64.h
> @@ -2,6 +2,8 @@
> #ifndef _ASM_POWERPC_SLICE_64_H
> #define _ASM_POWERPC_SLICE_64_H
>
> +#define SLICE_LOW_SHIFT 28
> +

You are moving the LOW_SHIFT here; maybe you can fix that up in the
earlier patch, as reviewed there.

> #define SLICE_HIGH_SHIFT 40
> #define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
> #define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT)
> diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
> index d6597038931d..8e1566bf82b8 100644
> --- a/arch/powerpc/kernel/paca.c
> +++ b/arch/powerpc/kernel/paca.c
> @@ -264,7 +264,8 @@ void copy_mm_to_paca(struct mm_struct *mm)
> #ifdef CONFIG_PPC_MM_SLICES
> VM_BUG_ON(!mm->context.slb_addr_limit);
> get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit;
> - get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize;
> + memcpy(&get_paca()->mm_ctx_low_slices_psize,
> + &context->low_slices_psize, sizeof(context->low_slices_psize));
> memcpy(&get_paca()->mm_ctx_high_slices_psize,
> &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm));
> #else /* CONFIG_PPC_MM_SLICES */
> diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
> index 655a5a9a183d..da696565b969 100644
> --- a/arch/powerpc/mm/hash_utils_64.c
> +++ b/arch/powerpc/mm/hash_utils_64.c
> @@ -1097,19 +1097,18 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
> #ifdef CONFIG_PPC_MM_SLICES
> static unsigned int get_paca_psize(unsigned long addr)
> {
> - u64 lpsizes;
> - unsigned char *hpsizes;
> + unsigned char *psizes;
> unsigned long index, mask_index;
>
> if (addr < SLICE_LOW_TOP) {
> - lpsizes = get_paca()->mm_ctx_low_slices_psize;
> + psizes = get_paca()->mm_ctx_low_slices_psize;
> index = GET_LOW_SLICE_INDEX(addr);
> - return (lpsizes >> (index * 4)) & 0xF;
> + } else {
> + psizes = get_paca()->mm_ctx_high_slices_psize;
> + index = GET_HIGH_SLICE_INDEX(addr);
> }
> - hpsizes = get_paca()->mm_ctx_high_slices_psize;
> - index = GET_HIGH_SLICE_INDEX(addr);
> mask_index = index & 0x1;
> - return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF;
> + return (psizes[index >> 1] >> (mask_index * 4)) & 0xF;
> }
>
> #else
> diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
> index 2cf5ef3fc50d..2c7c717fd2ea 100644
> --- a/arch/powerpc/mm/slb_low.S
> +++ b/arch/powerpc/mm/slb_low.S
> @@ -200,10 +200,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
> 5:
> /*
> * Handle lpsizes
> - * r9 is get_paca()->context.low_slices_psize, r11 is index
> + * r9 is get_paca()->context.low_slices_psize[index], r11 is mask_index
> */
> - ld r9,PACALOWSLICESPSIZE(r13)
> - mr r11,r10
> + srdi r11,r10,1 /* index */
> + addi r9,r11,PACALOWSLICESPSIZE
> + lbzx r9,r13,r9 /* r9 is lpsizes[r11] */
> + rldicl r11,r10,0,63 /* r11 = r10 & 0x1 */
> 6:
> sldi r11,r11,2 /* index * 4 */
> /* Extract the psize and multiply to get an array offset */
> diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
> index 549704dfa777..3d573a038d42 100644
> --- a/arch/powerpc/mm/slice.c
> +++ b/arch/powerpc/mm/slice.c
> @@ -148,18 +148,20 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
> static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret,
> unsigned long high_limit)
> {
> - unsigned char *hpsizes;
> + unsigned char *hpsizes, *lpsizes;
> int index, mask_index;
> unsigned long i;
> - u64 lpsizes;
>
> ret->low_slices = 0;
> slice_bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
>
> lpsizes = mm->context.low_slices_psize;
> - for (i = 0; i < SLICE_NUM_LOW; i++)
> - if (((lpsizes >> (i * 4)) & 0xf) == psize)
> + for (i = 0; i < SLICE_NUM_LOW; i++) {
> + mask_index = i & 0x1;
> + index = i >> 1;
> + if (((lpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
> ret->low_slices |= 1u << i;
> + }
>
> if (high_limit <= SLICE_LOW_TOP)
> return;
> @@ -211,8 +213,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
> {
> int index, mask_index;
> /* Write the new slice psize bits */
> - unsigned char *hpsizes;
> - u64 lpsizes;
> + unsigned char *hpsizes, *lpsizes;
> unsigned long i, flags;
>
> slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
> @@ -225,12 +226,13 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
>
> lpsizes = mm->context.low_slices_psize;
> for (i = 0; i < SLICE_NUM_LOW; i++)
> - if (mask.low_slices & (1u << i))
> - lpsizes = (lpsizes & ~(0xful << (i * 4))) |
> - (((unsigned long)psize) << (i * 4));
> -
> - /* Assign the value back */
> - mm->context.low_slices_psize = lpsizes;
> + if (mask.low_slices & (1u << i)) {
> + mask_index = i & 0x1;
> + index = i >> 1;
> + lpsizes[index] = (lpsizes[index] &
> + ~(0xf << (mask_index * 4))) |
> + (((unsigned long)psize) << (mask_index * 4));
> + }
>
> hpsizes = mm->context.high_slices_psize;
> for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) {
> @@ -629,7 +631,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
>
> unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
> {
> - unsigned char *hpsizes;
> + unsigned char *psizes;
> int index, mask_index;
>
> /*
> @@ -643,15 +645,14 @@ unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
> #endif
> }
> if (addr < SLICE_LOW_TOP) {
> - u64 lpsizes;
> - lpsizes = mm->context.low_slices_psize;
> + psizes = mm->context.low_slices_psize;
> index = GET_LOW_SLICE_INDEX(addr);
> - return (lpsizes >> (index * 4)) & 0xf;
> + } else {
> + psizes = mm->context.high_slices_psize;
> + index = GET_HIGH_SLICE_INDEX(addr);
> }
> - hpsizes = mm->context.high_slices_psize;
> - index = GET_HIGH_SLICE_INDEX(addr);
> mask_index = index & 0x1;
> - return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xf;
> + return (psizes[index >> 1] >> (mask_index * 4)) & 0xf;
> }
> EXPORT_SYMBOL_GPL(get_slice_psize);
>
> @@ -672,8 +673,8 @@ EXPORT_SYMBOL_GPL(get_slice_psize);
> void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
> {
> int index, mask_index;
> - unsigned char *hpsizes;
> - unsigned long flags, lpsizes;
> + unsigned char *hpsizes, *lpsizes;
> + unsigned long flags;
> unsigned int old_psize;
> int i;
>
> @@ -691,12 +692,14 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
> wmb();
>
> lpsizes = mm->context.low_slices_psize;
> - for (i = 0; i < SLICE_NUM_LOW; i++)
> - if (((lpsizes >> (i * 4)) & 0xf) == old_psize)
> - lpsizes = (lpsizes & ~(0xful << (i * 4))) |
> - (((unsigned long)psize) << (i * 4));
> - /* Assign the value back */
> - mm->context.low_slices_psize = lpsizes;
> + for (i = 0; i < SLICE_NUM_LOW; i++) {
> + mask_index = i & 0x1;
> + index = i >> 1;
> + if (((lpsizes[index] >> (mask_index * 4)) & 0xf) == old_psize)
> + lpsizes[index] = (lpsizes[index] &
> + ~(0xf << (mask_index * 4))) |
> + (((unsigned long)psize) << (mask_index * 4));
> + }
>
> hpsizes = mm->context.high_slices_psize;
> for (i = 0; i < SLICE_NUM_HIGH; i++) {
> --
> 2.13.3
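
As background to the packing discussed in this patch: low_slices_psize
now mirrors high_slices_psize, storing two 4-bit page-size indices per
byte, indexed by i >> 1 with nibble i & 1. A sketch of the accesses the
patch open-codes (the helper names slice_psize_get/set are hypothetical):

/* psizes[] packs one 4-bit page size index per slice, two per byte. */
static inline unsigned int slice_psize_get(const unsigned char *psizes,
					   unsigned long i)
{
	return (psizes[i >> 1] >> ((i & 1) * 4)) & 0xf;
}

static inline void slice_psize_set(unsigned char *psizes, unsigned long i,
				   unsigned int psize)
{
	unsigned int shift = (i & 1) * 4;

	psizes[i >> 1] = (psizes[i >> 1] & ~(0xf << shift)) | (psize << shift);
}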


2018-01-29 08:57:46

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH v3 4/5] powerpc/mm: Allow up to 64 low slices



Le 29/01/2018 à 07:29, Aneesh Kumar K.V a écrit :
> Christophe Leroy <[email protected]> writes:
>> [...]
>
> Maybe update the subject to "make low slice also a bitmap". Also
> indicate that the bitmap functions optimize the operation if the
> bitmap size is <= the size of a long?

v3 doesn't use bitmap functions anymore for low_slices. In this version,
only low_slices_psize has been reworked to allow up to 64 slices instead
of 16. I have kept low_slices as is (ie as a u64), hence allowing up to
64 slices, which is big enough.

>
> Also switch the 8xx to the higher value in another patch?

One separate patch just for changing the value of SLICE_LOW_SHIFT from
28 to 26 on the 8xx?
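
For the record, the arithmetic behind the 64-slice claim, as a purely
illustrative compile-time check:

#define SLICE_LOW_TOP	0x100000000ull
#define SLICE_LOW_SHIFT	26	/* proposed 8xx value */

/* 4GB of low address space split into 64 slices of 64M each. */
_Static_assert((SLICE_LOW_TOP >> SLICE_LOW_SHIFT) == 64, "64 low slices");
_Static_assert((1ull << SLICE_LOW_SHIFT) == 64 * 1024 * 1024, "64M per slice");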


>
>> [...]
>> diff --git a/arch/powerpc/include/asm/slice_64.h b/arch/powerpc/include/asm/slice_64.h
>> index 9d1c97b83010..0959475239c6 100644
>> --- a/arch/powerpc/include/asm/slice_64.h
>> +++ b/arch/powerpc/include/asm/slice_64.h
>> @@ -2,6 +2,8 @@
>> #ifndef _ASM_POWERPC_SLICE_64_H
>> #define _ASM_POWERPC_SLICE_64_H
>>
>> +#define SLICE_LOW_SHIFT 28
>> +
>
> You are moving the LOW_SHIFT here; maybe you can fix that up in the
> earlier patch, as reviewed there.

Ok

Christophe


2018-02-10 13:00:13

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH v3 4/5] powerpc/mm: Allow up to 64 low slices



Le 29/01/2018 à 07:29, Aneesh Kumar K.V a écrit :
> Christophe Leroy <[email protected]> writes:
>> [...]
>
> Maybe update the subject to "make low slice also a bitmap". Also
> indicate that the bitmap functions optimize the operation if the
> bitmap size is <= the size of a long?

As explained in the note below the ---, this new version doesn't do
that anymore, as 64 slices is enough.

>
> Also switch the 8xx to the higher value in another patch?

Ok

Christophe

>
>> [...]
>> diff --git a/arch/powerpc/include/asm/slice_64.h b/arch/powerpc/include/asm/slice_64.h
>> index 9d1c97b83010..0959475239c6 100644
>> --- a/arch/powerpc/include/asm/slice_64.h
>> +++ b/arch/powerpc/include/asm/slice_64.h
>> @@ -2,6 +2,8 @@
>> #ifndef _ASM_POWERPC_SLICE_64_H
>> #define _ASM_POWERPC_SLICE_64_H
>>
>> +#define SLICE_LOW_SHIFT 28
>> +
>
> You are moving the LOW_SHIFT here; maybe you can fix that up in the
> earlier patch, as reviewed there.

Yes, done in the earlier patch now.
