2020-09-01 02:51:29

by Alex Shi

[permalink] [raw]
Subject: [PATCH v3 1/3] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags

pageblock_flags is used as long, since every pageblock_flags is just 4
bits, 'long' size will include 8(32bit machine) or 16 pageblocks' flags,
that flag setting has to sync in cmpxchg with 7 or 15 other pageblock
flags. It would cause long waiting for sync.

If we could change the pageblock_flags variable as char, we could use
char size cmpxchg, which just sync up with 2 pageblock flags. it could
relief much false sharing in cmpxchg.

With this and next patch, we could see mmtests/thpscale get slight fast
on my 4 cores box, and cmpxchg retry times is reduced.

pageblock pageblock pageblock rc2 rc2 rc2
16 16-2 16-3 a b c
Duration User 14.81 15.24 14.55 14.76 14.97 14.38
Duration System 84.44 88.38 90.64 100.43 89.15 88.89
Duration Elapsed 98.83 99.06 99.81 100.30 99.24 99.14

rc2 is 5.9-rc2 kernel, pageblock is 5.9-rc2 + this patchset

Signed-off-by: Alex Shi <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: [email protected]
Cc: [email protected]
---
include/linux/mmzone.h | 6 +++---
include/linux/pageblock-flags.h | 2 +-
mm/page_alloc.c | 38 +++++++++++++++++++-------------------
3 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 8379432f4f2f..be676e659fb7 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -437,7 +437,7 @@ struct zone {
* Flags for a pageblock_nr_pages block. See pageblock-flags.h.
* In SPARSEMEM, this map is stored in struct mem_section
*/
- unsigned long *pageblock_flags;
+ unsigned char *pageblock_flags;
#endif /* CONFIG_SPARSEMEM */

/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
@@ -1159,7 +1159,7 @@ struct mem_section_usage {
DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
#endif
/* See declaration of similar field in struct zone */
- unsigned long pageblock_flags[0];
+ unsigned char pageblock_flags[0];
};

void subsection_map_init(unsigned long pfn, unsigned long nr_pages);
@@ -1212,7 +1212,7 @@ struct mem_section {
extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
#endif

-static inline unsigned long *section_to_usemap(struct mem_section *ms)
+static inline unsigned char *section_to_usemap(struct mem_section *ms)
{
return ms->usage->pageblock_flags;
}
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index fff52ad370c1..d189441568eb 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -54,7 +54,7 @@ enum pageblock_bits {
/* Forward declaration */
struct page;

-unsigned long get_pfnblock_flags_mask(struct page *page,
+unsigned char get_pfnblock_flags_mask(struct page *page,
unsigned long pfn,
unsigned long mask);

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fab5e97dc9ca..81e96d4d9c42 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -445,7 +445,7 @@ static inline bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
#endif

/* Return a pointer to the bitmap storing bits affecting a block of pages */
-static inline unsigned long *get_pageblock_bitmap(struct page *page,
+static inline unsigned char *get_pageblock_bitmap(struct page *page,
unsigned long pfn)
{
#ifdef CONFIG_SPARSEMEM
@@ -474,24 +474,24 @@ static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
* Return: pageblock_bits flags
*/
static __always_inline
-unsigned long __get_pfnblock_flags_mask(struct page *page,
+unsigned char __get_pfnblock_flags_mask(struct page *page,
unsigned long pfn,
unsigned long mask)
{
- unsigned long *bitmap;
- unsigned long bitidx, word_bitidx;
- unsigned long word;
+ unsigned char *bitmap;
+ unsigned long bitidx, byte_bitidx;
+ unsigned char byte;

bitmap = get_pageblock_bitmap(page, pfn);
bitidx = pfn_to_bitidx(page, pfn);
- word_bitidx = bitidx / BITS_PER_LONG;
- bitidx &= (BITS_PER_LONG-1);
+ byte_bitidx = bitidx / BITS_PER_BYTE;
+ bitidx &= (BITS_PER_BYTE-1);

- word = bitmap[word_bitidx];
- return (word >> bitidx) & mask;
+ byte = bitmap[byte_bitidx];
+ return (byte >> bitidx) & mask;
}

-unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
+unsigned char get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
unsigned long mask)
{
return __get_pfnblock_flags_mask(page, pfn, mask);
@@ -513,29 +513,29 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
unsigned long pfn,
unsigned long mask)
{
- unsigned long *bitmap;
- unsigned long bitidx, word_bitidx;
- unsigned long old_word, word;
+ unsigned char *bitmap;
+ unsigned long bitidx, byte_bitidx;
+ unsigned char old_byte, byte;

BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));

bitmap = get_pageblock_bitmap(page, pfn);
bitidx = pfn_to_bitidx(page, pfn);
- word_bitidx = bitidx / BITS_PER_LONG;
- bitidx &= (BITS_PER_LONG-1);
+ byte_bitidx = bitidx / BITS_PER_BYTE;
+ bitidx &= (BITS_PER_BYTE-1);

VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);

mask <<= bitidx;
flags <<= bitidx;

- word = READ_ONCE(bitmap[word_bitidx]);
+ byte = READ_ONCE(bitmap[byte_bitidx]);
for (;;) {
- old_word = cmpxchg(&bitmap[word_bitidx], word, (word & ~mask) | flags);
- if (word == old_word)
+ old_byte = cmpxchg(&bitmap[byte_bitidx], byte, (byte & ~mask) | flags);
+ if (byte == old_byte)
break;
- word = old_word;
+ byte = old_byte;
}
}

--
1.8.3.1


2020-09-01 02:52:25

by Alex Shi

[permalink] [raw]
Subject: [PATCH v3 2/3] mm/pageblock: remove false sharing in pageblock_flags

Current pageblock_flags is only 4 bits, so it has to share a char size
in cmpxchg when get set, the false sharing cause perf drop.

If we incrase the bits up to 8, false sharing would gone in cmpxchg. and
the only cost is half char per pageblock, which is half char per 128MB
on x86, 4 chars in 1 GB.

Signed-off-by: Alex Shi <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: [email protected]
Cc: [email protected]
---
include/linux/pageblock-flags.h | 2 +-
mm/page_alloc.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index d189441568eb..f785c9d6d68c 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -25,7 +25,7 @@ enum pageblock_bits {
* Assume the bits will always align on a word. If this assumption
* changes then get/set pageblock needs updating.
*/
- NR_PAGEBLOCK_BITS
+ NR_PAGEBLOCK_BITS = BITS_PER_BYTE
};

#ifdef CONFIG_HUGETLB_PAGE
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 81e96d4d9c42..60342e764090 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -517,7 +517,7 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
unsigned long bitidx, byte_bitidx;
unsigned char old_byte, byte;

- BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
+ BUILD_BUG_ON(NR_PAGEBLOCK_BITS != BITS_PER_BYTE);
BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));

bitmap = get_pageblock_bitmap(page, pfn);
--
1.8.3.1

2020-09-01 02:54:15

by Alex Shi

[permalink] [raw]
Subject: [PATCH v3 3/3] mm/armv6: work around armv6 cmpxchg support issue

Armv6 can not simulate cmpxchg1 func, so we have to use cmpxchg4 on it.

arm-linux-gnueabi-ld: mm/page_alloc.o: in function `set_pfnblock_flags_mask':
(.text+0x32b4): undefined reference to `__bad_cmpxchg'
arm-linux-gnueabi-ld: (.text+0x32e0): undefined reference to `__bad_cmpxchg'

Reported-by: kernel test robot <[email protected]>
Signed-off-by: Alex Shi <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Baolin Wang <[email protected]>
Cc: Russell King <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
---
include/linux/mmzone.h | 15 ++++++++++++---
mm/page_alloc.c | 24 ++++++++++++------------
2 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index be676e659fb7..4c91ca807473 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -406,6 +406,15 @@ enum zone_type {

#ifndef __GENERATING_BOUNDS_H

+/* cmpxchg only support 32-bits operands on ARMv6. */
+#ifdef CONFIG_CPU_V6
+#define BITS_PER_FLAGS BITS_PER_LONG
+typedef unsigned long pageblockflags_t;
+#else
+#define BITS_PER_FLAGS BITS_PER_BYTE
+typedef unsigned char pageblockflags_t;
+#endif
+
struct zone {
/* Read-mostly fields */

@@ -437,7 +446,7 @@ struct zone {
* Flags for a pageblock_nr_pages block. See pageblock-flags.h.
* In SPARSEMEM, this map is stored in struct mem_section
*/
- unsigned char *pageblock_flags;
+ pageblockflags_t *pageblock_flags;
#endif /* CONFIG_SPARSEMEM */

/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
@@ -1159,7 +1168,7 @@ struct mem_section_usage {
DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
#endif
/* See declaration of similar field in struct zone */
- unsigned char pageblock_flags[0];
+ pageblockflags_t pageblock_flags[0];
};

void subsection_map_init(unsigned long pfn, unsigned long nr_pages);
@@ -1212,7 +1221,7 @@ struct mem_section {
extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
#endif

-static inline unsigned char *section_to_usemap(struct mem_section *ms)
+static inline pageblockflags_t *section_to_usemap(struct mem_section *ms)
{
return ms->usage->pageblock_flags;
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 60342e764090..9a41c5dc78eb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -445,7 +445,7 @@ static inline bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
#endif

/* Return a pointer to the bitmap storing bits affecting a block of pages */
-static inline unsigned char *get_pageblock_bitmap(struct page *page,
+static inline pageblockflags_t *get_pageblock_bitmap(struct page *page,
unsigned long pfn)
{
#ifdef CONFIG_SPARSEMEM
@@ -474,24 +474,24 @@ static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
* Return: pageblock_bits flags
*/
static __always_inline
-unsigned char __get_pfnblock_flags_mask(struct page *page,
+pageblockflags_t __get_pfnblock_flags_mask(struct page *page,
unsigned long pfn,
unsigned long mask)
{
- unsigned char *bitmap;
+ pageblockflags_t *bitmap;
unsigned long bitidx, byte_bitidx;
- unsigned char byte;
+ pageblockflags_t byte;

bitmap = get_pageblock_bitmap(page, pfn);
bitidx = pfn_to_bitidx(page, pfn);
- byte_bitidx = bitidx / BITS_PER_BYTE;
- bitidx &= (BITS_PER_BYTE-1);
+ byte_bitidx = bitidx / BITS_PER_FLAGS;
+ bitidx &= (BITS_PER_FLAGS - 1);

byte = bitmap[byte_bitidx];
return (byte >> bitidx) & mask;
}

-unsigned char get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
+pageblockflags_t get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
unsigned long mask)
{
return __get_pfnblock_flags_mask(page, pfn, mask);
@@ -513,17 +513,17 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
unsigned long pfn,
unsigned long mask)
{
- unsigned char *bitmap;
+ pageblockflags_t *bitmap;
unsigned long bitidx, byte_bitidx;
- unsigned char old_byte, byte;
+ pageblockflags_t old_byte, byte;

- BUILD_BUG_ON(NR_PAGEBLOCK_BITS != BITS_PER_BYTE);
+ BUILD_BUG_ON(NR_PAGEBLOCK_BITS != BITS_PER_FLAGS);
BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));

bitmap = get_pageblock_bitmap(page, pfn);
bitidx = pfn_to_bitidx(page, pfn);
- byte_bitidx = bitidx / BITS_PER_BYTE;
- bitidx &= (BITS_PER_BYTE-1);
+ byte_bitidx = bitidx / BITS_PER_FLAGS;
+ bitidx &= (BITS_PER_FLAGS - 1);

VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);

--
1.8.3.1

2020-09-01 06:33:36

by Alex Shi

[permalink] [raw]
Subject: Re: [PATCH v3 3/3] mm/armv6: work around armv6 cmpxchg support issue

seems there are couples archs can not do cmpxchg1
So update the patch here. And it's easy to fix if more arch issue find here.

From cdf98ae7b5e83bb7210c927d4749f62fee4ed115 Mon Sep 17 00:00:00 2001
From: Alex Shi <[email protected]>
Date: Mon, 31 Aug 2020 15:41:20 +0800
Subject: [PATCH v4 3/3] mm/pageblock: work around multiple arch's cmpxchg
support issue

Armv6, sh2, sparc32 and xtensa can not do cmpxchg1, so we have to use
cmpxchg4 on it.

arm-linux-gnueabi-ld: mm/page_alloc.o: in function `set_pfnblock_flags_mask':
(.text+0x32b4): undefined reference to `__bad_cmpxchg'
arm-linux-gnueabi-ld: (.text+0x32e0): undefined reference to `__bad_cmpxchg'

Reported-by: kernel test robot <[email protected]>
Signed-off-by: Alex Shi <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Baolin Wang <[email protected]>
Cc: Russell King <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
---
include/linux/mmzone.h | 20 +++++++++++++++++---
mm/page_alloc.c | 24 ++++++++++++------------
2 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index be676e659fb7..364b29ed99b3 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -406,6 +406,20 @@ enum zone_type {

#ifndef __GENERATING_BOUNDS_H

+/*
+ * cmpxchg only support 32-bits operands on the following archs ARMv6, SPARC32
+ * sh2, XTENSA.
+ */
+#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_SH2) || \
+ defined(CONFIG_SPARC32) || defined(CONFIG_XTENSA)
+
+#define BITS_PER_FLAGS BITS_PER_LONG
+typedef unsigned long pageblockflags_t;
+#else
+#define BITS_PER_FLAGS BITS_PER_BYTE
+typedef unsigned char pageblockflags_t;
+#endif
+
struct zone {
/* Read-mostly fields */

@@ -437,7 +451,7 @@ struct zone {
* Flags for a pageblock_nr_pages block. See pageblock-flags.h.
* In SPARSEMEM, this map is stored in struct mem_section
*/
- unsigned char *pageblock_flags;
+ pageblockflags_t *pageblock_flags;
#endif /* CONFIG_SPARSEMEM */

/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
@@ -1159,7 +1173,7 @@ struct mem_section_usage {
DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
#endif
/* See declaration of similar field in struct zone */
- unsigned char pageblock_flags[0];
+ pageblockflags_t pageblock_flags[0];
};

void subsection_map_init(unsigned long pfn, unsigned long nr_pages);
@@ -1212,7 +1226,7 @@ struct mem_section {
extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
#endif

-static inline unsigned char *section_to_usemap(struct mem_section *ms)
+static inline pageblockflags_t *section_to_usemap(struct mem_section *ms)
{
return ms->usage->pageblock_flags;
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 60342e764090..9a41c5dc78eb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -445,7 +445,7 @@ static inline bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
#endif

/* Return a pointer to the bitmap storing bits affecting a block of pages */
-static inline unsigned char *get_pageblock_bitmap(struct page *page,
+static inline pageblockflags_t *get_pageblock_bitmap(struct page *page,
unsigned long pfn)
{
#ifdef CONFIG_SPARSEMEM
@@ -474,24 +474,24 @@ static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
* Return: pageblock_bits flags
*/
static __always_inline
-unsigned char __get_pfnblock_flags_mask(struct page *page,
+pageblockflags_t __get_pfnblock_flags_mask(struct page *page,
unsigned long pfn,
unsigned long mask)
{
- unsigned char *bitmap;
+ pageblockflags_t *bitmap;
unsigned long bitidx, byte_bitidx;
- unsigned char byte;
+ pageblockflags_t byte;

bitmap = get_pageblock_bitmap(page, pfn);
bitidx = pfn_to_bitidx(page, pfn);
- byte_bitidx = bitidx / BITS_PER_BYTE;
- bitidx &= (BITS_PER_BYTE-1);
+ byte_bitidx = bitidx / BITS_PER_FLAGS;
+ bitidx &= (BITS_PER_FLAGS - 1);

byte = bitmap[byte_bitidx];
return (byte >> bitidx) & mask;
}

-unsigned char get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
+pageblockflags_t get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
unsigned long mask)
{
return __get_pfnblock_flags_mask(page, pfn, mask);
@@ -513,17 +513,17 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
unsigned long pfn,
unsigned long mask)
{
- unsigned char *bitmap;
+ pageblockflags_t *bitmap;
unsigned long bitidx, byte_bitidx;
- unsigned char old_byte, byte;
+ pageblockflags_t old_byte, byte;

- BUILD_BUG_ON(NR_PAGEBLOCK_BITS != BITS_PER_BYTE);
+ BUILD_BUG_ON(NR_PAGEBLOCK_BITS != BITS_PER_FLAGS);
BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));

bitmap = get_pageblock_bitmap(page, pfn);
bitidx = pfn_to_bitidx(page, pfn);
- byte_bitidx = bitidx / BITS_PER_BYTE;
- bitidx &= (BITS_PER_BYTE-1);
+ byte_bitidx = bitidx / BITS_PER_FLAGS;
+ bitidx &= (BITS_PER_FLAGS - 1);

VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);

--
1.8.3.1

2020-09-01 13:28:44

by Matthew Wilcox

[permalink] [raw]
Subject: Re: [PATCH v3 3/3] mm/armv6: work around armv6 cmpxchg support issue

On Tue, Sep 01, 2020 at 02:30:51PM +0800, Alex Shi wrote:
> seems there are couples archs can not do cmpxchg1
> So update the patch here. And it's easy to fix if more arch issue find here.

> +/*
> + * cmpxchg only support 32-bits operands on the following archs ARMv6, SPARC32
> + * sh2, XTENSA.
> + */
> +#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_SH2) || \
> + defined(CONFIG_SPARC32) || defined(CONFIG_XTENSA)

Looks like we need a HAVE_CMPXCHG_BYTE in Kconfig to parallel
HAVE_CMPXCHG_DOUBLE.

2020-09-01 17:07:06

by Vlastimil Babka

[permalink] [raw]
Subject: Re: [PATCH v3 1/3] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags

On 9/1/20 4:50 AM, Alex Shi wrote:
> pageblock_flags is used as long, since every pageblock_flags is just 4
> bits, 'long' size will include 8(32bit machine) or 16 pageblocks' flags,
> that flag setting has to sync in cmpxchg with 7 or 15 other pageblock
> flags. It would cause long waiting for sync.
>
> If we could change the pageblock_flags variable as char, we could use
> char size cmpxchg, which just sync up with 2 pageblock flags. it could
> relief much false sharing in cmpxchg.
>
> With this and next patch, we could see mmtests/thpscale get slight fast
> on my 4 cores box, and cmpxchg retry times is reduced.
>
> pageblock pageblock pageblock rc2 rc2 rc2
> 16 16-2 16-3 a b c
> Duration User 14.81 15.24 14.55 14.76 14.97 14.38
> Duration System 84.44 88.38 90.64 100.43 89.15 88.89
> Duration Elapsed 98.83 99.06 99.81 100.30 99.24 99.14

The large variance in these numbers suggest that 3 iterations are not enough to
conclude a statistically significant difference. You'd need more iterations and
calculate at least mean+variance.

> rc2 is 5.9-rc2 kernel, pageblock is 5.9-rc2 + this patchset
>
> Signed-off-by: Alex Shi <[email protected]>
> Cc: Andrew Morton <[email protected]>
> Cc: Mel Gorman <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> ---
> include/linux/mmzone.h | 6 +++---
> include/linux/pageblock-flags.h | 2 +-
> mm/page_alloc.c | 38 +++++++++++++++++++-------------------
> 3 files changed, 23 insertions(+), 23 deletions(-)
>
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 8379432f4f2f..be676e659fb7 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -437,7 +437,7 @@ struct zone {
> * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
> * In SPARSEMEM, this map is stored in struct mem_section
> */
> - unsigned long *pageblock_flags;
> + unsigned char *pageblock_flags;
> #endif /* CONFIG_SPARSEMEM */
>
> /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
> @@ -1159,7 +1159,7 @@ struct mem_section_usage {
> DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
> #endif
> /* See declaration of similar field in struct zone */
> - unsigned long pageblock_flags[0];
> + unsigned char pageblock_flags[0];
> };
>
> void subsection_map_init(unsigned long pfn, unsigned long nr_pages);
> @@ -1212,7 +1212,7 @@ struct mem_section {
> extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
> #endif
>
> -static inline unsigned long *section_to_usemap(struct mem_section *ms)
> +static inline unsigned char *section_to_usemap(struct mem_section *ms)
> {
> return ms->usage->pageblock_flags;
> }
> diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
> index fff52ad370c1..d189441568eb 100644
> --- a/include/linux/pageblock-flags.h
> +++ b/include/linux/pageblock-flags.h
> @@ -54,7 +54,7 @@ enum pageblock_bits {
> /* Forward declaration */
> struct page;
>
> -unsigned long get_pfnblock_flags_mask(struct page *page,
> +unsigned char get_pfnblock_flags_mask(struct page *page,
> unsigned long pfn,
> unsigned long mask);
>
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index fab5e97dc9ca..81e96d4d9c42 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -445,7 +445,7 @@ static inline bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
> #endif
>
> /* Return a pointer to the bitmap storing bits affecting a block of pages */
> -static inline unsigned long *get_pageblock_bitmap(struct page *page,
> +static inline unsigned char *get_pageblock_bitmap(struct page *page,
> unsigned long pfn)
> {
> #ifdef CONFIG_SPARSEMEM
> @@ -474,24 +474,24 @@ static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
> * Return: pageblock_bits flags
> */
> static __always_inline
> -unsigned long __get_pfnblock_flags_mask(struct page *page,
> +unsigned char __get_pfnblock_flags_mask(struct page *page,
> unsigned long pfn,
> unsigned long mask)
> {
> - unsigned long *bitmap;
> - unsigned long bitidx, word_bitidx;
> - unsigned long word;
> + unsigned char *bitmap;
> + unsigned long bitidx, byte_bitidx;
> + unsigned char byte;
>
> bitmap = get_pageblock_bitmap(page, pfn);
> bitidx = pfn_to_bitidx(page, pfn);
> - word_bitidx = bitidx / BITS_PER_LONG;
> - bitidx &= (BITS_PER_LONG-1);
> + byte_bitidx = bitidx / BITS_PER_BYTE;
> + bitidx &= (BITS_PER_BYTE-1);
>
> - word = bitmap[word_bitidx];
> - return (word >> bitidx) & mask;
> + byte = bitmap[byte_bitidx];
> + return (byte >> bitidx) & mask;
> }
>
> -unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
> +unsigned char get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
> unsigned long mask)
> {
> return __get_pfnblock_flags_mask(page, pfn, mask);
> @@ -513,29 +513,29 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
> unsigned long pfn,
> unsigned long mask)
> {
> - unsigned long *bitmap;
> - unsigned long bitidx, word_bitidx;
> - unsigned long old_word, word;
> + unsigned char *bitmap;
> + unsigned long bitidx, byte_bitidx;
> + unsigned char old_byte, byte;
>
> BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
> BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
>
> bitmap = get_pageblock_bitmap(page, pfn);
> bitidx = pfn_to_bitidx(page, pfn);
> - word_bitidx = bitidx / BITS_PER_LONG;
> - bitidx &= (BITS_PER_LONG-1);
> + byte_bitidx = bitidx / BITS_PER_BYTE;
> + bitidx &= (BITS_PER_BYTE-1);
>
> VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);
>
> mask <<= bitidx;
> flags <<= bitidx;
>
> - word = READ_ONCE(bitmap[word_bitidx]);
> + byte = READ_ONCE(bitmap[byte_bitidx]);
> for (;;) {
> - old_word = cmpxchg(&bitmap[word_bitidx], word, (word & ~mask) | flags);
> - if (word == old_word)
> + old_byte = cmpxchg(&bitmap[byte_bitidx], byte, (byte & ~mask) | flags);
> + if (byte == old_byte)
> break;
> - word = old_word;
> + byte = old_byte;
> }
> }
>
>

2020-09-03 06:43:43

by Alex Shi

[permalink] [raw]
Subject: Re: [PATCH v3 1/3] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags



在 2020/9/2 上午1:06, Vlastimil Babka 写道:
>>
>> pageblock pageblock pageblock rc2 rc2 rc2
>> 16 16-2 16-3 a b c
>> Duration User 14.81 15.24 14.55 14.76 14.97 14.38
>> Duration System 84.44 88.38 90.64 100.43 89.15 88.89
>> Duration Elapsed 98.83 99.06 99.81 100.30 99.24 99.14
> The large variance in these numbers suggest that 3 iterations are not enough to
> conclude a statistically significant difference. You'd need more iterations and
> calculate at least mean+variance.
>

on the machine I did seeing much variation more on Amean. but the trace event would
be more straight. It could reduce the hit_cmpxchg from thousand time to hundreds or less.

Thanks
Alex

diff --git a/include/trace/events/pageblock.h b/include/trace/events/pageblock.h
new file mode 100644
index 000000000000..003c2d716f82
--- /dev/null
+++ b/include/trace/events/pageblock.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM pageblock
+
+#if !defined(_TRACE_PAGEBLOCK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_PAGEBLOCK_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(hit_cmpxchg,
+
+ TP_PROTO(char byte),
+
+ TP_ARGS(byte),
+
+ TP_STRUCT__entry(
+ __field(char, byte)
+ ),
+
+ TP_fast_assign(
+ __entry->byte = byte;
+ ),
+
+ TP_printk("%d", __entry->byte)
+);
+
+#endif /* _TRACE_PAGE_ISOLATION_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 60342e764090..2422dec00484 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -509,6 +509,9 @@ static __always_inline int get_pfnblock_migratetype(struct page *page, unsigned
* @pfn: The target page frame number
* @mask: mask of bits that the caller is interested in
*/
+#define CREATE_TRACE_POINTS
+#include <trace/events/pageblock.h>
+
void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
unsigned long pfn,
unsigned long mask)
@@ -536,6 +539,7 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
if (byte == old_byte)
break;
byte = old_byte;
+ trace_hit_cmpxchg(byte);
}
}

2020-09-03 06:49:03

by Alex Shi

[permalink] [raw]
Subject: Re: [PATCH v3 3/3] mm/armv6: work around armv6 cmpxchg support issue



?? 2020/9/1 ????9:17, Matthew Wilcox д??:
> On Tue, Sep 01, 2020 at 02:30:51PM +0800, Alex Shi wrote:
>> seems there are couples archs can not do cmpxchg1
>> So update the patch here. And it's easy to fix if more arch issue find here.
>
>> +/*
>> + * cmpxchg only support 32-bits operands on the following archs ARMv6, SPARC32
>> + * sh2, XTENSA.
>> + */
>> +#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_SH2) || \
>> + defined(CONFIG_SPARC32) || defined(CONFIG_XTENSA)
>
> Looks like we need a HAVE_CMPXCHG_BYTE in Kconfig to parallel
> HAVE_CMPXCHG_DOUBLE.
>

Thanks for reminder! Compare the HAVE_CMPXCHG_BYTE, NO_CMPXCHG_BYTE would be
better for less code change.

I will send the v4 for a bit more change on patch 2.

Thanks!
Alex