pageblock_flags is used as long, since every pageblock_flags is just 4
bits, 'long' size will include 8(32bit machine) or 16 pageblocks' flags,
that flag setting has to sync in cmpxchg with 7 or 15 other pageblock
flags. It would cause long waiting for sync.
If we could change the pageblock_flags variable as char, we could use
char size cmpxchg, which just sync up with 2 pageblock flags. it could
relief much false sharing in cmpxchg.
Signed-off-by: Alex Shi <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Alexander Duyck <[email protected]>
Cc: [email protected]
Cc: [email protected]
---
include/linux/mmzone.h | 6 +++---
include/linux/pageblock-flags.h | 2 +-
mm/page_alloc.c | 24 +++++++++++++-----------
3 files changed, 17 insertions(+), 15 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 0ed520954843..c92d6d24527d 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -438,7 +438,7 @@ struct zone {
* Flags for a pageblock_nr_pages block. See pageblock-flags.h.
* In SPARSEMEM, this map is stored in struct mem_section
*/
- unsigned long *pageblock_flags;
+ unsigned char *pageblock_flags;
#endif /* CONFIG_SPARSEMEM */
/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
@@ -1159,7 +1159,7 @@ struct mem_section_usage {
DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
#endif
/* See declaration of similar field in struct zone */
- unsigned long pageblock_flags[0];
+ unsigned char pageblock_flags[0];
};
void subsection_map_init(unsigned long pfn, unsigned long nr_pages);
@@ -1212,7 +1212,7 @@ struct mem_section {
extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
#endif
-static inline unsigned long *section_to_usemap(struct mem_section *ms)
+static inline unsigned char *section_to_usemap(struct mem_section *ms)
{
return ms->usage->pageblock_flags;
}
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index fff52ad370c1..d189441568eb 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -54,7 +54,7 @@ enum pageblock_bits {
/* Forward declaration */
struct page;
-unsigned long get_pfnblock_flags_mask(struct page *page,
+unsigned char get_pfnblock_flags_mask(struct page *page,
unsigned long pfn,
unsigned long mask);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 66d45e9cc358..142803d1f49b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -447,7 +447,7 @@ static inline bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
#endif
/* Return a pointer to the bitmap storing bits affecting a block of pages */
-static inline unsigned long *get_pageblock_bitmap(struct page *page,
+static inline unsigned char *get_pageblock_bitmap(struct page *page,
unsigned long pfn)
{
#ifdef CONFIG_SPARSEMEM
@@ -467,6 +467,8 @@ static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
}
+#define BITS_PER_CHAR 8
+
/**
* get_pfnblock_flags_mask - Return the requested group of flags for the pageblock_nr_pages block of pages
* @page: The page within the block of interest
@@ -476,24 +478,24 @@ static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
* Return: pageblock_bits flags
*/
static __always_inline
-unsigned long __get_pfnblock_flags_mask(struct page *page,
+unsigned char __get_pfnblock_flags_mask(struct page *page,
unsigned long pfn,
unsigned long mask)
{
- unsigned long *bitmap;
+ unsigned char *bitmap;
unsigned long bitidx, word_bitidx;
- unsigned long word;
+ unsigned char word;
bitmap = get_pageblock_bitmap(page, pfn);
bitidx = pfn_to_bitidx(page, pfn);
- word_bitidx = bitidx / BITS_PER_LONG;
- bitidx &= (BITS_PER_LONG-1);
+ word_bitidx = bitidx / BITS_PER_CHAR;
+ bitidx &= (BITS_PER_CHAR-1);
word = bitmap[word_bitidx];
return (word >> bitidx) & mask;
}
-unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
+unsigned char get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
unsigned long mask)
{
return __get_pfnblock_flags_mask(page, pfn, mask);
@@ -515,17 +517,17 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
unsigned long pfn,
unsigned long mask)
{
- unsigned long *bitmap;
+ unsigned char *bitmap;
unsigned long bitidx, word_bitidx;
- unsigned long old_word, word;
+ unsigned char old_word, word;
BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
bitmap = get_pageblock_bitmap(page, pfn);
bitidx = pfn_to_bitidx(page, pfn);
- word_bitidx = bitidx / BITS_PER_LONG;
- bitidx &= (BITS_PER_LONG-1);
+ word_bitidx = bitidx / BITS_PER_CHAR;
+ bitidx &= (BITS_PER_CHAR-1);
VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);
--
1.8.3.1
On Sun, Aug 16, 2020 at 11:47:56AM +0800, Alex Shi wrote:
> +++ b/mm/page_alloc.c
> @@ -467,6 +467,8 @@ static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
> return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
> }
>
> +#define BITS_PER_CHAR 8
include/linux/bits.h:#define BITS_PER_BYTE 8
> bitmap = get_pageblock_bitmap(page, pfn);
> bitidx = pfn_to_bitidx(page, pfn);
> - word_bitidx = bitidx / BITS_PER_LONG;
> - bitidx &= (BITS_PER_LONG-1);
> + word_bitidx = bitidx / BITS_PER_CHAR;
> + bitidx &= (BITS_PER_CHAR-1);
It's not a word any more. it's a byte.
Current pageblock_flags is only 4 bits, so it has to share a char size
in cmpxchg when get set, the false sharing cause perf drop.
If we incrase the bits up to 8, false sharing would gone in cmpxchg. and
the only cost is half char per pageblock, which is half char per 128MB
on x86, 4 chars in 1 GB.
Signed-off-by: Alex Shi <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Alexander Duyck <[email protected]>
Cc: [email protected]
Cc: [email protected]
---
include/linux/pageblock-flags.h | 2 +-
mm/page_alloc.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index d189441568eb..556fc2c0b392 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -25,7 +25,7 @@ enum pageblock_bits {
* Assume the bits will always align on a word. If this assumption
* changes then get/set pageblock needs updating.
*/
- NR_PAGEBLOCK_BITS
+ NR_PAGEBLOCK_BITS = 8
};
#ifdef CONFIG_HUGETLB_PAGE
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 142803d1f49b..01c3fb822732 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -521,7 +521,7 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
unsigned long bitidx, word_bitidx;
unsigned char old_word, word;
- BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
+ BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 8);
BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
bitmap = get_pageblock_bitmap(page, pfn);
--
1.8.3.1
On 16.08.20 05:47, Alex Shi wrote:
> pageblock_flags is used as long, since every pageblock_flags is just 4
> bits, 'long' size will include 8(32bit machine) or 16 pageblocks' flags,
> that flag setting has to sync in cmpxchg with 7 or 15 other pageblock
> flags. It would cause long waiting for sync.
>
> If we could change the pageblock_flags variable as char, we could use
> char size cmpxchg, which just sync up with 2 pageblock flags. it could
> relief much false sharing in cmpxchg.
Do you have any performance numbers to back your claims? IOW, do we care
at all?
--
Thanks,
David / dhildenb
?? 2020/8/16 ????12:09, Matthew Wilcox д??:
> On Sun, Aug 16, 2020 at 11:47:56AM +0800, Alex Shi wrote:
>> +++ b/mm/page_alloc.c
>> @@ -467,6 +467,8 @@ static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
>> return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
>> }
>>
>> +#define BITS_PER_CHAR 8
>
> include/linux/bits.h:#define BITS_PER_BYTE 8
Thank for reminder!
>
>> bitmap = get_pageblock_bitmap(page, pfn);
>> bitidx = pfn_to_bitidx(page, pfn);
>> - word_bitidx = bitidx / BITS_PER_LONG;
>> - bitidx &= (BITS_PER_LONG-1);
>> + word_bitidx = bitidx / BITS_PER_CHAR;
>> + bitidx &= (BITS_PER_CHAR-1);
>
> It's not a word any more. it's a byte.
>
Yes, will change this.
Thanks!
From 963425639f56c1ba1c997653a4ff6885c81dc0ab Mon Sep 17 00:00:00 2001
From: Alex Shi <[email protected]>
Date: Sat, 15 Aug 2020 22:54:17 +0800
Subject: [PATCH 1/2] mm/pageblock: mitigation cmpxchg false sharing in
pageblock flags
pageblock_flags is used as long, since every pageblock_flags is just 4
bits, 'long' size will include 8(32bit machine) or 16 pageblocks' flags,
that flag setting has to sync in cmpxchg with 7 or 15 other pageblock
flags. It would cause long waiting for sync.
If we could change the pageblock_flags variable as char, we could use
char size cmpxchg, which just sync up with 2 pageblock flags. it could
relief much false sharing in cmpxchg.
Signed-off-by: Alex Shi <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Alexander Duyck <[email protected]>
Cc: [email protected]
Cc: [email protected]
---
include/linux/mmzone.h | 6 +++---
include/linux/pageblock-flags.h | 2 +-
mm/page_alloc.c | 38 +++++++++++++++++++-------------------
3 files changed, 23 insertions(+), 23 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 0ed520954843..c92d6d24527d 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -438,7 +438,7 @@ struct zone {
* Flags for a pageblock_nr_pages block. See pageblock-flags.h.
* In SPARSEMEM, this map is stored in struct mem_section
*/
- unsigned long *pageblock_flags;
+ unsigned char *pageblock_flags;
#endif /* CONFIG_SPARSEMEM */
/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
@@ -1159,7 +1159,7 @@ struct mem_section_usage {
DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
#endif
/* See declaration of similar field in struct zone */
- unsigned long pageblock_flags[0];
+ unsigned char pageblock_flags[0];
};
void subsection_map_init(unsigned long pfn, unsigned long nr_pages);
@@ -1212,7 +1212,7 @@ struct mem_section {
extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
#endif
-static inline unsigned long *section_to_usemap(struct mem_section *ms)
+static inline unsigned char *section_to_usemap(struct mem_section *ms)
{
return ms->usage->pageblock_flags;
}
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index fff52ad370c1..d189441568eb 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -54,7 +54,7 @@ enum pageblock_bits {
/* Forward declaration */
struct page;
-unsigned long get_pfnblock_flags_mask(struct page *page,
+unsigned char get_pfnblock_flags_mask(struct page *page,
unsigned long pfn,
unsigned long mask);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 66d45e9cc358..eb5cca7e8683 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -447,7 +447,7 @@ static inline bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
#endif
/* Return a pointer to the bitmap storing bits affecting a block of pages */
-static inline unsigned long *get_pageblock_bitmap(struct page *page,
+static inline unsigned char *get_pageblock_bitmap(struct page *page,
unsigned long pfn)
{
#ifdef CONFIG_SPARSEMEM
@@ -476,24 +476,24 @@ static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
* Return: pageblock_bits flags
*/
static __always_inline
-unsigned long __get_pfnblock_flags_mask(struct page *page,
+unsigned char __get_pfnblock_flags_mask(struct page *page,
unsigned long pfn,
unsigned long mask)
{
- unsigned long *bitmap;
- unsigned long bitidx, word_bitidx;
- unsigned long word;
+ unsigned char *bitmap;
+ unsigned long bitidx, byte_bitidx;
+ unsigned char byte;
bitmap = get_pageblock_bitmap(page, pfn);
bitidx = pfn_to_bitidx(page, pfn);
- word_bitidx = bitidx / BITS_PER_LONG;
- bitidx &= (BITS_PER_LONG-1);
+ byte_bitidx = bitidx / BITS_PER_BYTE;
+ bitidx &= (BITS_PER_BYTE-1);
- word = bitmap[word_bitidx];
- return (word >> bitidx) & mask;
+ byte = bitmap[byte_bitidx];
+ return (byte >> bitidx) & mask;
}
-unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
+unsigned char get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
unsigned long mask)
{
return __get_pfnblock_flags_mask(page, pfn, mask);
@@ -515,29 +515,29 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
unsigned long pfn,
unsigned long mask)
{
- unsigned long *bitmap;
- unsigned long bitidx, word_bitidx;
- unsigned long old_word, word;
+ unsigned char *bitmap;
+ unsigned long bitidx, byte_bitidx;
+ unsigned char old_byte, byte;
BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
bitmap = get_pageblock_bitmap(page, pfn);
bitidx = pfn_to_bitidx(page, pfn);
- word_bitidx = bitidx / BITS_PER_LONG;
- bitidx &= (BITS_PER_LONG-1);
+ byte_bitidx = bitidx / BITS_PER_BYTE;
+ bitidx &= (BITS_PER_BYTE-1);
VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);
mask <<= bitidx;
flags <<= bitidx;
- word = READ_ONCE(bitmap[word_bitidx]);
+ byte = READ_ONCE(bitmap[byte_bitidx]);
for (;;) {
- old_word = cmpxchg(&bitmap[word_bitidx], word, (word & ~mask) | flags);
- if (word == old_word)
+ old_byte = cmpxchg(&bitmap[byte_bitidx], byte, (byte & ~mask) | flags);
+ if (byte == old_byte)
break;
- word = old_word;
+ byte = old_byte;
}
}
--
1.8.3.1
在 2020/8/16 下午8:16, David Hildenbrand 写道:
> On 16.08.20 05:47, Alex Shi wrote:
>> pageblock_flags is used as long, since every pageblock_flags is just 4
>> bits, 'long' size will include 8(32bit machine) or 16 pageblocks' flags,
>> that flag setting has to sync in cmpxchg with 7 or 15 other pageblock
>> flags. It would cause long waiting for sync.
>>
>> If we could change the pageblock_flags variable as char, we could use
>> char size cmpxchg, which just sync up with 2 pageblock flags. it could
>> relief much false sharing in cmpxchg.
>
> Do you have any performance numbers to back your claims? IOW, do we care
> at all?
>
>
Hi David,
Thanks for comments!
Not yet, I trace the following commit here, with this commit this hot path
looks be resolved.
Thanks
Alex
e380bebe4771548 mm, compaction: keep migration source private to a single compaction instance
if (!locked) {
locked = compact_trylock_irqsave(zone_lru_lock(zone),
&flags, cc);
- if (!locked)
+
+ /* Allow future scanning if the lock is contended */
+ if (!locked) {
+ clear_pageblock_skip(page);
break;
+ }
+
+ /* Try get exclusive access under lock */
+ if (!skip_updated) {
+ skip_updated = true;
+ if (test_and_set_skip(cc, page, low_pfn))
+ goto isolate_abort;
+ }
On Sun, Aug 16, 2020 at 11:47:56AM +0800, Alex Shi wrote:
>pageblock_flags is used as long, since every pageblock_flags is just 4
>bits, 'long' size will include 8(32bit machine) or 16 pageblocks' flags,
>that flag setting has to sync in cmpxchg with 7 or 15 other pageblock
>flags. It would cause long waiting for sync.
>
>If we could change the pageblock_flags variable as char, we could use
>char size cmpxchg, which just sync up with 2 pageblock flags. it could
>relief much false sharing in cmpxchg.
>
If my understanding is correct, CPU reads data in the size of cacheline.
Define a variable a char or other, doesn't help on false sharing.
Correct me, if not.
--
Wei Yang
Help you, Help me
?? 2020/8/17 ????5:58, Wei Yang д??:
> On Sun, Aug 16, 2020 at 11:47:56AM +0800, Alex Shi wrote:
>> pageblock_flags is used as long, since every pageblock_flags is just 4
>> bits, 'long' size will include 8(32bit machine) or 16 pageblocks' flags,
>> that flag setting has to sync in cmpxchg with 7 or 15 other pageblock
>> flags. It would cause long waiting for sync.
>>
>> If we could change the pageblock_flags variable as char, we could use
>> char size cmpxchg, which just sync up with 2 pageblock flags. it could
>> relief much false sharing in cmpxchg.
>>
>
> If my understanding is correct, CPU reads data in the size of cacheline.
> Define a variable a char or other, doesn't help on false sharing.
>
> Correct me, if not.
Right and not,
Cacheline false sharing is right. but after that, cmpxchg still need to compare
the pointed data, if the data is long, it need to compare long word and make sure
nothing changes in the long word. If we narrow the comparsion data to byte, cmpxchg
will just sync up on a byte. So it looks like there are 2 level false sharing here.
Thanks
Alex
for (;;) {
- old_word = cmpxchg(&bitmap[word_bitidx], word, (word & ~mask) | flags);
- if (word == old_word)
+ old_byte = cmpxchg(&bitmap[byte_bitidx], byte, (byte & ~mask) | flags);
+ if (byte == old_byte)
break;
- word = old_word;
+ byte = old_byte;
}