2016-11-15 15:55:47

by Vitaly Wool

Subject: [PATCH 0/3] z3fold: per-page spinlock and other smaller optimizations

Here comes the patchset with the per-page spinlock as the main
modification, and two smaller dependent patches: one removes the
build error triggered when the z3fold header size exceeds the size
of a chunk, and the other puts non-compacted pages at the end of
the unbuddied list.

Signed-off-by: Vitaly Wool <[email protected]>


2016-11-15 16:00:23

by Vitaly Wool

Subject: [PATCH 1/3] z3fold: use per-page spinlock

Most z3fold operations are in-page, such as modifying the z3fold
page header or moving z3fold objects within a page. Taking the
per-pool spinlock to protect per-page objects is therefore
suboptimal, and the idea of having a per-page spinlock (or rwlock)
has been around for some time.

This patch implements a raw-spinlock-based per-page locking mechanism
which is lightweight enough to normally fit into the z3fold header.

Changes from v1 [1]:
- custom locking mechanism changed to spinlocks
- no read/write locks, just per-page spinlock

Changes from v2 [2]:
- if a page is taken off its list by z3fold_alloc(), bail out from
z3fold_free() early

Changes from v3 [3]:
- spinlock changed to raw spinlock to avoid BUILD_BUG_ON trigger

[1] https://lkml.org/lkml/2016/11/5/59
[2] https://lkml.org/lkml/2016/11/8/400
[3] https://lkml.org/lkml/2016/11/9/146

Signed-off-by: Vitaly Wool <[email protected]>
---
mm/z3fold.c | 136 +++++++++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 97 insertions(+), 39 deletions(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index d2e8aec..7ad70fa 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -98,6 +98,7 @@ enum buddy {
* struct z3fold_header - z3fold page metadata occupying the first chunk of each
* z3fold page, except for HEADLESS pages
* @buddy: links the z3fold page into the relevant list in the pool
+ * @page_lock: per-page lock
* @first_chunks: the size of the first buddy in chunks, 0 if free
* @middle_chunks: the size of the middle buddy in chunks, 0 if free
* @last_chunks: the size of the last buddy in chunks, 0 if free
@@ -105,6 +106,7 @@ enum buddy {
*/
struct z3fold_header {
struct list_head buddy;
+ raw_spinlock_t page_lock;
unsigned short first_chunks;
unsigned short middle_chunks;
unsigned short last_chunks;
@@ -144,6 +146,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
clear_bit(PAGE_HEADLESS, &page->private);
clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);

+ raw_spin_lock_init(&zhdr->page_lock);
zhdr->first_chunks = 0;
zhdr->middle_chunks = 0;
zhdr->last_chunks = 0;
@@ -159,6 +162,19 @@ static void free_z3fold_page(struct z3fold_header *zhdr)
__free_page(virt_to_page(zhdr));
}

+/* Lock a z3fold page */
+static inline void z3fold_page_lock(struct z3fold_header *zhdr)
+{
+ raw_spin_lock(&zhdr->page_lock);
+}
+
+/* Unlock a z3fold page */
+static inline void z3fold_page_unlock(struct z3fold_header *zhdr)
+{
+ raw_spin_unlock(&zhdr->page_lock);
+}
+
+
/*
* Encodes the handle of a particular buddy within a z3fold page
* Pool lock should be held as this function accesses first_num
@@ -347,50 +363,60 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
bud = HEADLESS;
else {
chunks = size_to_chunks(size);
- spin_lock(&pool->lock);

/* First, try to find an unbuddied z3fold page. */
zhdr = NULL;
for_each_unbuddied_list(i, chunks) {
- if (!list_empty(&pool->unbuddied[i])) {
- zhdr = list_first_entry(&pool->unbuddied[i],
+ spin_lock(&pool->lock);
+ zhdr = list_first_entry_or_null(&pool->unbuddied[i],
struct z3fold_header, buddy);
- page = virt_to_page(zhdr);
- if (zhdr->first_chunks == 0) {
- if (zhdr->middle_chunks != 0 &&
- chunks >= zhdr->start_middle)
- bud = LAST;
- else
- bud = FIRST;
- } else if (zhdr->last_chunks == 0)
+ if (!zhdr) {
+ spin_unlock(&pool->lock);
+ continue;
+ }
+ list_del_init(&zhdr->buddy);
+ spin_unlock(&pool->lock);
+
+ page = virt_to_page(zhdr);
+ z3fold_page_lock(zhdr);
+ if (zhdr->first_chunks == 0) {
+ if (zhdr->middle_chunks != 0 &&
+ chunks >= zhdr->start_middle)
bud = LAST;
- else if (zhdr->middle_chunks == 0)
- bud = MIDDLE;
- else {
- pr_err("No free chunks in unbuddied\n");
- WARN_ON(1);
- continue;
- }
- list_del(&zhdr->buddy);
- goto found;
+ else
+ bud = FIRST;
+ } else if (zhdr->last_chunks == 0)
+ bud = LAST;
+ else if (zhdr->middle_chunks == 0)
+ bud = MIDDLE;
+ else {
+ spin_lock(&pool->lock);
+ list_add(&zhdr->buddy, &pool->buddied);
+ spin_unlock(&pool->lock);
+ z3fold_page_unlock(zhdr);
+ pr_err("No free chunks in unbuddied\n");
+ WARN_ON(1);
+ continue;
}
+ goto found;
}
bud = FIRST;
- spin_unlock(&pool->lock);
}

/* Couldn't find unbuddied z3fold page, create new one */
page = alloc_page(gfp);
if (!page)
return -ENOMEM;
- spin_lock(&pool->lock);
+
atomic64_inc(&pool->pages_nr);
zhdr = init_z3fold_page(page);

if (bud == HEADLESS) {
set_bit(PAGE_HEADLESS, &page->private);
+ spin_lock(&pool->lock);
goto headless;
}
+ z3fold_page_lock(zhdr);

found:
if (bud == FIRST)
@@ -402,6 +428,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
zhdr->start_middle = zhdr->first_chunks + 1;
}

+ spin_lock(&pool->lock);
if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 ||
zhdr->middle_chunks == 0) {
/* Add to unbuddied list */
@@ -421,6 +448,8 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,

*handle = encode_handle(zhdr, bud);
spin_unlock(&pool->lock);
+ if (bud != HEADLESS)
+ z3fold_page_unlock(zhdr);

return 0;
}
@@ -442,7 +471,6 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
struct page *page;
enum buddy bud;

- spin_lock(&pool->lock);
zhdr = handle_to_z3fold_header(handle);
page = virt_to_page(zhdr);

@@ -450,6 +478,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
/* HEADLESS page stored */
bud = HEADLESS;
} else {
+ z3fold_page_lock(zhdr);
bud = handle_to_buddy(handle);

switch (bud) {
@@ -466,37 +495,59 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
default:
pr_err("%s: unknown bud %d\n", __func__, bud);
WARN_ON(1);
- spin_unlock(&pool->lock);
+ z3fold_page_unlock(zhdr);
return;
}
}

if (test_bit(UNDER_RECLAIM, &page->private)) {
/* z3fold page is under reclaim, reclaim will free */
- spin_unlock(&pool->lock);
+ if (bud != HEADLESS)
+ z3fold_page_unlock(zhdr);
return;
}

/* Remove from existing buddy list */
- if (bud != HEADLESS)
- list_del(&zhdr->buddy);
+ if (bud != HEADLESS) {
+ spin_lock(&pool->lock);
+ /*
+ * this object may have been removed from its list by
+ * z3fold_alloc(). In that case we just do nothing,
+ * z3fold_alloc() will allocate an object and add the page
+ * to the relevant list.
+ */
+ if (!list_empty(&zhdr->buddy)) {
+ list_del(&zhdr->buddy);
+ } else {
+ spin_unlock(&pool->lock);
+ z3fold_page_unlock(zhdr);
+ return;
+ }
+ spin_unlock(&pool->lock);
+ }

if (bud == HEADLESS ||
(zhdr->first_chunks == 0 && zhdr->middle_chunks == 0 &&
zhdr->last_chunks == 0)) {
/* z3fold page is empty, free */
+ spin_lock(&pool->lock);
list_del(&page->lru);
+ spin_unlock(&pool->lock);
clear_bit(PAGE_HEADLESS, &page->private);
+ if (bud != HEADLESS)
+ z3fold_page_unlock(zhdr);
free_z3fold_page(zhdr);
atomic64_dec(&pool->pages_nr);
} else {
z3fold_compact_page(zhdr);
/* Add to the unbuddied list */
+ spin_lock(&pool->lock);
freechunks = num_free_chunks(zhdr);
list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+ spin_unlock(&pool->lock);
+ z3fold_page_unlock(zhdr);
}

- spin_unlock(&pool->lock);
}

/**
@@ -557,6 +608,8 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
zhdr = page_address(page);
if (!test_bit(PAGE_HEADLESS, &page->private)) {
list_del(&zhdr->buddy);
+ spin_unlock(&pool->lock);
+ z3fold_page_lock(zhdr);
/*
* We need encode the handles before unlocking, since
* we can race with free that will set
@@ -571,13 +624,13 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
middle_handle = encode_handle(zhdr, MIDDLE);
if (zhdr->last_chunks)
last_handle = encode_handle(zhdr, LAST);
+ z3fold_page_unlock(zhdr);
} else {
first_handle = encode_handle(zhdr, HEADLESS);
last_handle = middle_handle = 0;
+ spin_unlock(&pool->lock);
}

- spin_unlock(&pool->lock);
-
/* Issue the eviction callback(s) */
if (middle_handle) {
ret = pool->ops->evict(pool, middle_handle);
@@ -595,7 +648,8 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
goto next;
}
next:
- spin_lock(&pool->lock);
+ if (!test_bit(PAGE_HEADLESS, &page->private))
+ z3fold_page_lock(zhdr);
clear_bit(UNDER_RECLAIM, &page->private);
if ((test_bit(PAGE_HEADLESS, &page->private) && ret == 0) ||
(zhdr->first_chunks == 0 && zhdr->last_chunks == 0 &&
@@ -605,19 +659,22 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
* return success.
*/
clear_bit(PAGE_HEADLESS, &page->private);
+ if (!test_bit(PAGE_HEADLESS, &page->private))
+ z3fold_page_unlock(zhdr);
free_z3fold_page(zhdr);
atomic64_dec(&pool->pages_nr);
- spin_unlock(&pool->lock);
return 0;
} else if (!test_bit(PAGE_HEADLESS, &page->private)) {
if (zhdr->first_chunks != 0 &&
zhdr->last_chunks != 0 &&
zhdr->middle_chunks != 0) {
/* Full, add to buddied list */
+ spin_lock(&pool->lock);
list_add(&zhdr->buddy, &pool->buddied);
} else {
z3fold_compact_page(zhdr);
/* add to unbuddied list */
+ spin_lock(&pool->lock);
freechunks = num_free_chunks(zhdr);
list_add(&zhdr->buddy,
&pool->unbuddied[freechunks]);
@@ -628,6 +685,8 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
list_add(&page->lru, &pool->lru);
}
spin_unlock(&pool->lock);
+ if (!test_bit(PAGE_HEADLESS, &page->private))
+ z3fold_page_unlock(zhdr);
return -EAGAIN;
}

@@ -648,7 +707,6 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
void *addr;
enum buddy buddy;

- spin_lock(&pool->lock);
zhdr = handle_to_z3fold_header(handle);
addr = zhdr;
page = virt_to_page(zhdr);
@@ -656,6 +714,7 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
if (test_bit(PAGE_HEADLESS, &page->private))
goto out;

+ z3fold_page_lock(zhdr);
buddy = handle_to_buddy(handle);
switch (buddy) {
case FIRST:
@@ -674,8 +733,9 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
addr = NULL;
break;
}
+
+ z3fold_page_unlock(zhdr);
out:
- spin_unlock(&pool->lock);
return addr;
}

@@ -690,19 +750,17 @@ static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
struct page *page;
enum buddy buddy;

- spin_lock(&pool->lock);
zhdr = handle_to_z3fold_header(handle);
page = virt_to_page(zhdr);

- if (test_bit(PAGE_HEADLESS, &page->private)) {
- spin_unlock(&pool->lock);
+ if (test_bit(PAGE_HEADLESS, &page->private))
return;
- }

+ z3fold_page_lock(zhdr);
buddy = handle_to_buddy(handle);
if (buddy == MIDDLE)
clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
- spin_unlock(&pool->lock);
+ z3fold_page_unlock(zhdr);
}

/**
--
2.4.2

2016-11-15 16:00:38

by Vitaly Wool

Subject: [PATCH 2/3] z3fold: don't fail kernel build if z3fold_header is too big

Currently the whole kernel build will be stopped if the size of
struct z3fold_header is greater than the size of one chunk, which
is 64 bytes by default. This may stand in the way of automated
test/debug builds so let's remove that and just fail the z3fold
initialization in such case instead.

Signed-off-by: Vitaly Wool <[email protected]>
---
mm/z3fold.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 7ad70fa..ffd9353 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -870,10 +870,15 @@ MODULE_ALIAS("zpool-z3fold");

static int __init init_z3fold(void)
{
- /* Make sure the z3fold header will fit in one chunk */
- BUILD_BUG_ON(sizeof(struct z3fold_header) > ZHDR_SIZE_ALIGNED);
- zpool_register_driver(&z3fold_zpool_driver);
+ /* Fail the initialization if z3fold header won't fit in one chunk */
+ if (sizeof(struct z3fold_header) > ZHDR_SIZE_ALIGNED) {
+ pr_err("z3fold: z3fold_header size (%d) is bigger than "
+ "the chunk size (%d), can't proceed\n",
+ sizeof(struct z3fold_header) , ZHDR_SIZE_ALIGNED);
+ return -E2BIG;
+ }

+ zpool_register_driver(&z3fold_zpool_driver);
return 0;
}

--
2.4.2

2016-11-15 16:00:51

by Vitaly Wool

Subject: [PATCH 3/3] z3fold: discourage use of pages that weren't compacted

If a z3fold page couldn't be compacted, we don't want it to be
used for next object allocation in the first place. It makes more
sense to add it to the end of the relevant unbuddied list. If that
page gets compacted later, it will be added to the beginning of
the list then.

This simple idea gives 5-7% improvement in randrw fio tests and
about 10% improvement in fio sequential read/write.

Signed-off-by: Vitaly Wool <[email protected]>
---
mm/z3fold.c | 22 +++++++++++++++++-----
1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index ffd9353..e282ba0 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -539,11 +539,19 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
free_z3fold_page(zhdr);
atomic64_dec(&pool->pages_nr);
} else {
- z3fold_compact_page(zhdr);
+ int compacted = z3fold_compact_page(zhdr);
/* Add to the unbuddied list */
spin_lock(&pool->lock);
freechunks = num_free_chunks(zhdr);
- list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+ /*
+ * If the page has been compacted, we want to use it
+ * in the first place.
+ */
+ if (compacted)
+ list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+ else
+ list_add_tail(&zhdr->buddy,
+ &pool->unbuddied[freechunks]);
spin_unlock(&pool->lock);
z3fold_page_unlock(zhdr);
}
@@ -672,12 +680,16 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
spin_lock(&pool->lock);
list_add(&zhdr->buddy, &pool->buddied);
} else {
- z3fold_compact_page(zhdr);
+ int compacted = z3fold_compact_page(zhdr);
/* add to unbuddied list */
spin_lock(&pool->lock);
freechunks = num_free_chunks(zhdr);
- list_add(&zhdr->buddy,
- &pool->unbuddied[freechunks]);
+ if (compacted)
+ list_add(&zhdr->buddy,
+ &pool->unbuddied[freechunks]);
+ else
+ list_add_tail(&zhdr->buddy,
+ &pool->unbuddied[freechunks]);
}
}

--
2.4.2

2016-11-25 16:01:11

by Dan Streetman

Subject: Re: [PATCH 2/3] z3fold: don't fail kernel build if z3fold_header is too big

On Tue, Nov 15, 2016 at 11:00 AM, Vitaly Wool <[email protected]> wrote:
> Currently the whole kernel build will be stopped if the size of
> struct z3fold_header is greater than the size of one chunk, which
> is 64 bytes by default. This may stand in the way of automated
> test/debug builds so let's remove that and just fail the z3fold
> initialization in such case instead.
>
> Signed-off-by: Vitaly Wool <[email protected]>
> ---
> mm/z3fold.c | 11 ++++++++---
> 1 file changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/mm/z3fold.c b/mm/z3fold.c
> index 7ad70fa..ffd9353 100644
> --- a/mm/z3fold.c
> +++ b/mm/z3fold.c
> @@ -870,10 +870,15 @@ MODULE_ALIAS("zpool-z3fold");
>
> static int __init init_z3fold(void)
> {
> - /* Make sure the z3fold header will fit in one chunk */
> - BUILD_BUG_ON(sizeof(struct z3fold_header) > ZHDR_SIZE_ALIGNED);

Nak. this is the wrong way to handle this. The build bug is there to
indicate to you that your patch makes the header too large, not as a
runtime check to disable everything.

The right way to handle it is to change the hardcoded assumption that
the header fits into a single chunk; e.g.:

#define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE)
#define ZHDR_CHUNKS (ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT)

then use ZHDR_CHUNKS in all places where it's currently assumed the
header is 1 chunk, e.g. in num_free_chunks:

if (zhdr->middle_chunks != 0) {
int nfree_before = zhdr->first_chunks ?
- 0 : zhdr->start_middle - 1;
+ 0 : zhdr->start_middle - ZHDR_CHUNKS;

after changing all needed places like that, the build bug isn't needed
anymore (unless we want to make sure the header isn't larger than some
arbitrary number N chunks)
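
For example, the start_middle setup in z3fold_alloc (visible in the
patch 1 hunk above) makes the same one-chunk assumption and would
presumably become something like:

- zhdr->start_middle = zhdr->first_chunks + 1;
+ zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;

(just a sketch of the kind of change needed, not a full list of the
affected places)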

> - zpool_register_driver(&z3fold_zpool_driver);
> + /* Fail the initialization if z3fold header won't fit in one chunk */
> + if (sizeof(struct z3fold_header) > ZHDR_SIZE_ALIGNED) {
> + pr_err("z3fold: z3fold_header size (%d) is bigger than "
> + "the chunk size (%d), can't proceed\n",
> + sizeof(struct z3fold_header) , ZHDR_SIZE_ALIGNED);
> + return -E2BIG;
> + }
>
> + zpool_register_driver(&z3fold_zpool_driver);
> return 0;
> }
>
> --
> 2.4.2

2016-11-25 16:29:31

by Dan Streetman

Subject: Re: [PATCH 1/3] z3fold: use per-page spinlock

On Tue, Nov 15, 2016 at 11:00 AM, Vitaly Wool <[email protected]> wrote:
> Most z3fold operations are in-page, such as modifying the z3fold
> page header or moving z3fold objects within a page. Taking the
> per-pool spinlock to protect per-page objects is therefore
> suboptimal, and the idea of having a per-page spinlock (or rwlock)
> has been around for some time.
>
> This patch implements a raw-spinlock-based per-page locking mechanism
> which is lightweight enough to normally fit into the z3fold header.
>
> Changes from v1 [1]:
> - custom locking mechanism changed to spinlocks
> - no read/write locks, just per-page spinlock
>
> Changes from v2 [2]:
> - if a page is taken off its list by z3fold_alloc(), bail out from
> z3fold_free() early
>
> Changes from v3 [3]:
> - spinlock changed to raw spinlock to avoid BUILD_BUG_ON trigger
>
> [1] https://lkml.org/lkml/2016/11/5/59
> [2] https://lkml.org/lkml/2016/11/8/400
> [3] https://lkml.org/lkml/2016/11/9/146
>
> Signed-off-by: Vitaly Wool <[email protected]>
> ---
> mm/z3fold.c | 136 +++++++++++++++++++++++++++++++++++++++++++-----------------
> 1 file changed, 97 insertions(+), 39 deletions(-)
>
> diff --git a/mm/z3fold.c b/mm/z3fold.c
> index d2e8aec..7ad70fa 100644
> --- a/mm/z3fold.c
> +++ b/mm/z3fold.c
> @@ -98,6 +98,7 @@ enum buddy {
> * struct z3fold_header - z3fold page metadata occupying the first chunk of each
> * z3fold page, except for HEADLESS pages
> * @buddy: links the z3fold page into the relevant list in the pool
> + * @page_lock: per-page lock
> * @first_chunks: the size of the first buddy in chunks, 0 if free
> * @middle_chunks: the size of the middle buddy in chunks, 0 if free
> * @last_chunks: the size of the last buddy in chunks, 0 if free
> @@ -105,6 +106,7 @@ enum buddy {
> */
> struct z3fold_header {
> struct list_head buddy;
> + raw_spinlock_t page_lock;
> unsigned short first_chunks;
> unsigned short middle_chunks;
> unsigned short last_chunks;
> @@ -144,6 +146,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
> clear_bit(PAGE_HEADLESS, &page->private);
> clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
>
> + raw_spin_lock_init(&zhdr->page_lock);
> zhdr->first_chunks = 0;
> zhdr->middle_chunks = 0;
> zhdr->last_chunks = 0;
> @@ -159,6 +162,19 @@ static void free_z3fold_page(struct z3fold_header *zhdr)
> __free_page(virt_to_page(zhdr));
> }
>
> +/* Lock a z3fold page */
> +static inline void z3fold_page_lock(struct z3fold_header *zhdr)
> +{
> + raw_spin_lock(&zhdr->page_lock);
> +}
> +
> +/* Unlock a z3fold page */
> +static inline void z3fold_page_unlock(struct z3fold_header *zhdr)
> +{
> + raw_spin_unlock(&zhdr->page_lock);
> +}
> +
> +
> /*
> * Encodes the handle of a particular buddy within a z3fold page
> * Pool lock should be held as this function accesses first_num
> @@ -347,50 +363,60 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
> bud = HEADLESS;
> else {
> chunks = size_to_chunks(size);
> - spin_lock(&pool->lock);
>
> /* First, try to find an unbuddied z3fold page. */
> zhdr = NULL;
> for_each_unbuddied_list(i, chunks) {
> - if (!list_empty(&pool->unbuddied[i])) {
> - zhdr = list_first_entry(&pool->unbuddied[i],
> + spin_lock(&pool->lock);
> + zhdr = list_first_entry_or_null(&pool->unbuddied[i],
> struct z3fold_header, buddy);
> - page = virt_to_page(zhdr);
> - if (zhdr->first_chunks == 0) {
> - if (zhdr->middle_chunks != 0 &&
> - chunks >= zhdr->start_middle)
> - bud = LAST;
> - else
> - bud = FIRST;
> - } else if (zhdr->last_chunks == 0)
> + if (!zhdr) {
> + spin_unlock(&pool->lock);
> + continue;
> + }
> + list_del_init(&zhdr->buddy);
> + spin_unlock(&pool->lock);
> +
> + page = virt_to_page(zhdr);
> + z3fold_page_lock(zhdr);
> + if (zhdr->first_chunks == 0) {
> + if (zhdr->middle_chunks != 0 &&
> + chunks >= zhdr->start_middle)
> bud = LAST;
> - else if (zhdr->middle_chunks == 0)
> - bud = MIDDLE;
> - else {
> - pr_err("No free chunks in unbuddied\n");
> - WARN_ON(1);
> - continue;
> - }
> - list_del(&zhdr->buddy);
> - goto found;
> + else
> + bud = FIRST;
> + } else if (zhdr->last_chunks == 0)
> + bud = LAST;
> + else if (zhdr->middle_chunks == 0)
> + bud = MIDDLE;
> + else {
> + spin_lock(&pool->lock);
> + list_add(&zhdr->buddy, &pool->buddied);
> + spin_unlock(&pool->lock);
> + z3fold_page_unlock(zhdr);
> + pr_err("No free chunks in unbuddied\n");
> + WARN_ON(1);
> + continue;
> }
> + goto found;
> }
> bud = FIRST;
> - spin_unlock(&pool->lock);
> }
>
> /* Couldn't find unbuddied z3fold page, create new one */
> page = alloc_page(gfp);
> if (!page)
> return -ENOMEM;
> - spin_lock(&pool->lock);
> +
> atomic64_inc(&pool->pages_nr);
> zhdr = init_z3fold_page(page);
>
> if (bud == HEADLESS) {
> set_bit(PAGE_HEADLESS, &page->private);
> + spin_lock(&pool->lock);
> goto headless;
> }
> + z3fold_page_lock(zhdr);
>
> found:
> if (bud == FIRST)
> @@ -402,6 +428,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
> zhdr->start_middle = zhdr->first_chunks + 1;
> }
>
> + spin_lock(&pool->lock);
> if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 ||
> zhdr->middle_chunks == 0) {
> /* Add to unbuddied list */
> @@ -421,6 +448,8 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>
> *handle = encode_handle(zhdr, bud);
> spin_unlock(&pool->lock);
> + if (bud != HEADLESS)
> + z3fold_page_unlock(zhdr);
>
> return 0;
> }
> @@ -442,7 +471,6 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
> struct page *page;
> enum buddy bud;
>
> - spin_lock(&pool->lock);
> zhdr = handle_to_z3fold_header(handle);
> page = virt_to_page(zhdr);
>
> @@ -450,6 +478,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
> /* HEADLESS page stored */
> bud = HEADLESS;
> } else {
> + z3fold_page_lock(zhdr);
> bud = handle_to_buddy(handle);
>
> switch (bud) {
> @@ -466,37 +495,59 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
> default:
> pr_err("%s: unknown bud %d\n", __func__, bud);
> WARN_ON(1);
> - spin_unlock(&pool->lock);
> + z3fold_page_unlock(zhdr);
> return;
> }
> }
>
> if (test_bit(UNDER_RECLAIM, &page->private)) {
> /* z3fold page is under reclaim, reclaim will free */
> - spin_unlock(&pool->lock);
> + if (bud != HEADLESS)
> + z3fold_page_unlock(zhdr);
> return;
> }
>
> /* Remove from existing buddy list */
> - if (bud != HEADLESS)
> - list_del(&zhdr->buddy);
> + if (bud != HEADLESS) {
> + spin_lock(&pool->lock);
> + /*
> + * this object may have been removed from its list by
> + * z3fold_alloc(). In that case we just do nothing,
> + * z3fold_alloc() will allocate an object and add the page
> + * to the relevant list.
> + */

this is getting complicated enough that it may be better to switch to
kref counting to handle freeing of each page. That's going to be more
reliable as well as simpler code.

However, this code does seem correct as it is now, since
z3fold_alloc() is the *only* code that removes a page from its buddy
list without holding the page lock (right??).
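
A rough sketch of what the kref approach could look like (the
'refcount' field and the release callback below are made up for
illustration, not an actual proposal):

	/* 'refcount' would be a new struct kref field in z3fold_header */
	static void release_z3fold_page(struct kref *ref)
	{
		struct z3fold_header *zhdr = container_of(ref,
				struct z3fold_header, refcount);

		free_z3fold_page(zhdr);
	}

	/* users take a reference while working on the page ... */
	kref_get(&zhdr->refcount);
	/* ... and the last put frees it, no list_empty() checks needed */
	kref_put(&zhdr->refcount, release_z3fold_page);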

> + if (!list_empty(&zhdr->buddy)) {
> + list_del(&zhdr->buddy);
> + } else {
> + spin_unlock(&pool->lock);
> + z3fold_page_unlock(zhdr);
> + return;
> + }
> + spin_unlock(&pool->lock);
> + }
>
> if (bud == HEADLESS ||
> (zhdr->first_chunks == 0 && zhdr->middle_chunks == 0 &&
> zhdr->last_chunks == 0)) {
> /* z3fold page is empty, free */
> + spin_lock(&pool->lock);
> list_del(&page->lru);
> + spin_unlock(&pool->lock);
> clear_bit(PAGE_HEADLESS, &page->private);
> + if (bud != HEADLESS)
> + z3fold_page_unlock(zhdr);
> free_z3fold_page(zhdr);
> atomic64_dec(&pool->pages_nr);
> } else {
> z3fold_compact_page(zhdr);
> /* Add to the unbuddied list */
> + spin_lock(&pool->lock);
> freechunks = num_free_chunks(zhdr);
> list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
> + spin_unlock(&pool->lock);
> + z3fold_page_unlock(zhdr);
> }
>
> - spin_unlock(&pool->lock);
> }
>
> /**
> @@ -557,6 +608,8 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)

while you're making these changes, can you fix another existing bug -
the main reclaim_page for loop assumes the lru list always has an
entry, but that's not guaranteed; can you change it to check
list_empty() before getting the last lru entry?
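
i.e. each iteration should check, under the pool lock, that the lru
list isn't empty before grabbing its tail entry, something like:

	if (list_empty(&pool->lru)) {
		spin_unlock(&pool->lock);
		return -EINVAL; /* or whatever error fits best here */
	}
	page = list_last_entry(&pool->lru, struct page, lru);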

> zhdr = page_address(page);
> if (!test_bit(PAGE_HEADLESS, &page->private)) {
> list_del(&zhdr->buddy);
> + spin_unlock(&pool->lock);
> + z3fold_page_lock(zhdr);
> /*
> * We need encode the handles before unlocking, since
> * we can race with free that will set
> @@ -571,13 +624,13 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
> middle_handle = encode_handle(zhdr, MIDDLE);
> if (zhdr->last_chunks)
> last_handle = encode_handle(zhdr, LAST);
> + z3fold_page_unlock(zhdr);
> } else {
> first_handle = encode_handle(zhdr, HEADLESS);
> last_handle = middle_handle = 0;
> + spin_unlock(&pool->lock);
> }
>
> - spin_unlock(&pool->lock);
> -
> /* Issue the eviction callback(s) */
> if (middle_handle) {
> ret = pool->ops->evict(pool, middle_handle);
> @@ -595,7 +648,8 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
> goto next;
> }
> next:
> - spin_lock(&pool->lock);
> + if (!test_bit(PAGE_HEADLESS, &page->private))
> + z3fold_page_lock(zhdr);
> clear_bit(UNDER_RECLAIM, &page->private);
> if ((test_bit(PAGE_HEADLESS, &page->private) && ret == 0) ||
> (zhdr->first_chunks == 0 && zhdr->last_chunks == 0 &&
> @@ -605,19 +659,22 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
> * return success.
> */
> clear_bit(PAGE_HEADLESS, &page->private);
> + if (!test_bit(PAGE_HEADLESS, &page->private))
> + z3fold_page_unlock(zhdr);

heh, well this definitely isn't correct :-)

probably should move that test_bit to before clearing the bit.
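
i.e. something like:

	if (!test_bit(PAGE_HEADLESS, &page->private))
		z3fold_page_unlock(zhdr);
	clear_bit(PAGE_HEADLESS, &page->private);
	free_z3fold_page(zhdr);
	atomic64_dec(&pool->pages_nr);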

> free_z3fold_page(zhdr);
> atomic64_dec(&pool->pages_nr);
> - spin_unlock(&pool->lock);
> return 0;
> } else if (!test_bit(PAGE_HEADLESS, &page->private)) {

in this if-else, the case of HEADLESS && ret != 0 falls through
without taking the pool lock, which is assumed to be held at the top
of the next for loop iteration and/or after exiting the for loop.
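
e.g. it probably needs an else branch taking the pool lock for that
case, along the lines of:

	} else {
		/*
		 * headless page, eviction failed: the pool lock still
		 * has to be taken before the page goes back on the lru
		 */
		spin_lock(&pool->lock);
	}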

> if (zhdr->first_chunks != 0 &&
> zhdr->last_chunks != 0 &&
> zhdr->middle_chunks != 0) {
> /* Full, add to buddied list */
> + spin_lock(&pool->lock);
> list_add(&zhdr->buddy, &pool->buddied);
> } else {
> z3fold_compact_page(zhdr);
> /* add to unbuddied list */
> + spin_lock(&pool->lock);
> freechunks = num_free_chunks(zhdr);
> list_add(&zhdr->buddy,
> &pool->unbuddied[freechunks]);
> @@ -628,6 +685,8 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
> list_add(&page->lru, &pool->lru);
> }
> spin_unlock(&pool->lock);
> + if (!test_bit(PAGE_HEADLESS, &page->private))
> + z3fold_page_unlock(zhdr);

this needs to be inside the for loop, otherwise once the retries are
done you leave all the failed-to-reclaim pages locked except the last
one.

> return -EAGAIN;
> }
>
> @@ -648,7 +707,6 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
> void *addr;
> enum buddy buddy;
>
> - spin_lock(&pool->lock);
> zhdr = handle_to_z3fold_header(handle);
> addr = zhdr;
> page = virt_to_page(zhdr);
> @@ -656,6 +714,7 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
> if (test_bit(PAGE_HEADLESS, &page->private))
> goto out;
>
> + z3fold_page_lock(zhdr);
> buddy = handle_to_buddy(handle);
> switch (buddy) {
> case FIRST:
> @@ -674,8 +733,9 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
> addr = NULL;
> break;
> }
> +
> + z3fold_page_unlock(zhdr);
> out:
> - spin_unlock(&pool->lock);
> return addr;
> }
>
> @@ -690,19 +750,17 @@ static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
> struct page *page;
> enum buddy buddy;
>
> - spin_lock(&pool->lock);
> zhdr = handle_to_z3fold_header(handle);
> page = virt_to_page(zhdr);
>
> - if (test_bit(PAGE_HEADLESS, &page->private)) {
> - spin_unlock(&pool->lock);
> + if (test_bit(PAGE_HEADLESS, &page->private))
> return;
> - }
>
> + z3fold_page_lock(zhdr);
> buddy = handle_to_buddy(handle);
> if (buddy == MIDDLE)
> clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
> - spin_unlock(&pool->lock);
> + z3fold_page_unlock(zhdr);
> }
>
> /**
> --
> 2.4.2

2016-11-25 16:39:18

by Vitaly Wool

Subject: Re: [PATCH 2/3] z3fold: don't fail kernel build if z3fold_header is too big

On Fri, Nov 25, 2016 at 4:59 PM, Dan Streetman <[email protected]> wrote:
> On Tue, Nov 15, 2016 at 11:00 AM, Vitaly Wool <[email protected]> wrote:
>> Currently the whole kernel build will be stopped if the size of
>> struct z3fold_header is greater than the size of one chunk, which
>> is 64 bytes by default. This may stand in the way of automated
>> test/debug builds so let's remove that and just fail the z3fold
>> initialization in such case instead.
>>
>> Signed-off-by: Vitaly Wool <[email protected]>
>> ---
>> mm/z3fold.c | 11 ++++++++---
>> 1 file changed, 8 insertions(+), 3 deletions(-)
>>
>> diff --git a/mm/z3fold.c b/mm/z3fold.c
>> index 7ad70fa..ffd9353 100644
>> --- a/mm/z3fold.c
>> +++ b/mm/z3fold.c
>> @@ -870,10 +870,15 @@ MODULE_ALIAS("zpool-z3fold");
>>
>> static int __init init_z3fold(void)
>> {
>> - /* Make sure the z3fold header will fit in one chunk */
>> - BUILD_BUG_ON(sizeof(struct z3fold_header) > ZHDR_SIZE_ALIGNED);
>
> Nak. this is the wrong way to handle this. The build bug is there to
> indicate to you that your patch makes the header too large, not as a
> runtime check to disable everything.

Okay, let's agree to drop it.

> The right way to handle it is to change the hardcoded assumption that
> the header fits into a single chunk; e.g.:
>
> #define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE)
> #define ZHDR_CHUNKS (ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT)
>
> then use ZHDR_CHUNKS in all places where it's currently assumed the
> header is 1 chunk, e.g. in num_free_chunks:
>
> if (zhdr->middle_chunks != 0) {
> int nfree_before = zhdr->first_chunks ?
> - 0 : zhdr->start_middle - 1;
> + 0 : zhdr->start_middle - ZHDR_CHUNKS;
>
> after changing all needed places like that, the build bug isn't needed
> anymore (unless we want to make sure the header isn't larger than some
> arbitrary number N chunks)

That sounds overly complicated to me. I would rather use bit_spin_lock
as Arnd suggested. What would you say?
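
(roughly, that would mean locking on a bit of page->private instead
of adding a field to the header, e.g. with a hypothetical PAGE_LOCK
bit:

	bit_spin_lock(PAGE_LOCK, &page->private);
	/* ... modify the z3fold header ... */
	bit_spin_unlock(PAGE_LOCK, &page->private);

so the z3fold header itself would not have to grow.)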

~vitaly

2016-11-25 18:26:56

by Dan Streetman

Subject: Re: [PATCH 3/3] z3fold: discourage use of pages that weren't compacted

On Tue, Nov 15, 2016 at 11:00 AM, Vitaly Wool <[email protected]> wrote:
> If a z3fold page couldn't be compacted, we don't want it to be
> used for next object allocation in the first place.

why? !compacted can only mean 1) already compact or 2) middle chunks
is mapped. #1 is as good compaction-wise as the page can get, so do
you mean that if a page couldn't be compacted because of #2, we
shouldn't use it for next allocation? if so, that isn't quite what
this patch does.

> It makes more
> sense to add it to the end of the relevant unbuddied list. If that
> page gets compacted later, it will be added to the beginning of
> the list then.
>
> This simple idea gives 5-7% improvement in randrw fio tests and
> about 10% improvement in fio sequential read/write.

i don't understand why there is any improvement - the unbuddied lists
are grouped by the amount of free chunks, so all pages in a specific
unbuddied list should have exactly that number of free chunks
available, and it shouldn't matter if a page gets put into the front
or back...where is the performance improvement coming from?

>
> Signed-off-by: Vitaly Wool <[email protected]>
> ---
> mm/z3fold.c | 22 +++++++++++++++++-----
> 1 file changed, 17 insertions(+), 5 deletions(-)
>
> diff --git a/mm/z3fold.c b/mm/z3fold.c
> index ffd9353..e282ba0 100644
> --- a/mm/z3fold.c
> +++ b/mm/z3fold.c
> @@ -539,11 +539,19 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
> free_z3fold_page(zhdr);
> atomic64_dec(&pool->pages_nr);
> } else {
> - z3fold_compact_page(zhdr);
> + int compacted = z3fold_compact_page(zhdr);
> /* Add to the unbuddied list */
> spin_lock(&pool->lock);
> freechunks = num_free_chunks(zhdr);
> - list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
> + /*
> + * If the page has been compacted, we want to use it
> + * in the first place.
> + */
> + if (compacted)
> + list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
> + else
> + list_add_tail(&zhdr->buddy,
> + &pool->unbuddied[freechunks]);
> spin_unlock(&pool->lock);
> z3fold_page_unlock(zhdr);
> }
> @@ -672,12 +680,16 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
> spin_lock(&pool->lock);
> list_add(&zhdr->buddy, &pool->buddied);
> } else {
> - z3fold_compact_page(zhdr);
> + int compacted = z3fold_compact_page(zhdr);
> /* add to unbuddied list */
> spin_lock(&pool->lock);
> freechunks = num_free_chunks(zhdr);
> - list_add(&zhdr->buddy,
> - &pool->unbuddied[freechunks]);
> + if (compacted)
> + list_add(&zhdr->buddy,
> + &pool->unbuddied[freechunks]);
> + else
> + list_add_tail(&zhdr->buddy,
> + &pool->unbuddied[freechunks]);
> }
> }
>
> --
> 2.4.2

2016-11-25 18:34:33

by Dan Streetman

Subject: Re: [PATCH 2/3] z3fold: don't fail kernel build if z3fold_header is too big

On Fri, Nov 25, 2016 at 11:25 AM, Vitaly Wool <[email protected]> wrote:
> On Fri, Nov 25, 2016 at 4:59 PM, Dan Streetman <[email protected]> wrote:
>> On Tue, Nov 15, 2016 at 11:00 AM, Vitaly Wool <[email protected]> wrote:
>>> Currently the whole kernel build will be stopped if the size of
>>> struct z3fold_header is greater than the size of one chunk, which
>>> is 64 bytes by default. This may stand in the way of automated
>>> test/debug builds so let's remove that and just fail the z3fold
>>> initialization in such case instead.
>>>
>>> Signed-off-by: Vitaly Wool <[email protected]>
>>> ---
>>> mm/z3fold.c | 11 ++++++++---
>>> 1 file changed, 8 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/mm/z3fold.c b/mm/z3fold.c
>>> index 7ad70fa..ffd9353 100644
>>> --- a/mm/z3fold.c
>>> +++ b/mm/z3fold.c
>>> @@ -870,10 +870,15 @@ MODULE_ALIAS("zpool-z3fold");
>>>
>>> static int __init init_z3fold(void)
>>> {
>>> - /* Make sure the z3fold header will fit in one chunk */
>>> - BUILD_BUG_ON(sizeof(struct z3fold_header) > ZHDR_SIZE_ALIGNED);
>>
>> Nak. this is the wrong way to handle this. The build bug is there to
>> indicate to you that your patch makes the header too large, not as a
>> runtime check to disable everything.
>
> Okay, let's agree to drop it.
>
>> The right way to handle it is to change the hardcoded assumption that
>> the header fits into a single chunk; e.g.:
>>
>> #define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE)
>> #define ZHDR_CHUNKS (ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT)
>>
>> then use ZHDR_CHUNKS in all places where it's currently assumed the
>> header is 1 chunk, e.g. in num_free_chunks:
>>
>> if (zhdr->middle_chunks != 0) {
>> int nfree_before = zhdr->first_chunks ?
>> - 0 : zhdr->start_middle - 1;
>> + 0 : zhdr->start_middle - ZHDR_CHUNKS;
>>
>> after changing all needed places like that, the build bug isn't needed
>> anymore (unless we want to make sure the header isn't larger than some
>> arbitrary number N chunks)
>
> That sounds overly complicated to me. I would rather use bit_spin_lock
> as Arnd suggested. What would you say?

using the correctly-calculated header size instead of a hardcoded
value is overly complicated? i don't agree with that...i'd say it
should have been done in the first place ;-)

bit spin locks are hard to debug and slower and should only be used
where space really is absolutely required to be minimal, which
definitely isn't the case here. this should use regular spin locks
and change the hardcoded assumption of zhdr size < chunk size (which
always was a bad assumption) to calculate it correctly. it's really
not that hard; there are only a few places where the offset position
of the chunks is calculated.


>
> ~vitaly

2016-11-26 09:12:21

by Vitaly Wool

Subject: Re: [PATCH 2/3] z3fold: don't fail kernel build if z3fold_header is too big

On Fri, Nov 25, 2016 at 7:33 PM, Dan Streetman <[email protected]> wrote:
> On Fri, Nov 25, 2016 at 11:25 AM, Vitaly Wool <[email protected]> wrote:
>> On Fri, Nov 25, 2016 at 4:59 PM, Dan Streetman <[email protected]> wrote:
>>> On Tue, Nov 15, 2016 at 11:00 AM, Vitaly Wool <[email protected]> wrote:
>>>> Currently the whole kernel build will be stopped if the size of
>>>> struct z3fold_header is greater than the size of one chunk, which
>>>> is 64 bytes by default. This may stand in the way of automated
>>>> test/debug builds so let's remove that and just fail the z3fold
>>>> initialization in such case instead.
>>>>
>>>> Signed-off-by: Vitaly Wool <[email protected]>
>>>> ---
>>>> mm/z3fold.c | 11 ++++++++---
>>>> 1 file changed, 8 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/mm/z3fold.c b/mm/z3fold.c
>>>> index 7ad70fa..ffd9353 100644
>>>> --- a/mm/z3fold.c
>>>> +++ b/mm/z3fold.c
>>>> @@ -870,10 +870,15 @@ MODULE_ALIAS("zpool-z3fold");
>>>>
>>>> static int __init init_z3fold(void)
>>>> {
>>>> - /* Make sure the z3fold header will fit in one chunk */
>>>> - BUILD_BUG_ON(sizeof(struct z3fold_header) > ZHDR_SIZE_ALIGNED);
>>>
>>> Nak. this is the wrong way to handle this. The build bug is there to
>>> indicate to you that your patch makes the header too large, not as a
>>> runtime check to disable everything.
>>
>> Okay, let's agree to drop it.
>>
>>> The right way to handle it is to change the hardcoded assumption that
>>> the header fits into a single chunk; e.g.:
>>>
>>> #define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE)
>>> #define ZHDR_CHUNKS (ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT)
>>>
>>> then use ZHDR_CHUNKS in all places where it's currently assumed the
>>> header is 1 chunk, e.g. in num_free_chunks:
>>>
>>> if (zhdr->middle_chunks != 0) {
>>> int nfree_before = zhdr->first_chunks ?
>>> - 0 : zhdr->start_middle - 1;
>>> + 0 : zhdr->start_middle - ZHDR_CHUNKS;
>>>
>>> after changing all needed places like that, the build bug isn't needed
>>> anymore (unless we want to make sure the header isn't larger than some
>>> arbitrary number N chunks)
>>
>> That sounds overly complicated to me. I would rather use bit_spin_lock
>> as Arnd suggested. What would you say?
>
> using the correctly-calculated header size instead of a hardcoded
> value is overly complicated? i don't agree with that...i'd say it
> should have been done in the first place ;-)
>
> bit spin locks are hard to debug and slower and should only be used
> where space really is absolutely required to be minimal, which
> definitely isn't the case here. this should use regular spin locks
> and change the hardcoded assumption of zhdr size < chunk size (which
> always was a bad assumption) to calculate it correctly. it's really
> not that hard; there are only a few places where the offset position
> of the chunks is calculated.

I gave this a second thought after having run some quick benchmarking
using bit_spin_lock. The performance drop is substantial, so your
suggestion is the way to go, thanks.

~vitaly

2016-11-28 14:14:50

by Vitaly Wool

Subject: Re: [PATCH 3/3] z3fold: discourage use of pages that weren't compacted

On Fri, Nov 25, 2016 at 7:25 PM, Dan Streetman <[email protected]> wrote:
> On Tue, Nov 15, 2016 at 11:00 AM, Vitaly Wool <[email protected]> wrote:
>> If a z3fold page couldn't be compacted, we don't want it to be
>> used for next object allocation in the first place.
>
> why? !compacted can only mean 1) already compact or 2) middle chunks
> is mapped. #1 is as good compaction-wise as the page can get, so do
> you mean that if a page couldn't be compacted because of #2, we
> shouldn't use it for next allocation? if so, that isn't quite what
> this patch does.
>
>> It makes more
>> sense to add it to the end of the relevant unbuddied list. If that
>> page gets compacted later, it will be added to the beginning of
>> the list then.
>>
>> This simple idea gives 5-7% improvement in randrw fio tests and
>> about 10% improvement in fio sequential read/write.
>
> i don't understand why there is any improvement - the unbuddied lists
> are grouped by the amount of free chunks, so all pages in a specific
> unbuddied list should have exactly that number of free chunks
> available, and it shouldn't matter if a page gets put into the front
> or back...where is the performance improvement coming from?

When the next attempt to compact this page comes, it is less likely
to be locked, so the wait times are slightly lower on average.

~vitaly

>> Signed-off-by: Vitaly Wool <[email protected]>
>> ---
>> mm/z3fold.c | 22 +++++++++++++++++-----
>> 1 file changed, 17 insertions(+), 5 deletions(-)
>>
>> diff --git a/mm/z3fold.c b/mm/z3fold.c
>> index ffd9353..e282ba0 100644
>> --- a/mm/z3fold.c
>> +++ b/mm/z3fold.c
>> @@ -539,11 +539,19 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>> free_z3fold_page(zhdr);
>> atomic64_dec(&pool->pages_nr);
>> } else {
>> - z3fold_compact_page(zhdr);
>> + int compacted = z3fold_compact_page(zhdr);
>> /* Add to the unbuddied list */
>> spin_lock(&pool->lock);
>> freechunks = num_free_chunks(zhdr);
>> - list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
>> + /*
>> + * If the page has been compacted, we want to use it
>> + * in the first place.
>> + */
>> + if (compacted)
>> + list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
>> + else
>> + list_add_tail(&zhdr->buddy,
>> + &pool->unbuddied[freechunks]);
>> spin_unlock(&pool->lock);
>> z3fold_page_unlock(zhdr);
>> }
>> @@ -672,12 +680,16 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>> spin_lock(&pool->lock);
>> list_add(&zhdr->buddy, &pool->buddied);
>> } else {
>> - z3fold_compact_page(zhdr);
>> + int compacted = z3fold_compact_page(zhdr);
>> /* add to unbuddied list */
>> spin_lock(&pool->lock);
>> freechunks = num_free_chunks(zhdr);
>> - list_add(&zhdr->buddy,
>> - &pool->unbuddied[freechunks]);
>> + if (compacted)
>> + list_add(&zhdr->buddy,
>> + &pool->unbuddied[freechunks]);
>> + else
>> + list_add_tail(&zhdr->buddy,
>> + &pool->unbuddied[freechunks]);
>> }
>> }
>>
>> --
>> 2.4.2

2016-11-29 22:28:42

by Dan Streetman

Subject: Re: [PATCH 3/3] z3fold: discourage use of pages that weren't compacted

On Mon, Nov 28, 2016 at 9:14 AM, Vitaly Wool <[email protected]> wrote:
> On Fri, Nov 25, 2016 at 7:25 PM, Dan Streetman <[email protected]> wrote:
>> On Tue, Nov 15, 2016 at 11:00 AM, Vitaly Wool <[email protected]> wrote:
>>> If a z3fold page couldn't be compacted, we don't want it to be
>>> used for next object allocation in the first place.
>>
>> why? !compacted can only mean 1) already compact or 2) middle chunks
>> is mapped. #1 is as good compaction-wise as the page can get, so do
>> you mean that if a page couldn't be compacted because of #2, we
>> shouldn't use it for next allocation? if so, that isn't quite what
>> this patch does.
>>
>>> It makes more
>>> sense to add it to the end of the relevant unbuddied list. If that
>>> page gets compacted later, it will be added to the beginning of
>>> the list then.
>>>
>>> This simple idea gives 5-7% improvement in randrw fio tests and
>>> about 10% improvement in fio sequential read/write.
>>
>> i don't understand why there is any improvement - the unbuddied lists
>> are grouped by the amount of free chunks, so all pages in a specific
>> unbuddied list should have exactly that number of free chunks
>> available, and it shouldn't matter if a page gets put into the front
>> or back...where is the performance improvement coming from?
>
> When the next attempt to compact this page comes, it is less likely
> to be locked, so the wait times are slightly lower on average.

which wait time? for a page with the middle chunk mapped compact
should exit immediately...?

>
> ~vitaly
>
>>> Signed-off-by: Vitaly Wool <[email protected]>
>>> ---
>>> mm/z3fold.c | 22 +++++++++++++++++-----
>>> 1 file changed, 17 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/mm/z3fold.c b/mm/z3fold.c
>>> index ffd9353..e282ba0 100644
>>> --- a/mm/z3fold.c
>>> +++ b/mm/z3fold.c
>>> @@ -539,11 +539,19 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>>> free_z3fold_page(zhdr);
>>> atomic64_dec(&pool->pages_nr);
>>> } else {
>>> - z3fold_compact_page(zhdr);
>>> + int compacted = z3fold_compact_page(zhdr);
>>> /* Add to the unbuddied list */
>>> spin_lock(&pool->lock);
>>> freechunks = num_free_chunks(zhdr);
>>> - list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
>>> + /*
>>> + * If the page has been compacted, we want to use it
>>> + * in the first place.
>>> + */
>>> + if (compacted)
>>> + list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
>>> + else
>>> + list_add_tail(&zhdr->buddy,
>>> + &pool->unbuddied[freechunks]);
>>> spin_unlock(&pool->lock);
>>> z3fold_page_unlock(zhdr);
>>> }
>>> @@ -672,12 +680,16 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>>> spin_lock(&pool->lock);
>>> list_add(&zhdr->buddy, &pool->buddied);
>>> } else {
>>> - z3fold_compact_page(zhdr);
>>> + int compacted = z3fold_compact_page(zhdr);
>>> /* add to unbuddied list */
>>> spin_lock(&pool->lock);
>>> freechunks = num_free_chunks(zhdr);
>>> - list_add(&zhdr->buddy,
>>> - &pool->unbuddied[freechunks]);
>>> + if (compacted)
>>> + list_add(&zhdr->buddy,
>>> + &pool->unbuddied[freechunks]);
>>> + else
>>> + list_add_tail(&zhdr->buddy,
>>> + &pool->unbuddied[freechunks]);
>>> }
>>> }
>>>
>>> --
>>> 2.4.2