2016-12-26 00:30:33

by Vitaly Wool

Subject: [PATCH/RESEND 0/5] z3fold optimizations and fixes

This is a consolidation of z3fold optimizations and fixes done so far, revised after comments from Dan [1].
The coming patches are to be applied on top of the following commit:

commit 07cfe852286d5e314f8cd19781444e12a2b6cdf3
Author: zhong jiang <[email protected]>
Date: Tue Dec 20 11:53:40 2016 +1100

mm/z3fold.c: limit first_num to the actual range of possible buddy indexes

All the z3fold patches newer than that commit are considered obsolete and should be replaced by this patch series. The patches in the series have been verified against the linux-next tree.

[1] https://lkml.org/lkml/2016/11/29/969


2016-12-26 00:33:34

by Vitaly Wool

Subject: [PATCH/RESEND 1/5] mm/z3fold.c: make pages_nr atomic

Convert the per-pool pages_nr counter to atomic64_t so that no locking
is needed to read or update it.
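
The conversion follows the standard atomic counter pattern; a minimal
before/after sketch of the accesses touched by the diff below (these
are fragments for illustration, not a complete function):

/* before: every access to the counter has to take the pool lock */
spin_lock(&pool->lock);
pool->pages_nr++;
spin_unlock(&pool->lock);

/* after: atomic64_t allows lock-free updates and reads */
atomic64_set(&pool->pages_nr, 0);          /* pool creation */
atomic64_inc(&pool->pages_nr);             /* page allocated */
atomic64_dec(&pool->pages_nr);             /* page freed */
return atomic64_read(&pool->pages_nr);     /* z3fold_get_pool_size() */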

Signed-off-by: Vitaly Wool <[email protected]>
---
mm/z3fold.c | 20 +++++++++-----------
1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 207e5dd..2273789 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -80,7 +80,7 @@ struct z3fold_pool {
struct list_head unbuddied[NCHUNKS];
struct list_head buddied;
struct list_head lru;
- u64 pages_nr;
+ atomic64_t pages_nr;
const struct z3fold_ops *ops;
struct zpool *zpool;
const struct zpool_ops *zpool_ops;
@@ -238,7 +238,7 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
INIT_LIST_HEAD(&pool->unbuddied[i]);
INIT_LIST_HEAD(&pool->buddied);
INIT_LIST_HEAD(&pool->lru);
- pool->pages_nr = 0;
+ atomic64_set(&pool->pages_nr, 0);
pool->ops = ops;
return pool;
}
@@ -350,7 +350,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
if (!page)
return -ENOMEM;
spin_lock(&pool->lock);
- pool->pages_nr++;
+ atomic64_inc(&pool->pages_nr);
zhdr = init_z3fold_page(page);

if (bud == HEADLESS) {
@@ -443,10 +443,9 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
return;
}

- if (bud != HEADLESS) {
- /* Remove from existing buddy list */
+ /* Remove from existing buddy list */
+ if (bud != HEADLESS)
list_del(&zhdr->buddy);
- }

if (bud == HEADLESS ||
(zhdr->first_chunks == 0 && zhdr->middle_chunks == 0 &&
@@ -455,7 +454,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
list_del(&page->lru);
clear_bit(PAGE_HEADLESS, &page->private);
free_z3fold_page(zhdr);
- pool->pages_nr--;
+ atomic64_dec(&pool->pages_nr);
} else {
z3fold_compact_page(zhdr);
/* Add to the unbuddied list */
@@ -573,7 +572,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
*/
clear_bit(PAGE_HEADLESS, &page->private);
free_z3fold_page(zhdr);
- pool->pages_nr--;
+ atomic64_dec(&pool->pages_nr);
spin_unlock(&pool->lock);
return 0;
} else if (!test_bit(PAGE_HEADLESS, &page->private)) {
@@ -676,12 +675,11 @@ static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
* z3fold_get_pool_size() - gets the z3fold pool size in pages
* @pool: pool whose size is being queried
*
- * Returns: size in pages of the given pool. The pool lock need not be
- * taken to access pages_nr.
+ * Returns: size in pages of the given pool.
*/
static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
{
- return pool->pages_nr;
+ return atomic64_read(&pool->pages_nr);
}

/*****************
--
2.4.2

2016-12-26 00:36:03

by Vitaly Wool

Subject: [PATCH/RESEND 2/5] mm/z3fold.c: extend compaction function

z3fold_compact_page() currently only handles the case where the middle
chunk is the only object within the z3fold page. However, it may also
be worth moving the middle chunk closer to either the first or the last
chunk, whichever is present, if the gap between them is big enough.

From the compression ratio point of view it always makes sense to move
the middle chunk as close as possible to another in-page z3fold object,
because the third object can then use all the remaining space. However,
moving a big object by just one chunk hurts performance without gaining
much in compression ratio, so the gap between the middle object and the
edge object should be big enough to justify the move.

This patch therefore improves the compression ratio because in-page
compaction becomes more comprehensive; somewhat surprisingly, it also
increases performance in fio randrw tests (I am not 100% sure why, but
probably because denser in-page allocation means fewer actual page
allocations on the hot path).

This patch adds the relevant code, using the BIG_CHUNK_GAP define as a
threshold for the middle chunk to be worth moving.
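
To make the threshold concrete with hypothetical numbers (default 4K
pages, 64-byte chunks, header in chunk 0): if the first object occupies
chunks 1-10 (first_chunks = 10) and the middle object starts at chunk
20 (start_middle = 20), there are 9 free chunks in between; 20 > 10 +
BIG_CHUNK_GAP holds, so the middle object is moved down to start at
chunk 11, leaving 9 more contiguous chunks for a future last object.
If start_middle were 13, only 2 chunks would be gained, 13 > 13 is
false, and the memmove is skipped.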

Signed-off-by: Vitaly Wool <[email protected]>
---
mm/z3fold.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++-------------
1 file changed, 47 insertions(+), 13 deletions(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 2273789..d2e8aec 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -254,26 +254,60 @@ static void z3fold_destroy_pool(struct z3fold_pool *pool)
kfree(pool);
}

+static inline void *mchunk_memmove(struct z3fold_header *zhdr,
+ unsigned short dst_chunk)
+{
+ void *beg = zhdr;
+ return memmove(beg + (dst_chunk << CHUNK_SHIFT),
+ beg + (zhdr->start_middle << CHUNK_SHIFT),
+ zhdr->middle_chunks << CHUNK_SHIFT);
+}
+
+#define BIG_CHUNK_GAP 3
/* Has to be called with lock held */
static int z3fold_compact_page(struct z3fold_header *zhdr)
{
struct page *page = virt_to_page(zhdr);
- void *beg = zhdr;
+ int ret = 0;
+
+ if (test_bit(MIDDLE_CHUNK_MAPPED, &page->private))
+ goto out;

+ if (zhdr->middle_chunks != 0) {
+ if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
+ mchunk_memmove(zhdr, 1); /* move to the beginning */
+ zhdr->first_chunks = zhdr->middle_chunks;
+ zhdr->middle_chunks = 0;
+ zhdr->start_middle = 0;
+ zhdr->first_num++;
+ ret = 1;
+ goto out;
+ }

- if (!test_bit(MIDDLE_CHUNK_MAPPED, &page->private) &&
- zhdr->middle_chunks != 0 &&
- zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
- memmove(beg + ZHDR_SIZE_ALIGNED,
- beg + (zhdr->start_middle << CHUNK_SHIFT),
- zhdr->middle_chunks << CHUNK_SHIFT);
- zhdr->first_chunks = zhdr->middle_chunks;
- zhdr->middle_chunks = 0;
- zhdr->start_middle = 0;
- zhdr->first_num++;
- return 1;
+ /*
+ * moving data is expensive, so let's only do that if
+ * there's substantial gain (at least BIG_CHUNK_GAP chunks)
+ */
+ if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 &&
+ zhdr->start_middle > zhdr->first_chunks + BIG_CHUNK_GAP) {
+ mchunk_memmove(zhdr, zhdr->first_chunks + 1);
+ zhdr->start_middle = zhdr->first_chunks + 1;
+ ret = 1;
+ goto out;
+ }
+ if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 &&
+ zhdr->middle_chunks + zhdr->last_chunks <=
+ NCHUNKS - zhdr->start_middle - BIG_CHUNK_GAP) {
+ unsigned short new_start = NCHUNKS - zhdr->last_chunks -
+ zhdr->middle_chunks;
+ mchunk_memmove(zhdr, new_start);
+ zhdr->start_middle = new_start;
+ ret = 1;
+ goto out;
+ }
}
- return 0;
+out:
+ return ret;
}

/**
--
2.4.2

2016-12-26 00:37:49

by Vitaly Wool

Subject: [PATCH/RESEND 3/5] z3fold: use per-page spinlock

Most z3fold operations are in-page, such as modifying the z3fold page
header or moving z3fold objects within a page. Taking the per-pool
spinlock to protect per-page objects is therefore suboptimal, and the
idea of having a per-page spinlock (or rwlock) has been around for some
time.

This patch implements a raw-spinlock-based per-page locking mechanism
which is lightweight enough to normally fit into the z3fold header.
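
A condensed sketch of the resulting locking convention (the helper name
get_unbuddied_page() is made up for illustration; the locking calls are
the ones added by the diff below). The pool lock now only protects list
membership, while the in-page fields (first/middle/last_chunks,
start_middle, first_num) are protected by the per-page lock:

static struct z3fold_header *get_unbuddied_page(struct z3fold_pool *pool,
                                                int i)
{
        struct z3fold_header *zhdr;

        spin_lock(&pool->lock);
        zhdr = list_first_entry_or_null(&pool->unbuddied[i],
                                        struct z3fold_header, buddy);
        if (zhdr)
                list_del_init(&zhdr->buddy);
        spin_unlock(&pool->lock);

        if (zhdr) {
                z3fold_page_lock(zhdr);
                /* ... read or modify in-page metadata here ... */
                z3fold_page_unlock(zhdr);
        }
        return zhdr;
}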

Signed-off-by: Vitaly Wool <[email protected]>
---
mm/z3fold.c | 148 +++++++++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 106 insertions(+), 42 deletions(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index d2e8aec..28c0a2d 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -98,6 +98,7 @@ enum buddy {
* struct z3fold_header - z3fold page metadata occupying the first chunk of each
* z3fold page, except for HEADLESS pages
* @buddy: links the z3fold page into the relevant list in the pool
+ * @page_lock: per-page lock
* @first_chunks: the size of the first buddy in chunks, 0 if free
* @middle_chunks: the size of the middle buddy in chunks, 0 if free
* @last_chunks: the size of the last buddy in chunks, 0 if free
@@ -105,6 +106,7 @@ enum buddy {
*/
struct z3fold_header {
struct list_head buddy;
+ raw_spinlock_t page_lock;
unsigned short first_chunks;
unsigned short middle_chunks;
unsigned short last_chunks;
@@ -144,6 +146,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
clear_bit(PAGE_HEADLESS, &page->private);
clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);

+ raw_spin_lock_init(&zhdr->page_lock);
zhdr->first_chunks = 0;
zhdr->middle_chunks = 0;
zhdr->last_chunks = 0;
@@ -159,6 +162,19 @@ static void free_z3fold_page(struct z3fold_header *zhdr)
__free_page(virt_to_page(zhdr));
}

+/* Lock a z3fold page */
+static inline void z3fold_page_lock(struct z3fold_header *zhdr)
+{
+ raw_spin_lock(&zhdr->page_lock);
+}
+
+/* Unlock a z3fold page */
+static inline void z3fold_page_unlock(struct z3fold_header *zhdr)
+{
+ raw_spin_unlock(&zhdr->page_lock);
+}
+
+
/*
* Encodes the handle of a particular buddy within a z3fold page
* Pool lock should be held as this function accesses first_num
@@ -347,50 +363,60 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
bud = HEADLESS;
else {
chunks = size_to_chunks(size);
- spin_lock(&pool->lock);

/* First, try to find an unbuddied z3fold page. */
zhdr = NULL;
for_each_unbuddied_list(i, chunks) {
- if (!list_empty(&pool->unbuddied[i])) {
- zhdr = list_first_entry(&pool->unbuddied[i],
+ spin_lock(&pool->lock);
+ zhdr = list_first_entry_or_null(&pool->unbuddied[i],
struct z3fold_header, buddy);
- page = virt_to_page(zhdr);
- if (zhdr->first_chunks == 0) {
- if (zhdr->middle_chunks != 0 &&
- chunks >= zhdr->start_middle)
- bud = LAST;
- else
- bud = FIRST;
- } else if (zhdr->last_chunks == 0)
+ if (!zhdr) {
+ spin_unlock(&pool->lock);
+ continue;
+ }
+ list_del_init(&zhdr->buddy);
+ spin_unlock(&pool->lock);
+
+ page = virt_to_page(zhdr);
+ z3fold_page_lock(zhdr);
+ if (zhdr->first_chunks == 0) {
+ if (zhdr->middle_chunks != 0 &&
+ chunks >= zhdr->start_middle)
bud = LAST;
- else if (zhdr->middle_chunks == 0)
- bud = MIDDLE;
- else {
- pr_err("No free chunks in unbuddied\n");
- WARN_ON(1);
- continue;
- }
- list_del(&zhdr->buddy);
- goto found;
+ else
+ bud = FIRST;
+ } else if (zhdr->last_chunks == 0)
+ bud = LAST;
+ else if (zhdr->middle_chunks == 0)
+ bud = MIDDLE;
+ else {
+ spin_lock(&pool->lock);
+ list_add(&zhdr->buddy, &pool->buddied);
+ spin_unlock(&pool->lock);
+ z3fold_page_unlock(zhdr);
+ pr_err("No free chunks in unbuddied\n");
+ WARN_ON(1);
+ continue;
}
+ goto found;
}
bud = FIRST;
- spin_unlock(&pool->lock);
}

/* Couldn't find unbuddied z3fold page, create new one */
page = alloc_page(gfp);
if (!page)
return -ENOMEM;
- spin_lock(&pool->lock);
+
atomic64_inc(&pool->pages_nr);
zhdr = init_z3fold_page(page);

if (bud == HEADLESS) {
set_bit(PAGE_HEADLESS, &page->private);
+ spin_lock(&pool->lock);
goto headless;
}
+ z3fold_page_lock(zhdr);

found:
if (bud == FIRST)
@@ -402,6 +428,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
zhdr->start_middle = zhdr->first_chunks + 1;
}

+ spin_lock(&pool->lock);
if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 ||
zhdr->middle_chunks == 0) {
/* Add to unbuddied list */
@@ -421,6 +448,8 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,

*handle = encode_handle(zhdr, bud);
spin_unlock(&pool->lock);
+ if (bud != HEADLESS)
+ z3fold_page_unlock(zhdr);

return 0;
}
@@ -442,7 +471,6 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
struct page *page;
enum buddy bud;

- spin_lock(&pool->lock);
zhdr = handle_to_z3fold_header(handle);
page = virt_to_page(zhdr);

@@ -450,6 +478,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
/* HEADLESS page stored */
bud = HEADLESS;
} else {
+ z3fold_page_lock(zhdr);
bud = handle_to_buddy(handle);

switch (bud) {
@@ -466,37 +495,59 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
default:
pr_err("%s: unknown bud %d\n", __func__, bud);
WARN_ON(1);
- spin_unlock(&pool->lock);
+ z3fold_page_unlock(zhdr);
return;
}
}

if (test_bit(UNDER_RECLAIM, &page->private)) {
/* z3fold page is under reclaim, reclaim will free */
- spin_unlock(&pool->lock);
+ if (bud != HEADLESS)
+ z3fold_page_unlock(zhdr);
return;
}

/* Remove from existing buddy list */
- if (bud != HEADLESS)
- list_del(&zhdr->buddy);
+ if (bud != HEADLESS) {
+ spin_lock(&pool->lock);
+ /*
+ * this object may have been removed from its list by
+ * z3fold_alloc(). In that case we just do nothing,
+ * z3fold_alloc() will allocate an object and add the page
+ * to the relevant list.
+ */
+ if (!list_empty(&zhdr->buddy)) {
+ list_del(&zhdr->buddy);
+ } else {
+ spin_unlock(&pool->lock);
+ z3fold_page_unlock(zhdr);
+ return;
+ }
+ spin_unlock(&pool->lock);
+ }

if (bud == HEADLESS ||
(zhdr->first_chunks == 0 && zhdr->middle_chunks == 0 &&
zhdr->last_chunks == 0)) {
/* z3fold page is empty, free */
+ spin_lock(&pool->lock);
list_del(&page->lru);
+ spin_unlock(&pool->lock);
clear_bit(PAGE_HEADLESS, &page->private);
+ if (bud != HEADLESS)
+ z3fold_page_unlock(zhdr);
free_z3fold_page(zhdr);
atomic64_dec(&pool->pages_nr);
} else {
z3fold_compact_page(zhdr);
/* Add to the unbuddied list */
+ spin_lock(&pool->lock);
freechunks = num_free_chunks(zhdr);
list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+ spin_unlock(&pool->lock);
+ z3fold_page_unlock(zhdr);
}

- spin_unlock(&pool->lock);
}

/**
@@ -543,12 +594,15 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
unsigned long first_handle = 0, middle_handle = 0, last_handle = 0;

spin_lock(&pool->lock);
- if (!pool->ops || !pool->ops->evict || list_empty(&pool->lru) ||
- retries == 0) {
+ if (!pool->ops || !pool->ops->evict || retries == 0) {
spin_unlock(&pool->lock);
return -EINVAL;
}
for (i = 0; i < retries; i++) {
+ if (list_empty(&pool->lru)) {
+ spin_unlock(&pool->lock);
+ return -EINVAL;
+ }
page = list_last_entry(&pool->lru, struct page, lru);
list_del(&page->lru);

@@ -557,6 +611,8 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
zhdr = page_address(page);
if (!test_bit(PAGE_HEADLESS, &page->private)) {
list_del(&zhdr->buddy);
+ spin_unlock(&pool->lock);
+ z3fold_page_lock(zhdr);
/*
* We need encode the handles before unlocking, since
* we can race with free that will set
@@ -571,13 +627,13 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
middle_handle = encode_handle(zhdr, MIDDLE);
if (zhdr->last_chunks)
last_handle = encode_handle(zhdr, LAST);
+ z3fold_page_unlock(zhdr);
} else {
first_handle = encode_handle(zhdr, HEADLESS);
last_handle = middle_handle = 0;
+ spin_unlock(&pool->lock);
}

- spin_unlock(&pool->lock);
-
/* Issue the eviction callback(s) */
if (middle_handle) {
ret = pool->ops->evict(pool, middle_handle);
@@ -595,7 +651,8 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
goto next;
}
next:
- spin_lock(&pool->lock);
+ if (!test_bit(PAGE_HEADLESS, &page->private))
+ z3fold_page_lock(zhdr);
clear_bit(UNDER_RECLAIM, &page->private);
if ((test_bit(PAGE_HEADLESS, &page->private) && ret == 0) ||
(zhdr->first_chunks == 0 && zhdr->last_chunks == 0 &&
@@ -604,26 +661,34 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
* All buddies are now free, free the z3fold page and
* return success.
*/
- clear_bit(PAGE_HEADLESS, &page->private);
+ if (!test_and_clear_bit(PAGE_HEADLESS, &page->private))
+ z3fold_page_unlock(zhdr);
free_z3fold_page(zhdr);
atomic64_dec(&pool->pages_nr);
- spin_unlock(&pool->lock);
return 0;
} else if (!test_bit(PAGE_HEADLESS, &page->private)) {
if (zhdr->first_chunks != 0 &&
zhdr->last_chunks != 0 &&
zhdr->middle_chunks != 0) {
/* Full, add to buddied list */
+ spin_lock(&pool->lock);
list_add(&zhdr->buddy, &pool->buddied);
+ spin_unlock(&pool->lock);
} else {
z3fold_compact_page(zhdr);
/* add to unbuddied list */
+ spin_lock(&pool->lock);
freechunks = num_free_chunks(zhdr);
list_add(&zhdr->buddy,
&pool->unbuddied[freechunks]);
+ spin_unlock(&pool->lock);
}
}

+ if (!test_bit(PAGE_HEADLESS, &page->private))
+ z3fold_page_unlock(zhdr);
+
+ spin_lock(&pool->lock);
/* add to beginning of LRU */
list_add(&page->lru, &pool->lru);
}
@@ -648,7 +713,6 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
void *addr;
enum buddy buddy;

- spin_lock(&pool->lock);
zhdr = handle_to_z3fold_header(handle);
addr = zhdr;
page = virt_to_page(zhdr);
@@ -656,6 +720,7 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
if (test_bit(PAGE_HEADLESS, &page->private))
goto out;

+ z3fold_page_lock(zhdr);
buddy = handle_to_buddy(handle);
switch (buddy) {
case FIRST:
@@ -674,8 +739,9 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
addr = NULL;
break;
}
+
+ z3fold_page_unlock(zhdr);
out:
- spin_unlock(&pool->lock);
return addr;
}

@@ -690,19 +756,17 @@ static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
struct page *page;
enum buddy buddy;

- spin_lock(&pool->lock);
zhdr = handle_to_z3fold_header(handle);
page = virt_to_page(zhdr);

- if (test_bit(PAGE_HEADLESS, &page->private)) {
- spin_unlock(&pool->lock);
+ if (test_bit(PAGE_HEADLESS, &page->private))
return;
- }

+ z3fold_page_lock(zhdr);
buddy = handle_to_buddy(handle);
if (buddy == MIDDLE)
clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
- spin_unlock(&pool->lock);
+ z3fold_page_unlock(zhdr);
}

/**
--
2.4.2

2016-12-26 00:39:44

by Vitaly Wool

Subject: [PATCH/RESEND 4/5] z3fold: fix header size related issues

Currently the whole kernel build is stopped if the size of struct
z3fold_header is greater than the size of one chunk, which is 64 bytes
by default. This patch instead defines the offset for z3fold objects
as the size of the z3fold header in chunks.

Also fixed are the calculation in num_free_chunks() and the address the
middle chunk is moved to during in-page compaction in
z3fold_compact_page().
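
For illustration, with 4K pages, NCHUNKS_ORDER = 6 and a z3fold_header
that fits into a single 64-byte chunk (an assumption; the exact header
size depends on the configuration), the new macros work out as:

CHUNK_SHIFT       = PAGE_SHIFT - NCHUNKS_ORDER             = 12 - 6 = 6
CHUNK_SIZE        = 1 << CHUNK_SHIFT                       = 64 bytes
TOTAL_CHUNKS      = PAGE_SIZE >> CHUNK_SHIFT               = 64
ZHDR_SIZE_ALIGNED = round_up(sizeof(struct z3fold_header), CHUNK_SIZE)
                  = 64
ZHDR_CHUNKS       = ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT       = 1
NCHUNKS           = (PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT = 63

With CONFIG_DEBUG_SPINLOCK=y the raw spinlock grows the header past one
chunk, so ZHDR_SIZE_ALIGNED becomes 128, ZHDR_CHUNKS becomes 2 and
NCHUNKS drops to 62, as noted in the updated comment; the build now
only breaks if the header outgrows a whole page.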

Signed-off-by: Vitaly Wool <[email protected]>
---
mm/z3fold.c | 161 ++++++++++++++++++++++++++++++++----------------------------
1 file changed, 87 insertions(+), 74 deletions(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 28c0a2d..729a2da 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -34,29 +34,60 @@
/*****************
* Structures
*****************/
+struct z3fold_pool;
+struct z3fold_ops {
+ int (*evict)(struct z3fold_pool *pool, unsigned long handle);
+};
+
+enum buddy {
+ HEADLESS = 0,
+ FIRST,
+ MIDDLE,
+ LAST,
+ BUDDIES_MAX
+};
+
+/*
+ * struct z3fold_header - z3fold page metadata occupying the first chunk of each
+ * z3fold page, except for HEADLESS pages
+ * @buddy: links the z3fold page into the relevant list in the pool
+ * @page_lock: per-page lock
+ * @first_chunks: the size of the first buddy in chunks, 0 if free
+ * @middle_chunks: the size of the middle buddy in chunks, 0 if free
+ * @last_chunks: the size of the last buddy in chunks, 0 if free
+ * @first_num: the starting number (for the first handle)
+ */
+struct z3fold_header {
+ struct list_head buddy;
+ raw_spinlock_t page_lock;
+ unsigned short first_chunks;
+ unsigned short middle_chunks;
+ unsigned short last_chunks;
+ unsigned short start_middle;
+ unsigned short first_num:2;
+};
+
/*
* NCHUNKS_ORDER determines the internal allocation granularity, effectively
* adjusting internal fragmentation. It also determines the number of
* freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the
- * allocation granularity will be in chunks of size PAGE_SIZE/64. As one chunk
- * in allocated page is occupied by z3fold header, NCHUNKS will be calculated
- * to 63 which shows the max number of free chunks in z3fold page, also there
- * will be 63 freelists per pool.
+ * allocation granularity will be in chunks of size PAGE_SIZE/64. Some chunks
+ * in the beginning of an allocated page are occupied by z3fold header, so
+ * NCHUNKS will be calculated to 63 (or 62 in case CONFIG_DEBUG_SPINLOCK=y),
+ * which shows the max number of free chunks in z3fold page, also there will
+ * be 63, or 62, respectively, freelists per pool.
*/
#define NCHUNKS_ORDER 6

#define CHUNK_SHIFT (PAGE_SHIFT - NCHUNKS_ORDER)
#define CHUNK_SIZE (1 << CHUNK_SHIFT)
-#define ZHDR_SIZE_ALIGNED CHUNK_SIZE
+#define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE)
+#define ZHDR_CHUNKS (ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT)
+#define TOTAL_CHUNKS (PAGE_SIZE >> CHUNK_SHIFT)
#define NCHUNKS ((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT)

#define BUDDY_MASK (0x3)

-struct z3fold_pool;
-struct z3fold_ops {
- int (*evict)(struct z3fold_pool *pool, unsigned long handle);
-};
-
/**
* struct z3fold_pool - stores metadata for each z3fold pool
* @lock: protects all pool fields and first|last_chunk fields of any
@@ -86,33 +117,6 @@ struct z3fold_pool {
const struct zpool_ops *zpool_ops;
};

-enum buddy {
- HEADLESS = 0,
- FIRST,
- MIDDLE,
- LAST,
- BUDDIES_MAX
-};
-
-/*
- * struct z3fold_header - z3fold page metadata occupying the first chunk of each
- * z3fold page, except for HEADLESS pages
- * @buddy: links the z3fold page into the relevant list in the pool
- * @page_lock: per-page lock
- * @first_chunks: the size of the first buddy in chunks, 0 if free
- * @middle_chunks: the size of the middle buddy in chunks, 0 if free
- * @last_chunks: the size of the last buddy in chunks, 0 if free
- * @first_num: the starting number (for the first handle)
- */
-struct z3fold_header {
- struct list_head buddy;
- raw_spinlock_t page_lock;
- unsigned short first_chunks;
- unsigned short middle_chunks;
- unsigned short last_chunks;
- unsigned short start_middle;
- unsigned short first_num:2;
-};

/*
* Internal z3fold page flags
@@ -123,6 +127,7 @@ enum z3fold_page_flags {
MIDDLE_CHUNK_MAPPED,
};

+
/*****************
* Helpers
*****************/
@@ -220,9 +225,10 @@ static int num_free_chunks(struct z3fold_header *zhdr)
*/
if (zhdr->middle_chunks != 0) {
int nfree_before = zhdr->first_chunks ?
- 0 : zhdr->start_middle - 1;
+ 0 : zhdr->start_middle - ZHDR_CHUNKS;
int nfree_after = zhdr->last_chunks ?
- 0 : NCHUNKS - zhdr->start_middle - zhdr->middle_chunks;
+ 0 : TOTAL_CHUNKS -
+ (zhdr->start_middle + zhdr->middle_chunks);
nfree = max(nfree_before, nfree_after);
} else
nfree = NCHUNKS - zhdr->first_chunks - zhdr->last_chunks;
@@ -287,40 +293,47 @@ static int z3fold_compact_page(struct z3fold_header *zhdr)
int ret = 0;

if (test_bit(MIDDLE_CHUNK_MAPPED, &page->private))
+ goto out; /* can't move middle chunk, it's used */
+
+ if (zhdr->middle_chunks == 0)
+ goto out; /* nothing to compact */
+
+ if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
+ /* move to the beginning */
+ mchunk_memmove(zhdr, ZHDR_CHUNKS);
+ zhdr->first_chunks = zhdr->middle_chunks;
+ zhdr->middle_chunks = 0;
+ zhdr->start_middle = 0;
+ zhdr->first_num++;
+ ret = 1;
goto out;
+ }

- if (zhdr->middle_chunks != 0) {
- if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
- mchunk_memmove(zhdr, 1); /* move to the beginning */
- zhdr->first_chunks = zhdr->middle_chunks;
- zhdr->middle_chunks = 0;
- zhdr->start_middle = 0;
- zhdr->first_num++;
- ret = 1;
- goto out;
- }
-
- /*
- * moving data is expensive, so let's only do that if
- * there's substantial gain (at least BIG_CHUNK_GAP chunks)
- */
- if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 &&
- zhdr->start_middle > zhdr->first_chunks + BIG_CHUNK_GAP) {
- mchunk_memmove(zhdr, zhdr->first_chunks + 1);
- zhdr->start_middle = zhdr->first_chunks + 1;
- ret = 1;
- goto out;
- }
- if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 &&
- zhdr->middle_chunks + zhdr->last_chunks <=
- NCHUNKS - zhdr->start_middle - BIG_CHUNK_GAP) {
- unsigned short new_start = NCHUNKS - zhdr->last_chunks -
- zhdr->middle_chunks;
- mchunk_memmove(zhdr, new_start);
- zhdr->start_middle = new_start;
- ret = 1;
- goto out;
- }
+ /*
+ * moving data is expensive, so let's only do that if
+ * there's substantial gain (at least BIG_CHUNK_GAP chunks)
+ */
+ if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 &&
+ zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >=
+ BIG_CHUNK_GAP) {
+ /* new_start: right after 1st chunk */
+ unsigned short new_start = zhdr->first_chunks + ZHDR_CHUNKS;
+ mchunk_memmove(zhdr, new_start);
+ zhdr->start_middle = new_start;
+ ret = 1;
+ goto out;
+ }
+ if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 &&
+ TOTAL_CHUNKS -
+ (zhdr->last_chunks + zhdr->start_middle + zhdr->middle_chunks) >=
+ BIG_CHUNK_GAP) {
+ /* new_start: right before last chunk */
+ unsigned short new_start = TOTAL_CHUNKS -
+ (zhdr->last_chunks + zhdr->middle_chunks);
+ mchunk_memmove(zhdr, new_start);
+ zhdr->start_middle = new_start;
+ ret = 1;
+ goto out;
}
out:
return ret;
@@ -425,7 +438,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
zhdr->last_chunks = chunks;
else {
zhdr->middle_chunks = chunks;
- zhdr->start_middle = zhdr->first_chunks + 1;
+ zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
}

spin_lock(&pool->lock);
@@ -876,8 +889,8 @@ MODULE_ALIAS("zpool-z3fold");

static int __init init_z3fold(void)
{
- /* Make sure the z3fold header will fit in one chunk */
- BUILD_BUG_ON(sizeof(struct z3fold_header) > ZHDR_SIZE_ALIGNED);
+ /* Make sure the z3fold header is not larger than the page size */
+ BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE);
zpool_register_driver(&z3fold_zpool_driver);

return 0;
--
2.4.2

2016-12-26 00:41:40

by Vitaly Wool

Subject: [PATCH/RESEND 5/5] z3fold: add kref refcounting

With both the upcoming and the already present locking optimizations,
introducing kref to reference-count z3fold objects is the right thing
to do. Moreover, it makes the buddied list unnecessary and allows for
simpler handling of headless pages.
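
A condensed sketch of the refcounting pattern (put_z3fold_page() is a
made-up helper name condensing the repeated put sequence in the hunks
below; release_z3fold_page() and the kref calls are from the patch
itself):

/*
 * References are taken under the pool lock, and only if the page is
 * still live, i.e. its refcount has not dropped to zero yet:
 *
 *     if (!zhdr || !kref_get_unless_zero(&zhdr->refcount)) {
 *             spin_unlock(&pool->lock);
 *             continue;
 *     }
 *
 * The put side is always the same; kref_put() returns nonzero only
 * when the release callback ran, so pages_nr is decremented exactly
 * once per freed page.
 */
static void put_z3fold_page(struct z3fold_pool *pool,
                            struct z3fold_header *zhdr)
{
        spin_lock(&pool->lock);
        if (kref_put(&zhdr->refcount, release_z3fold_page))
                atomic64_dec(&pool->pages_nr);
        spin_unlock(&pool->lock);
}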

Signed-off-by: Vitaly Wool <[email protected]>
---
mm/z3fold.c | 137 ++++++++++++++++++++++++++++++------------------------------
1 file changed, 68 insertions(+), 69 deletions(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 729a2da..4593493 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -52,6 +52,7 @@ enum buddy {
* z3fold page, except for HEADLESS pages
* @buddy: links the z3fold page into the relevant list in the pool
* @page_lock: per-page lock
+ * @refcount: reference count for the z3fold page
* @first_chunks: the size of the first buddy in chunks, 0 if free
* @middle_chunks: the size of the middle buddy in chunks, 0 if free
* @last_chunks: the size of the last buddy in chunks, 0 if free
@@ -60,6 +61,7 @@ enum buddy {
struct z3fold_header {
struct list_head buddy;
raw_spinlock_t page_lock;
+ struct kref refcount;
unsigned short first_chunks;
unsigned short middle_chunks;
unsigned short last_chunks;
@@ -95,8 +97,6 @@ struct z3fold_header {
* @unbuddied: array of lists tracking z3fold pages that contain 2- buddies;
* the lists each z3fold page is added to depends on the size of
* its free region.
- * @buddied: list tracking the z3fold pages that contain 3 buddies;
- * these z3fold pages are full
* @lru: list tracking the z3fold pages in LRU order by most recently
* added buddy.
* @pages_nr: number of z3fold pages in the pool.
@@ -109,7 +109,6 @@ struct z3fold_header {
struct z3fold_pool {
spinlock_t lock;
struct list_head unbuddied[NCHUNKS];
- struct list_head buddied;
struct list_head lru;
atomic64_t pages_nr;
const struct z3fold_ops *ops;
@@ -162,9 +161,21 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
}

/* Resets the struct page fields and frees the page */
-static void free_z3fold_page(struct z3fold_header *zhdr)
+static void free_z3fold_page(struct page *page)
{
- __free_page(virt_to_page(zhdr));
+ __free_page(page);
+}
+
+static void release_z3fold_page(struct kref *ref)
+{
+ struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
+ refcount);
+ struct page *page = virt_to_page(zhdr);
+ if (!list_empty(&zhdr->buddy))
+ list_del(&zhdr->buddy);
+ if (!list_empty(&page->lru))
+ list_del(&page->lru);
+ free_z3fold_page(page);
}

/* Lock a z3fold page */
@@ -256,9 +267,9 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
if (!pool)
return NULL;
spin_lock_init(&pool->lock);
+ kref_init(&zhdr->refcount);
for_each_unbuddied_list(i, 0)
INIT_LIST_HEAD(&pool->unbuddied[i]);
- INIT_LIST_HEAD(&pool->buddied);
INIT_LIST_HEAD(&pool->lru);
atomic64_set(&pool->pages_nr, 0);
pool->ops = ops;
@@ -383,7 +394,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
spin_lock(&pool->lock);
zhdr = list_first_entry_or_null(&pool->unbuddied[i],
struct z3fold_header, buddy);
- if (!zhdr) {
+ if (!zhdr || !kref_get_unless_zero(&zhdr->refcount)) {
spin_unlock(&pool->lock);
continue;
}
@@ -403,10 +414,12 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
else if (zhdr->middle_chunks == 0)
bud = MIDDLE;
else {
+ z3fold_page_unlock(zhdr);
spin_lock(&pool->lock);
- list_add(&zhdr->buddy, &pool->buddied);
+ if (kref_put(&zhdr->refcount,
+ release_z3fold_page))
+ atomic64_dec(&pool->pages_nr);
spin_unlock(&pool->lock);
- z3fold_page_unlock(zhdr);
pr_err("No free chunks in unbuddied\n");
WARN_ON(1);
continue;
@@ -447,9 +460,6 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
/* Add to unbuddied list */
freechunks = num_free_chunks(zhdr);
list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
- } else {
- /* Add to buddied list */
- list_add(&zhdr->buddy, &pool->buddied);
}

headless:
@@ -515,50 +525,39 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)

if (test_bit(UNDER_RECLAIM, &page->private)) {
/* z3fold page is under reclaim, reclaim will free */
- if (bud != HEADLESS)
+ if (bud != HEADLESS) {
z3fold_page_unlock(zhdr);
- return;
- }
-
- /* Remove from existing buddy list */
- if (bud != HEADLESS) {
- spin_lock(&pool->lock);
- /*
- * this object may have been removed from its list by
- * z3fold_alloc(). In that case we just do nothing,
- * z3fold_alloc() will allocate an object and add the page
- * to the relevant list.
- */
- if (!list_empty(&zhdr->buddy)) {
- list_del(&zhdr->buddy);
- } else {
+ spin_lock(&pool->lock);
+ if (kref_put(&zhdr->refcount, release_z3fold_page))
+ atomic64_dec(&pool->pages_nr);
spin_unlock(&pool->lock);
- z3fold_page_unlock(zhdr);
- return;
}
- spin_unlock(&pool->lock);
+ return;
}

- if (bud == HEADLESS ||
- (zhdr->first_chunks == 0 && zhdr->middle_chunks == 0 &&
- zhdr->last_chunks == 0)) {
- /* z3fold page is empty, free */
+ if (bud == HEADLESS) {
spin_lock(&pool->lock);
list_del(&page->lru);
spin_unlock(&pool->lock);
- clear_bit(PAGE_HEADLESS, &page->private);
- if (bud != HEADLESS)
- z3fold_page_unlock(zhdr);
- free_z3fold_page(zhdr);
+ free_z3fold_page(page);
atomic64_dec(&pool->pages_nr);
} else {
- z3fold_compact_page(zhdr);
- /* Add to the unbuddied list */
+ if (zhdr->first_chunks != 0 || zhdr->middle_chunks != 0 ||
+ zhdr->last_chunks != 0) {
+ z3fold_compact_page(zhdr);
+ /* Add to the unbuddied list */
+ spin_lock(&pool->lock);
+ if (!list_empty(&zhdr->buddy))
+ list_del(&zhdr->buddy);
+ freechunks = num_free_chunks(zhdr);
+ list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+ spin_unlock(&pool->lock);
+ }
+ z3fold_page_unlock(zhdr);
spin_lock(&pool->lock);
- freechunks = num_free_chunks(zhdr);
- list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+ if (kref_put(&zhdr->refcount, release_z3fold_page))
+ atomic64_dec(&pool->pages_nr);
spin_unlock(&pool->lock);
- z3fold_page_unlock(zhdr);
}

}
@@ -617,13 +616,15 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
return -EINVAL;
}
page = list_last_entry(&pool->lru, struct page, lru);
- list_del(&page->lru);
+ list_del_init(&page->lru);

/* Protect z3fold page against free */
set_bit(UNDER_RECLAIM, &page->private);
zhdr = page_address(page);
if (!test_bit(PAGE_HEADLESS, &page->private)) {
- list_del(&zhdr->buddy);
+ if (!list_empty(&zhdr->buddy))
+ list_del_init(&zhdr->buddy);
+ kref_get(&zhdr->refcount);
spin_unlock(&pool->lock);
z3fold_page_lock(zhdr);
/*
@@ -664,30 +665,26 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
goto next;
}
next:
- if (!test_bit(PAGE_HEADLESS, &page->private))
- z3fold_page_lock(zhdr);
clear_bit(UNDER_RECLAIM, &page->private);
- if ((test_bit(PAGE_HEADLESS, &page->private) && ret == 0) ||
- (zhdr->first_chunks == 0 && zhdr->last_chunks == 0 &&
- zhdr->middle_chunks == 0)) {
- /*
- * All buddies are now free, free the z3fold page and
- * return success.
- */
- if (!test_and_clear_bit(PAGE_HEADLESS, &page->private))
+ if (test_bit(PAGE_HEADLESS, &page->private)) {
+ if (ret == 0) {
+ free_z3fold_page(page);
+ return 0;
+ }
+ } else {
+ z3fold_page_lock(zhdr);
+ if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0 &&
+ zhdr->middle_chunks == 0) {
z3fold_page_unlock(zhdr);
- free_z3fold_page(zhdr);
- atomic64_dec(&pool->pages_nr);
- return 0;
- } else if (!test_bit(PAGE_HEADLESS, &page->private)) {
- if (zhdr->first_chunks != 0 &&
- zhdr->last_chunks != 0 &&
- zhdr->middle_chunks != 0) {
- /* Full, add to buddied list */
spin_lock(&pool->lock);
- list_add(&zhdr->buddy, &pool->buddied);
+ if (kref_put(&zhdr->refcount,
+ release_z3fold_page))
+ atomic64_dec(&pool->pages_nr);
spin_unlock(&pool->lock);
- } else {
+ return 0;
+ } else if (zhdr->first_chunks == 0 ||
+ zhdr->last_chunks == 0 ||
+ zhdr->middle_chunks == 0) {
z3fold_compact_page(zhdr);
/* add to unbuddied list */
spin_lock(&pool->lock);
@@ -696,10 +693,12 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
&pool->unbuddied[freechunks]);
spin_unlock(&pool->lock);
}
- }
-
- if (!test_bit(PAGE_HEADLESS, &page->private))
z3fold_page_unlock(zhdr);
+ spin_lock(&pool->lock);
+ if (kref_put(&zhdr->refcount, release_z3fold_page))
+ atomic64_dec(&pool->pages_nr);
+ spin_unlock(&pool->lock);
+ }

spin_lock(&pool->lock);
/* add to beginning of LRU */
--
2.4.2