This patchet's main motivation is to account for a corner case in which
the read constrains imposed by the device are not respected by pblk. In
the process, it also refactors parts of the read ring buffer to simplify
creation/deletion operations and improve readability.
Note that the checkpatch warnings on missing comments for memory
barriers are false positives.
The code is also available in the for-4.20/pblk branch in the OCSSD github
project.
Thanks,
Javier
Javier González (4):
lightnvm: pblk: remove unused function
lightnvm: pblk: encapsulate rb pointer operations
lightnvm: pblk: move ring buffer alloc/free rb init
lightnvm: pblk: guarantee mw_cunits on read buffer
drivers/lightnvm/pblk-init.c | 21 +++------
drivers/lightnvm/pblk-rb.c | 100 +++++++++++++++++++-----------------------
drivers/lightnvm/pblk-write.c | 7 +--
drivers/lightnvm/pblk.h | 11 ++---
4 files changed, 57 insertions(+), 82 deletions(-)
--
2.7.4
pblk's read/write buffer currently takes a buffer and its size and uses
it to create the metadata around it to use it as a ring buffer. This
puts the responsibility of allocating/freeing ring buffer memory on the
ring buffer user. Instead, move it inside of the ring buffer helpers
(pblk-rb.c). This simplifies creation/destruction routines.
Signed-off-by: Javier González <[email protected]>
---
drivers/lightnvm/pblk-init.c | 18 +++------------
drivers/lightnvm/pblk-rb.c | 53 +++++++++++++++++++++++++++-----------------
drivers/lightnvm/pblk.h | 7 ++----
3 files changed, 38 insertions(+), 40 deletions(-)
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index caed18fabd35..549f13a58b33 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -185,17 +185,14 @@ static void pblk_rwb_free(struct pblk *pblk)
if (pblk_rb_tear_down_check(&pblk->rwb))
pblk_err(pblk, "write buffer error on tear down\n");
- pblk_rb_data_free(&pblk->rwb);
- vfree(pblk_rb_entries_ref(&pblk->rwb));
+ pblk_rb_free(&pblk->rwb);
}
static int pblk_rwb_init(struct pblk *pblk)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
- struct pblk_rb_entry *entries;
- unsigned long nr_entries, buffer_size;
- unsigned int power_size, power_seg_sz;
+ unsigned long buffer_size;
int pgs_in_buffer;
pgs_in_buffer = max(geo->mw_cunits, geo->ws_opt) * geo->all_luns;
@@ -205,16 +202,7 @@ static int pblk_rwb_init(struct pblk *pblk)
else
buffer_size = pgs_in_buffer;
- nr_entries = pblk_rb_calculate_size(buffer_size);
-
- entries = vzalloc(array_size(nr_entries, sizeof(struct pblk_rb_entry)));
- if (!entries)
- return -ENOMEM;
-
- power_size = get_count_order(nr_entries);
- power_seg_sz = get_count_order(geo->csecs);
-
- return pblk_rb_init(&pblk->rwb, entries, power_size, power_seg_sz);
+ return pblk_rb_init(&pblk->rwb, buffer_size, geo->csecs);
}
/* Minimum pages needed within a lun */
diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c
index e46d8cb9d28b..f653faa6a9ed 100644
--- a/drivers/lightnvm/pblk-rb.c
+++ b/drivers/lightnvm/pblk-rb.c
@@ -23,7 +23,7 @@
static DECLARE_RWSEM(pblk_rb_lock);
-void pblk_rb_data_free(struct pblk_rb *rb)
+static void pblk_rb_data_free(struct pblk_rb *rb)
{
struct pblk_rb_pages *p, *t;
@@ -36,22 +36,46 @@ void pblk_rb_data_free(struct pblk_rb *rb)
up_write(&pblk_rb_lock);
}
+void pblk_rb_free(struct pblk_rb *rb)
+{
+ pblk_rb_data_free(rb);
+ vfree(rb->entries);
+}
+
+/*
+ * pblk_rb_calculate_size -- calculate the size of the write buffer
+ */
+static unsigned int pblk_rb_calculate_size(unsigned int nr_entries)
+{
+ /* Alloc a write buffer that can at least fit 128 entries */
+ return (1 << max(get_count_order(nr_entries), 7));
+}
+
/*
* Initialize ring buffer. The data and metadata buffers must be previously
* allocated and their size must be a power of two
* (Documentation/core-api/circular-buffers.rst)
*/
-int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base,
- unsigned int power_size, unsigned int power_seg_sz)
+int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int seg_size)
{
struct pblk *pblk = container_of(rb, struct pblk, rwb);
+ struct pblk_rb_entry *entries;
unsigned int init_entry = 0;
- unsigned int alloc_order = power_size;
unsigned int max_order = MAX_ORDER - 1;
- unsigned int order, iter;
+ unsigned int power_size, power_seg_sz;
+ unsigned int alloc_order, order, iter;
+ unsigned int nr_entries;
+
+ nr_entries = pblk_rb_calculate_size(size);
+ entries = vzalloc(array_size(nr_entries, sizeof(struct pblk_rb_entry)));
+ if (!entries)
+ return -ENOMEM;
+
+ power_size = get_count_order(size);
+ power_seg_sz = get_count_order(seg_size);
down_write(&pblk_rb_lock);
- rb->entries = rb_entry_base;
+ rb->entries = entries;
rb->seg_size = (1 << power_seg_sz);
rb->nr_entries = (1 << power_size);
rb->mem = rb->subm = rb->sync = rb->l2p_update = 0;
@@ -62,6 +86,7 @@ int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base,
INIT_LIST_HEAD(&rb->pages);
+ alloc_order = power_size;
if (alloc_order >= max_order) {
order = max_order;
iter = (1 << (alloc_order - max_order));
@@ -80,6 +105,7 @@ int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base,
page_set = kmalloc(sizeof(struct pblk_rb_pages), GFP_KERNEL);
if (!page_set) {
up_write(&pblk_rb_lock);
+ vfree(entries);
return -ENOMEM;
}
@@ -89,6 +115,7 @@ int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base,
kfree(page_set);
pblk_rb_data_free(rb);
up_write(&pblk_rb_lock);
+ vfree(entries);
return -ENOMEM;
}
kaddr = page_address(page_set->pages);
@@ -125,20 +152,6 @@ int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base,
return 0;
}
-/*
- * pblk_rb_calculate_size -- calculate the size of the write buffer
- */
-unsigned int pblk_rb_calculate_size(unsigned int nr_entries)
-{
- /* Alloc a write buffer that can at least fit 128 entries */
- return (1 << max(get_count_order(nr_entries), 7));
-}
-
-void *pblk_rb_entries_ref(struct pblk_rb *rb)
-{
- return rb->entries;
-}
-
static void clean_wctx(struct pblk_w_ctx *w_ctx)
{
int flags;
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 0ffc19cff697..17b27b395942 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -734,10 +734,7 @@ struct pblk_line_ws {
/*
* pblk ring buffer operations
*/
-int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base,
- unsigned int power_size, unsigned int power_seg_sz);
-unsigned int pblk_rb_calculate_size(unsigned int nr_entries);
-void *pblk_rb_entries_ref(struct pblk_rb *rb);
+int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int seg_sz);
int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
unsigned int nr_entries, unsigned int *pos);
int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries,
@@ -771,7 +768,7 @@ unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos);
int pblk_rb_tear_down_check(struct pblk_rb *rb);
int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos);
-void pblk_rb_data_free(struct pblk_rb *rb);
+void pblk_rb_free(struct pblk_rb *rb);
ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf);
/*
--
2.7.4
OCSSD 2.0 defines the amount of data that the host must buffer per chunk
to guarantee reads through the geometry field mw_cunits. This value is
the base that pblk uses to determine the size of its read buffer.
Currently, this size is set to be the closes power-of-2 to mw_cunits
times the number of parallel units available to the pblk instance for
each open line (currently one). When an entry (4KB) is put in the
buffer, the L2P table points to it. As the buffer wraps up, the L2P is
updated to point to addresses on the device, thus guaranteeing mw_cunits
at a chunk level.
However, given that pblk cannot write to the device under ws_min
(normally ws_opt), there might be a window in which the buffer starts
wrapping up and updating L2P entries before the mw_cunits value in a
chunk has been surpassed.
In order not to violate the mw_cunits constrain in this case, account
for ws_opt on the read buffer creation.
Signed-off-by: Javier González <[email protected]>
---
drivers/lightnvm/pblk-init.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index 549f13a58b33..604bb743b92b 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -195,7 +195,8 @@ static int pblk_rwb_init(struct pblk *pblk)
unsigned long buffer_size;
int pgs_in_buffer;
- pgs_in_buffer = max(geo->mw_cunits, geo->ws_opt) * geo->all_luns;
+ pgs_in_buffer = (max(geo->mw_cunits, geo->ws_opt) + geo->ws_opt)
+ * geo->all_luns;
if (write_buffer_size && (write_buffer_size > pgs_in_buffer))
buffer_size = write_buffer_size;
--
2.7.4
pblk's read/write buffer is always a power-of-2, thus wrapping up the
buffer can be done with a bit mask. Since this is an implementation
detail internal to the write buffer, make a helper that hides pointer
increment + wrap, and allows to transparently relax this assumption in
the future.
Signed-off-by: Javier González <[email protected]>
---
drivers/lightnvm/pblk-rb.c | 21 +++++++++++++--------
drivers/lightnvm/pblk-write.c | 7 ++-----
drivers/lightnvm/pblk.h | 2 ++
3 files changed, 17 insertions(+), 13 deletions(-)
diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c
index 82829e8151db..e46d8cb9d28b 100644
--- a/drivers/lightnvm/pblk-rb.c
+++ b/drivers/lightnvm/pblk-rb.c
@@ -169,6 +169,12 @@ static unsigned int pblk_rb_space(struct pblk_rb *rb)
return pblk_rb_ring_space(rb, mem, sync, rb->nr_entries);
}
+unsigned int pblk_rb_ptr_wrap(struct pblk_rb *rb, unsigned int p,
+ unsigned int nr_entries)
+{
+ return (p + nr_entries) & (rb->nr_entries - 1);
+}
+
/*
* Buffer count is calculated with respect to the submission entry signaling the
* entries that are available to send to the media
@@ -195,8 +201,7 @@ unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries)
subm = READ_ONCE(rb->subm);
/* Commit read means updating submission pointer */
- smp_store_release(&rb->subm,
- (subm + nr_entries) & (rb->nr_entries - 1));
+ smp_store_release(&rb->subm, pblk_rb_ptr_wrap(rb, subm, nr_entries));
return subm;
}
@@ -229,7 +234,7 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int to_update)
line = pblk_ppa_to_line(pblk, w_ctx->ppa);
kref_put(&line->ref, pblk_line_put);
clean_wctx(w_ctx);
- rb->l2p_update = (rb->l2p_update + 1) & (rb->nr_entries - 1);
+ rb->l2p_update = pblk_rb_ptr_wrap(rb, rb->l2p_update, 1);
}
pblk_rl_out(&pblk->rl, user_io, gc_io);
@@ -408,7 +413,7 @@ static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
return 0;
/* Protect from read count */
- smp_store_release(&rb->mem, (*pos + nr_entries) & (rb->nr_entries - 1));
+ smp_store_release(&rb->mem, pblk_rb_ptr_wrap(rb, *pos, nr_entries));
return 1;
}
@@ -432,7 +437,7 @@ static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
if (!__pblk_rb_may_write(rb, nr_entries, pos))
return 0;
- mem = (*pos + nr_entries) & (rb->nr_entries - 1);
+ mem = pblk_rb_ptr_wrap(rb, *pos, nr_entries);
*io_ret = NVM_IO_DONE;
if (bio->bi_opf & REQ_PREFLUSH) {
@@ -572,7 +577,7 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
/* Release flags on context. Protect from writes */
smp_store_release(&entry->w_ctx.flags, flags);
- pos = (pos + 1) & (rb->nr_entries - 1);
+ pos = pblk_rb_ptr_wrap(rb, pos, 1);
}
if (pad) {
@@ -652,7 +657,7 @@ int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos)
{
- unsigned int entry = pos & (rb->nr_entries - 1);
+ unsigned int entry = pblk_rb_ptr_wrap(rb, pos, 0);
return &rb->entries[entry].w_ctx;
}
@@ -698,7 +703,7 @@ unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries)
}
}
- sync = (sync + nr_entries) & (rb->nr_entries - 1);
+ sync = pblk_rb_ptr_wrap(rb, sync, nr_entries);
/* Protect from counts */
smp_store_release(&rb->sync, sync);
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index 277abc8633f7..fa8726493b39 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -140,12 +140,11 @@ static void pblk_prepare_resubmit(struct pblk *pblk, unsigned int sentry,
struct pblk_w_ctx *w_ctx;
struct ppa_addr ppa_l2p;
int flags;
- unsigned int pos, i;
+ unsigned int i;
spin_lock(&pblk->trans_lock);
- pos = sentry;
for (i = 0; i < nr_entries; i++) {
- entry = &rb->entries[pos];
+ entry = &rb->entries[pblk_rb_ptr_wrap(rb, sentry, i)];
w_ctx = &entry->w_ctx;
/* Check if the lba has been overwritten */
@@ -164,8 +163,6 @@ static void pblk_prepare_resubmit(struct pblk *pblk, unsigned int sentry,
*/
line = pblk_ppa_to_line(pblk, w_ctx->ppa);
kref_put(&line->ref, pblk_line_put);
-
- pos = (pos + 1) & (rb->nr_entries - 1);
}
spin_unlock(&pblk->trans_lock);
}
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index e70ece87a9f4..0ffc19cff697 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -760,6 +760,8 @@ unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries);
unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags);
unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries);
+unsigned int pblk_rb_ptr_wrap(struct pblk_rb *rb, unsigned int p,
+ unsigned int nr_entries);
void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags);
unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb);
--
2.7.4
Removed unused function in pblk-rb.c
Signed-off-by: Javier González <[email protected]>
---
drivers/lightnvm/pblk-rb.c | 26 --------------------------
drivers/lightnvm/pblk.h | 2 --
2 files changed, 28 deletions(-)
diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c
index c26eab2ba8bd..82829e8151db 100644
--- a/drivers/lightnvm/pblk-rb.c
+++ b/drivers/lightnvm/pblk-rb.c
@@ -729,32 +729,6 @@ unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb)
return (submitted < to_flush) ? (to_flush - submitted) : 0;
}
-/*
- * Scan from the current position of the sync pointer to find the entry that
- * corresponds to the given ppa. This is necessary since write requests can be
- * completed out of order. The assumption is that the ppa is close to the sync
- * pointer thus the search will not take long.
- *
- * The caller of this function must guarantee that the sync pointer will no
- * reach the entry while it is using the metadata associated with it. With this
- * assumption in mind, there is no need to take the sync lock.
- */
-struct pblk_rb_entry *pblk_rb_sync_scan_entry(struct pblk_rb *rb,
- struct ppa_addr *ppa)
-{
- unsigned int sync, subm, count;
- unsigned int i;
-
- sync = READ_ONCE(rb->sync);
- subm = READ_ONCE(rb->subm);
- count = pblk_rb_ring_count(subm, sync, rb->nr_entries);
-
- for (i = 0; i < count; i++)
- sync = (sync + 1) & (rb->nr_entries - 1);
-
- return NULL;
-}
-
int pblk_rb_tear_down_check(struct pblk_rb *rb)
{
struct pblk_rb_entry *entry;
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 127ee99c3efc..e70ece87a9f4 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -760,8 +760,6 @@ unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries);
unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags);
unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries);
-struct pblk_rb_entry *pblk_rb_sync_scan_entry(struct pblk_rb *rb,
- struct ppa_addr *ppa);
void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags);
unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb);
--
2.7.4
On 09/18/2018 10:03 AM, Javier González wrote:
> This patchet's main motivation is to account for a corner case in which
> the read constrains imposed by the device are not respected by pblk. In
> the process, it also refactors parts of the read ring buffer to simplify
> creation/deletion operations and improve readability.
>
> Note that the checkpatch warnings on missing comments for memory
> barriers are false positives.
>
> The code is also available in the for-4.20/pblk branch in the OCSSD github
> project.
>
> Thanks,
> Javier
>
> Javier González (4):
> lightnvm: pblk: remove unused function
> lightnvm: pblk: encapsulate rb pointer operations
> lightnvm: pblk: move ring buffer alloc/free rb init
> lightnvm: pblk: guarantee mw_cunits on read buffer
>
> drivers/lightnvm/pblk-init.c | 21 +++------
> drivers/lightnvm/pblk-rb.c | 100 +++++++++++++++++++-----------------------
> drivers/lightnvm/pblk-write.c | 7 +--
> drivers/lightnvm/pblk.h | 11 ++---
> 4 files changed, 57 insertions(+), 82 deletions(-)
>
Applied for 4.20.