Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752639AbdF3P6a (ORCPT ); Fri, 30 Jun 2017 11:58:30 -0400 Received: from mail-wm0-f45.google.com ([74.125.82.45]:34777 "EHLO mail-wm0-f45.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752484AbdF3P5E (ORCPT ); Fri, 30 Jun 2017 11:57:04 -0400 From: "=?UTF-8?q?Javier=20Gonz=C3=A1lez?=" X-Google-Original-From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= To: mb@lightnvm.io, axboe@fb.com Cc: linux-block@vger.kernel.org, linux-kernel@vger.kernel.org, =?UTF-8?q?Javier=20Gonz=C3=A1lez?= , =?UTF-8?q?Matias=20Bj=C3=B8rling?= Subject: [PATCH 07/10] lightnvm: pblk: remove target using async. I/Os Date: Fri, 30 Jun 2017 17:56:40 +0200 Message-Id: <1498838203-21539-8-git-send-email-javier@cnexlabs.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1498838203-21539-1-git-send-email-javier@cnexlabs.com> References: <1498838203-21539-1-git-send-email-javier@cnexlabs.com> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 10616 Lines: 380 When removing a pblk instance, pad the current line using asynchronous I/O. This reduces the removal time from ~1 minute in the worst case to a couple of seconds. Signed-off-by: Javier González Signed-off-by: Matias Bjørling --- drivers/lightnvm/pblk-core.c | 5 +- drivers/lightnvm/pblk-init.c | 9 +++ drivers/lightnvm/pblk-rb.c | 8 ++ drivers/lightnvm/pblk-recovery.c | 165 ++++++++++++++++++++++----------------- drivers/lightnvm/pblk-write.c | 2 +- drivers/lightnvm/pblk.h | 8 ++ 6 files changed, 123 insertions(+), 74 deletions(-) diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 74b8d9db05e1..e6f42cddc8ec 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -273,9 +273,10 @@ static void pblk_flush_writer(struct pblk *pblk) { pblk_rb_flush(&pblk->rwb); do { - if (!pblk_rb_read_count(&pblk->rwb)) + if (!pblk_rb_sync_count(&pblk->rwb)) break; + pblk_write_kick(pblk); schedule(); } while (1); } @@ -1350,6 +1351,7 @@ void pblk_pipeline_stop(struct pblk *pblk) return; } + flush_workqueue(pblk->bb_wq); pblk_line_close_meta_sync(pblk); spin_lock(&l_mg->free_lock); @@ -1547,6 +1549,7 @@ void pblk_line_close_meta_sync(struct pblk *pblk) } pblk_wait_for_meta(pblk); + flush_workqueue(pblk->close_wq); } static void pblk_line_should_sync_meta(struct pblk *pblk) diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index b3fc310aa51c..025d8fe52154 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -841,6 +841,15 @@ static int pblk_writer_init(struct pblk *pblk) static void pblk_writer_stop(struct pblk *pblk) { + /* The pipeline must be stopped and the write buffer emptied before the + * write thread is stopped + */ + WARN(pblk_rb_read_count(&pblk->rwb), + "Stopping not fully persisted write buffer\n"); + + WARN(pblk_rb_sync_count(&pblk->rwb), + "Stopping not fully synced write buffer\n"); + if (pblk->writer_ts) kthread_stop(pblk->writer_ts); del_timer(&pblk->wtimer); diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c index 2dda874af890..7300be98e831 100644 --- a/drivers/lightnvm/pblk-rb.c +++ b/drivers/lightnvm/pblk-rb.c @@ -180,6 +180,14 @@ unsigned int pblk_rb_read_count(struct pblk_rb *rb) return pblk_rb_ring_count(mem, subm, rb->nr_entries); } +unsigned int pblk_rb_sync_count(struct pblk_rb *rb) +{ + unsigned int mem = READ_ONCE(rb->mem); + unsigned int sync = READ_ONCE(rb->sync); + + return pblk_rb_ring_count(mem, sync, rb->nr_entries); +} + unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries) { unsigned int subm; diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index 6d58659fa3da..0e48d3e4e143 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -327,47 +327,94 @@ static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line, return 0; } +static void pblk_recov_complete(struct kref *ref) +{ + struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref); + + complete(&pad_rq->wait); +} + +static void pblk_end_io_recov(struct nvm_rq *rqd) +{ + struct pblk_pad_rq *pad_rq = rqd->private; + struct pblk *pblk = pad_rq->pblk; + struct nvm_tgt_dev *dev = pblk->dev; + + kref_put(&pad_rq->ref, pblk_recov_complete); + nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list); + pblk_free_rqd(pblk, rqd, WRITE); +} + static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line, - struct pblk_recov_alloc p, int left_ppas) + int left_ppas) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; struct ppa_addr *ppa_list; struct pblk_sec_meta *meta_list; + struct pblk_pad_rq *pad_rq; struct nvm_rq *rqd; struct bio *bio; void *data; dma_addr_t dma_ppa_list, dma_meta_list; __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf); u64 w_ptr = line->cur_sec; - int left_line_ppas = line->left_msecs; - int rq_ppas, rq_len; + int left_line_ppas, rq_ppas, rq_len; int i, j; int ret = 0; - DECLARE_COMPLETION_ONSTACK(wait); - - ppa_list = p.ppa_list; - meta_list = p.meta_list; - rqd = p.rqd; - data = p.data; - dma_ppa_list = p.dma_ppa_list; - dma_meta_list = p.dma_meta_list; + + spin_lock(&line->lock); + left_line_ppas = line->left_msecs; + spin_unlock(&line->lock); + + pad_rq = kmalloc(sizeof(struct pblk_pad_rq), GFP_KERNEL); + if (!pad_rq) + return -ENOMEM; + + data = vzalloc(pblk->max_write_pgs * geo->sec_size); + if (!data) { + ret = -ENOMEM; + goto free_rq; + } + + pad_rq->pblk = pblk; + init_completion(&pad_rq->wait); + kref_init(&pad_rq->ref); next_pad_rq: rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); - if (!rq_ppas) - rq_ppas = pblk->min_write_pgs; + if (rq_ppas < pblk->min_write_pgs) { + pr_err("pblk: corrupted pad line %d\n", line->id); + goto free_rq; + } + rq_len = rq_ppas * geo->sec_size; + meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list); + if (!meta_list) { + ret = -ENOMEM; + goto free_data; + } + + ppa_list = (void *)(meta_list) + pblk_dma_meta_size; + dma_ppa_list = dma_meta_list + pblk_dma_meta_size; + + rqd = pblk_alloc_rqd(pblk, WRITE); + if (IS_ERR(rqd)) { + ret = PTR_ERR(rqd); + goto fail_free_meta; + } + memset(rqd, 0, pblk_w_rq_size); + bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); - if (IS_ERR(bio)) - return PTR_ERR(bio); + if (IS_ERR(bio)) { + ret = PTR_ERR(bio); + goto fail_free_rqd; + } bio->bi_iter.bi_sector = 0; /* internal bio */ bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - memset(rqd, 0, pblk_g_rq_size); - rqd->bio = bio; rqd->opcode = NVM_OP_PWRITE; rqd->flags = pblk_set_progr_mode(pblk, WRITE); @@ -376,8 +423,8 @@ static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line, rqd->ppa_list = ppa_list; rqd->dma_ppa_list = dma_ppa_list; rqd->dma_meta_list = dma_meta_list; - rqd->end_io = pblk_end_io_sync; - rqd->private = &wait; + rqd->end_io = pblk_end_io_recov; + rqd->private = pad_rq; for (i = 0; i < rqd->nr_ppas; ) { struct ppa_addr ppa; @@ -405,25 +452,41 @@ static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line, } } + kref_get(&pad_rq->ref); + ret = pblk_submit_io(pblk, rqd); if (ret) { pr_err("pblk: I/O submission failed: %d\n", ret); - return ret; + goto free_data; } - if (!wait_for_completion_io_timeout(&wait, - msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { - pr_err("pblk: L2P recovery write timed out\n"); - } atomic_dec(&pblk->inflight_io); - reinit_completion(&wait); left_line_ppas -= rq_ppas; left_ppas -= rq_ppas; - if (left_ppas > 0 && left_line_ppas) + if (left_ppas && left_line_ppas) goto next_pad_rq; - return 0; + kref_put(&pad_rq->ref, pblk_recov_complete); + + if (!wait_for_completion_io_timeout(&pad_rq->wait, + msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { + pr_err("pblk: pad write timed out\n"); + ret = -ETIME; + } + +free_rq: + kfree(pad_rq); +free_data: + vfree(data); + return ret; + +fail_free_rqd: + pblk_free_rqd(pblk, rqd, WRITE); +fail_free_meta: + nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list); + kfree(pad_rq); + return ret; } /* When this function is called, it means that not all upper pages have been @@ -555,7 +618,7 @@ static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line, if (pad_secs > line->left_msecs) pad_secs = line->left_msecs; - ret = pblk_recov_pad_oob(pblk, line, p, pad_secs); + ret = pblk_recov_pad_oob(pblk, line, pad_secs); if (ret) pr_err("pblk: OOB padding failed (err:%d)\n", ret); @@ -961,64 +1024,22 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk) */ int pblk_recov_pad(struct pblk *pblk) { - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; struct pblk_line *line; struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct nvm_rq *rqd; - struct pblk_recov_alloc p; - struct ppa_addr *ppa_list; - struct pblk_sec_meta *meta_list; - void *data; int left_msecs; int ret = 0; - dma_addr_t dma_ppa_list, dma_meta_list; spin_lock(&l_mg->free_lock); line = l_mg->data_line; left_msecs = line->left_msecs; spin_unlock(&l_mg->free_lock); - rqd = pblk_alloc_rqd(pblk, READ); - if (IS_ERR(rqd)) - return PTR_ERR(rqd); - - meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list); - if (!meta_list) { - ret = -ENOMEM; - goto free_rqd; - } - - ppa_list = (void *)(meta_list) + pblk_dma_meta_size; - dma_ppa_list = dma_meta_list + pblk_dma_meta_size; - - data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL); - if (!data) { - ret = -ENOMEM; - goto free_meta_list; - } - - p.ppa_list = ppa_list; - p.meta_list = meta_list; - p.rqd = rqd; - p.data = data; - p.dma_ppa_list = dma_ppa_list; - p.dma_meta_list = dma_meta_list; - - ret = pblk_recov_pad_oob(pblk, line, p, left_msecs); + ret = pblk_recov_pad_oob(pblk, line, left_msecs); if (ret) { pr_err("pblk: Tear down padding failed (%d)\n", ret); - goto free_data; + return ret; } pblk_line_close_meta(pblk, line); - -free_data: - kfree(data); -free_meta_list: - nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list); -free_rqd: - pblk_free_rqd(pblk, rqd, READ); - return ret; } diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index 8151bf4bb945..d62a8f4faaf4 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -190,7 +190,7 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd) if (rqd->error) { pblk_log_write_err(pblk, rqd); - pr_err("pblk: metadata I/O failed\n"); + pr_err("pblk: metadata I/O failed. Line %d\n", line->id); } #ifdef CONFIG_NVM_DEBUG else diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index cdad2c9edbdf..bf5b73fb345f 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -111,6 +111,13 @@ struct pblk_g_ctx { void *private; }; +/* Pad context */ +struct pblk_pad_rq { + struct pblk *pblk; + struct completion wait; + struct kref ref; +}; + /* Recovery context */ struct pblk_rec_ctx { struct pblk *pblk; @@ -674,6 +681,7 @@ void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags); unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb); unsigned int pblk_rb_read_count(struct pblk_rb *rb); +unsigned int pblk_rb_sync_count(struct pblk_rb *rb); unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos); int pblk_rb_tear_down_check(struct pblk_rb *rb); -- 2.7.4