Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754289AbcCUKZ7 (ORCPT ); Mon, 21 Mar 2016 06:25:59 -0400 Received: from mail-wm0-f47.google.com ([74.125.82.47]:34497 "EHLO mail-wm0-f47.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751842AbcCUKZu (ORCPT ); Mon, 21 Mar 2016 06:25:50 -0400 Subject: Re: [PATCH v2] lightnvm: divide global reverse translation map into per lun To: Wenwei Tao , jg@lightnvm.io References: <1458453092-2184-1-git-send-email-ww.tao0320@gmail.com> Cc: linux-kernel@vger.kernel.org, linux-block@vger.kernel.org From: =?UTF-8?Q?Matias_Bj=c3=b8rling?= Message-ID: <56EFCC2B.5070809@lightnvm.io> Date: Mon, 21 Mar 2016 11:25:47 +0100 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.3.0 MIME-Version: 1.0 In-Reply-To: <1458453092-2184-1-git-send-email-ww.tao0320@gmail.com> Content-Type: text/plain; charset=windows-1252; format=flowed Content-Transfer-Encoding: 7bit Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 14749 Lines: 482 On 03/20/2016 06:51 AM, Wenwei Tao wrote: > Divide the target's global reverse translation map into per lun, > to prepare support for the non-continuous lun target creation. > > Signed-off-by: Wenwei Tao > --- > > Changes since v1 > -fix merge/rebase mistake in rrpc_block_map_update(). > -remove variables poffset and lun_offset in rrpc structure > since no one use them now. > > drivers/lightnvm/rrpc.c | 181 +++++++++++++++++++++++++++-------------------- > drivers/lightnvm/rrpc.h | 9 ++- > include/linux/lightnvm.h | 1 + > 3 files changed, 111 insertions(+), 80 deletions(-) > > diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c > index 3ab6495..88e2ce5 100644 > --- a/drivers/lightnvm/rrpc.c > +++ b/drivers/lightnvm/rrpc.c > @@ -23,28 +23,35 @@ static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio, > struct nvm_rq *rqd, unsigned long flags); > > #define rrpc_for_each_lun(rrpc, rlun, i) \ > - for ((i) = 0, rlun = &(rrpc)->luns[0]; \ > - (i) < (rrpc)->nr_luns; (i)++, rlun = &(rrpc)->luns[(i)]) > + for ((i) = 0, rlun = &(rrpc)->luns[0]; \ > + (i) < (rrpc)->nr_luns; (i)++, rlun = &(rrpc)->luns[(i)]) > + > +static inline u64 lun_poffset(struct nvm_lun *lun, struct nvm_dev *dev) > +{ > + return lun->id * dev->sec_per_lun; > +} > > static void rrpc_page_invalidate(struct rrpc *rrpc, struct rrpc_addr *a) > { > struct rrpc_block *rblk = a->rblk; > - unsigned int pg_offset; > + struct rrpc_lun *rlun = rblk->rlun; > + u64 pg_offset; > > - lockdep_assert_held(&rrpc->rev_lock); > + lockdep_assert_held(&rlun->rev_lock); > > if (a->addr == ADDR_EMPTY || !rblk) > return; > > spin_lock(&rblk->lock); > > - div_u64_rem(a->addr, rrpc->dev->sec_per_blk, &pg_offset); > + div_u64_rem(a->addr, rrpc->dev->sec_per_blk, (u32 *)&pg_offset); > WARN_ON(test_and_set_bit(pg_offset, rblk->invalid_pages)); > rblk->nr_invalid_pages++; > > spin_unlock(&rblk->lock); > > - rrpc->rev_trans_map[a->addr - rrpc->poffset].addr = ADDR_EMPTY; > + pg_offset = lun_poffset(rlun->parent, rrpc->dev); > + rlun->rev_trans_map[a->addr - pg_offset].addr = ADDR_EMPTY; > } > > static void rrpc_invalidate_range(struct rrpc *rrpc, sector_t slba, > @@ -52,14 +59,15 @@ static void rrpc_invalidate_range(struct rrpc *rrpc, sector_t slba, > { > sector_t i; > > - spin_lock(&rrpc->rev_lock); > for (i = slba; i < slba + len; i++) { > struct rrpc_addr *gp = &rrpc->trans_map[i]; > + struct rrpc_lun *rlun = gp->rblk->rlun; > > + spin_lock(&rlun->rev_lock); > rrpc_page_invalidate(rrpc, gp); > + spin_unlock(&rlun->rev_lock); > gp->rblk = NULL; > } > - spin_unlock(&rrpc->rev_lock); > } > > static struct nvm_rq *rrpc_inflight_laddr_acquire(struct rrpc *rrpc, > @@ -116,15 +124,6 @@ static int block_is_full(struct rrpc *rrpc, struct rrpc_block *rblk) > return (rblk->next_page == rrpc->dev->sec_per_blk); > } > > -/* Calculate relative addr for the given block, considering instantiated LUNs */ > -static u64 block_to_rel_addr(struct rrpc *rrpc, struct rrpc_block *rblk) > -{ > - struct nvm_block *blk = rblk->parent; > - int lun_blk = blk->id % (rrpc->dev->blks_per_lun * rrpc->nr_luns); > - > - return lun_blk * rrpc->dev->sec_per_blk; > -} > - > /* Calculate global addr for the given block */ > static u64 block_to_addr(struct rrpc *rrpc, struct rrpc_block *rblk) > { > @@ -291,13 +290,14 @@ static void rrpc_end_sync_bio(struct bio *bio) > static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk) > { > struct request_queue *q = rrpc->dev->q; > + struct rrpc_lun *rlun = rblk->rlun; > struct rrpc_rev_addr *rev; > struct nvm_rq *rqd; > struct bio *bio; > struct page *page; > int slot; > int nr_sec_per_blk = rrpc->dev->sec_per_blk; > - u64 phys_addr; > + u64 phys_addr, poffset; > DECLARE_COMPLETION_ONSTACK(wait); > > if (bitmap_full(rblk->invalid_pages, nr_sec_per_blk)) > @@ -315,6 +315,7 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk) > return -ENOMEM; > } > > + poffset = lun_poffset(rlun->parent, rrpc->dev); > while ((slot = find_first_zero_bit(rblk->invalid_pages, > nr_sec_per_blk)) < nr_sec_per_blk) { > > @@ -322,23 +323,23 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk) > phys_addr = rblk->parent->id * nr_sec_per_blk + slot; > > try: > - spin_lock(&rrpc->rev_lock); > + spin_lock(&rlun->rev_lock); > /* Get logical address from physical to logical table */ > - rev = &rrpc->rev_trans_map[phys_addr - rrpc->poffset]; > + rev = &rlun->rev_trans_map[phys_addr - poffset]; > /* already updated by previous regular write */ > if (rev->addr == ADDR_EMPTY) { > - spin_unlock(&rrpc->rev_lock); > + spin_unlock(&rlun->rev_lock); > continue; > } > > rqd = rrpc_inflight_laddr_acquire(rrpc, rev->addr, 1); > if (IS_ERR_OR_NULL(rqd)) { > - spin_unlock(&rrpc->rev_lock); > + spin_unlock(&rlun->rev_lock); > schedule(); > goto try; > } > > - spin_unlock(&rrpc->rev_lock); > + spin_unlock(&rlun->rev_lock); > > /* Perform read to do GC */ > bio->bi_iter.bi_sector = rrpc_get_sector(rev->addr); > @@ -407,7 +408,7 @@ static void rrpc_block_gc(struct work_struct *work) > struct rrpc_block *rblk = gcb->rblk; > struct nvm_dev *dev = rrpc->dev; > struct nvm_lun *lun = rblk->parent->lun; > - struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset]; > + struct rrpc_lun *rlun = lun->private; > > mempool_free(gcb, rrpc->gcb_pool); > pr_debug("nvm: block '%lu' being reclaimed\n", rblk->parent->id); > @@ -510,7 +511,7 @@ static void rrpc_gc_queue(struct work_struct *work) > struct rrpc_block *rblk = gcb->rblk; > struct nvm_lun *lun = rblk->parent->lun; > struct nvm_block *blk = rblk->parent; > - struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset]; > + struct rrpc_lun *rlun = lun->private; > > spin_lock(&rlun->lock); > list_add_tail(&rblk->prio, &rlun->prio_list); > @@ -561,22 +562,25 @@ static struct rrpc_lun *rrpc_get_lun_rr(struct rrpc *rrpc, int is_gc) > static struct rrpc_addr *rrpc_update_map(struct rrpc *rrpc, sector_t laddr, > struct rrpc_block *rblk, u64 paddr) > { > + struct rrpc_lun *rlun = rblk->rlun; > struct rrpc_addr *gp; > struct rrpc_rev_addr *rev; > + u64 poffset = lun_poffset(rlun->parent, rrpc->dev); > > BUG_ON(laddr >= rrpc->nr_sects); > > gp = &rrpc->trans_map[laddr]; > - spin_lock(&rrpc->rev_lock); > + spin_lock(&rlun->rev_lock); > + > if (gp->rblk) > rrpc_page_invalidate(rrpc, gp); > > gp->addr = paddr; > gp->rblk = rblk; > > - rev = &rrpc->rev_trans_map[gp->addr - rrpc->poffset]; > + rev = &rlun->rev_trans_map[gp->addr - poffset]; > rev->addr = laddr; > - spin_unlock(&rrpc->rev_lock); > + spin_unlock(&rlun->rev_lock); > > return gp; > } > @@ -990,7 +994,6 @@ static int rrpc_gc_init(struct rrpc *rrpc) > > static void rrpc_map_free(struct rrpc *rrpc) > { > - vfree(rrpc->rev_trans_map); > vfree(rrpc->trans_map); > } > > @@ -998,8 +1001,8 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private) > { > struct rrpc *rrpc = (struct rrpc *)private; > struct nvm_dev *dev = rrpc->dev; > - struct rrpc_addr *addr = rrpc->trans_map + slba; > - struct rrpc_rev_addr *raddr = rrpc->rev_trans_map; > + struct rrpc_addr *addr; > + struct rrpc_rev_addr *raddr; > u64 elba = slba + nlb; > u64 i; > > @@ -1008,9 +1011,15 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private) > return -EINVAL; > } > > + slba -= rrpc->soffset >> (ilog2(dev->sec_size) - 9); > + addr = rrpc->trans_map + slba; > for (i = 0; i < nlb; i++) { > + struct rrpc_lun *rlun; > + struct nvm_lun *lun; > u64 pba = le64_to_cpu(entries[i]); > - unsigned int mod; > + u64 poffset; > + int lunid; > + > /* LNVM treats address-spaces as silos, LBA and PBA are > * equally large and zero-indexed. > */ > @@ -1026,10 +1035,18 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private) > if (!pba) > continue; > > - div_u64_rem(pba, rrpc->nr_sects, &mod); > + lunid = div_u64(pba, dev->sec_per_lun); > + lun = dev->mt->get_lun(dev, lunid); > + > + if (unlikely(!lun)) > + return -EINVAL; > + > + rlun = lun->private; > + raddr = rlun->rev_trans_map; > + poffset = lun_poffset(lun, dev); > > addr[i].addr = pba; > - raddr[mod].addr = slba + i; > + raddr[pba - poffset].addr = slba + i; > } > > return 0; > @@ -1048,17 +1065,10 @@ static int rrpc_map_init(struct rrpc *rrpc) > if (!rrpc->trans_map) > return -ENOMEM; > > - rrpc->rev_trans_map = vmalloc(sizeof(struct rrpc_rev_addr) > - * rrpc->nr_sects); > - if (!rrpc->rev_trans_map) > - return -ENOMEM; > - > for (i = 0; i < rrpc->nr_sects; i++) { > struct rrpc_addr *p = &rrpc->trans_map[i]; > - struct rrpc_rev_addr *r = &rrpc->rev_trans_map[i]; > > p->addr = ADDR_EMPTY; > - r->addr = ADDR_EMPTY; > } > > if (!dev->ops->get_l2p_tbl) > @@ -1143,25 +1153,69 @@ static void rrpc_luns_free(struct rrpc *rrpc) > if (!lun) > break; > dev->mt->release_lun(dev, lun->id); > + vfree(rlun->rev_trans_map); > vfree(rlun->blocks); > } > > kfree(rrpc->luns); > + rrpc->luns = NULL; > +} > + > +static int rrpc_lun_init(struct rrpc *rrpc, struct rrpc_lun *rlun, > + struct nvm_lun *lun) > +{ > + struct nvm_dev *dev = rrpc->dev; > + int i; > + > + rlun->rev_trans_map = vmalloc(sizeof(struct rrpc_rev_addr) * > + dev->sec_per_lun); > + if (!rlun->rev_trans_map) > + return -ENOMEM; > + > + for (i = 0; i < dev->sec_per_lun; i++) { > + struct rrpc_rev_addr *r = &rlun->rev_trans_map[i]; > + > + r->addr = ADDR_EMPTY; > + } > + rlun->blocks = vzalloc(sizeof(struct rrpc_block) * dev->blks_per_lun); > + if (!rlun->blocks) { > + vfree(rlun->rev_trans_map); > + return -ENOMEM; > + } > + > + for (i = 0; i < dev->blks_per_lun; i++) { > + struct rrpc_block *rblk = &rlun->blocks[i]; > + struct nvm_block *blk = &lun->blocks[i]; > + > + rblk->parent = blk; > + rblk->rlun = rlun; > + INIT_LIST_HEAD(&rblk->prio); > + spin_lock_init(&rblk->lock); > + } > + > + rlun->rrpc = rrpc; > + lun->private = rlun; > + INIT_LIST_HEAD(&rlun->prio_list); > + INIT_LIST_HEAD(&rlun->open_list); > + INIT_LIST_HEAD(&rlun->closed_list); > + INIT_WORK(&rlun->ws_gc, rrpc_lun_gc); > + spin_lock_init(&rlun->lock); > + spin_lock_init(&rlun->rev_lock); > + > + return 0; > } > > static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end) > { > struct nvm_dev *dev = rrpc->dev; > struct rrpc_lun *rlun; > - int i, j, ret = -EINVAL; > + int i, ret = -EINVAL; > > if (dev->sec_per_blk > MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) { > pr_err("rrpc: number of pages per block too high."); > return -EINVAL; > } > > - spin_lock_init(&rrpc->rev_lock); > - > rrpc->luns = kcalloc(rrpc->nr_luns, sizeof(struct rrpc_lun), > GFP_KERNEL); > if (!rrpc->luns) > @@ -1183,31 +1237,9 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end) > > rlun = &rrpc->luns[i]; > rlun->parent = lun; > - rlun->blocks = vzalloc(sizeof(struct rrpc_block) * > - rrpc->dev->blks_per_lun); > - if (!rlun->blocks) { > - ret = -ENOMEM; > + ret = rrpc_lun_init(rrpc, rlun, lun); > + if (ret) > goto err; > - } > - > - for (j = 0; j < rrpc->dev->blks_per_lun; j++) { > - struct rrpc_block *rblk = &rlun->blocks[j]; > - struct nvm_block *blk = &lun->blocks[j]; > - > - rblk->parent = blk; > - rblk->rlun = rlun; > - INIT_LIST_HEAD(&rblk->prio); > - spin_lock_init(&rblk->lock); > - } > - > - rlun->rrpc = rrpc; > - INIT_LIST_HEAD(&rlun->prio_list); > - INIT_LIST_HEAD(&rlun->open_list); > - INIT_LIST_HEAD(&rlun->closed_list); > - > - INIT_WORK(&rlun->ws_gc, rrpc_lun_gc); > - spin_lock_init(&rlun->lock); > - > rrpc->total_blocks += dev->blks_per_lun; > rrpc->nr_sects += dev->sec_per_lun; > > @@ -1215,6 +1247,7 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end) > > return 0; > err: > + rrpc_luns_free(rrpc); > return ret; > } > > @@ -1288,15 +1321,16 @@ static sector_t rrpc_capacity(void *private) > static void rrpc_block_map_update(struct rrpc *rrpc, struct rrpc_block *rblk) > { > struct nvm_dev *dev = rrpc->dev; > + struct rrpc_lun *rlun = rblk->rlun; > int offset; > struct rrpc_addr *laddr; > - u64 bpaddr, paddr, pladdr; > + u64 paddr, pladdr, poffset; > > - bpaddr = block_to_rel_addr(rrpc, rblk); > + poffset = lun_poffset(rlun->parent, dev); > for (offset = 0; offset < dev->sec_per_blk; offset++) { > - paddr = bpaddr + offset; > + paddr = block_to_addr(rrpc, rblk) + offset; > > - pladdr = rrpc->rev_trans_map[paddr].addr; > + pladdr = rlun->rev_trans_map[paddr - poffset].addr; > if (pladdr == ADDR_EMPTY) > continue; > > @@ -1405,9 +1439,6 @@ static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk, > goto err; > } > > - rrpc->poffset = dev->sec_per_lun * lun_begin; > - rrpc->lun_offset = lun_begin; > - > ret = rrpc_core_init(rrpc); > if (ret) { > pr_err("nvm: rrpc: could not initialize core\n"); > diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h > index 2653484..590f1b7 100644 > --- a/drivers/lightnvm/rrpc.h > +++ b/drivers/lightnvm/rrpc.h > @@ -87,6 +87,10 @@ struct rrpc_lun { > > struct work_struct ws_gc; > > + /* store a reverse map for garbage collection */ > + struct rrpc_rev_addr *rev_trans_map; > + spinlock_t rev_lock; > + > spinlock_t lock; > }; > > @@ -98,8 +102,6 @@ struct rrpc { > struct gendisk *disk; > > sector_t soffset; /* logical sector offset */ > - u64 poffset; /* physical page offset */ > - int lun_offset; > > int nr_luns; > struct rrpc_lun *luns; > @@ -124,9 +126,6 @@ struct rrpc { > * addresses are used when writing to the disk block device. > */ > struct rrpc_addr *trans_map; > - /* also store a reverse map for garbage collection */ > - struct rrpc_rev_addr *rev_trans_map; > - spinlock_t rev_lock; > > struct rrpc_inflight inflights; > > diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h > index cdcb2cc..419a3db 100644 > --- a/include/linux/lightnvm.h > +++ b/include/linux/lightnvm.h > @@ -276,6 +276,7 @@ struct nvm_lun { > spinlock_t lock; > > struct nvm_block *blocks; > + void *private; > }; > > enum { > It seems like there is a race-condition when GC hits. It panics on page_invalidation. You can test with the following fio script: [global] bs=4k ioengine=libaio iodepth=1 size=1G direct=1 rw=randwrite time_based runtime=300 [p1] numjobs=1 filename=/dev/test2