2016-03-20 05:51:56

by Wenwei Tao

[permalink] [raw]
Subject: [PATCH v2] lightnvm: divide global reverse translation map into per lun

Divide the target's global reverse translation map into per lun,
to prepare support for the non-continuous lun target creation.

Signed-off-by: Wenwei Tao <[email protected]>
---

Changes since v1
-fix merge/rebase mistake in rrpc_block_map_update().
-remove variables poffset and lun_offset in rrpc structure
since no one use them now.

drivers/lightnvm/rrpc.c | 181 +++++++++++++++++++++++++++--------------------
drivers/lightnvm/rrpc.h | 9 ++-
include/linux/lightnvm.h | 1 +
3 files changed, 111 insertions(+), 80 deletions(-)

diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index 3ab6495..88e2ce5 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -23,28 +23,35 @@ static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio,
struct nvm_rq *rqd, unsigned long flags);

#define rrpc_for_each_lun(rrpc, rlun, i) \
- for ((i) = 0, rlun = &(rrpc)->luns[0]; \
- (i) < (rrpc)->nr_luns; (i)++, rlun = &(rrpc)->luns[(i)])
+ for ((i) = 0, rlun = &(rrpc)->luns[0]; \
+ (i) < (rrpc)->nr_luns; (i)++, rlun = &(rrpc)->luns[(i)])
+
+static inline u64 lun_poffset(struct nvm_lun *lun, struct nvm_dev *dev)
+{
+ return lun->id * dev->sec_per_lun;
+}

static void rrpc_page_invalidate(struct rrpc *rrpc, struct rrpc_addr *a)
{
struct rrpc_block *rblk = a->rblk;
- unsigned int pg_offset;
+ struct rrpc_lun *rlun = rblk->rlun;
+ u64 pg_offset;

- lockdep_assert_held(&rrpc->rev_lock);
+ lockdep_assert_held(&rlun->rev_lock);

if (a->addr == ADDR_EMPTY || !rblk)
return;

spin_lock(&rblk->lock);

- div_u64_rem(a->addr, rrpc->dev->sec_per_blk, &pg_offset);
+ div_u64_rem(a->addr, rrpc->dev->sec_per_blk, (u32 *)&pg_offset);
WARN_ON(test_and_set_bit(pg_offset, rblk->invalid_pages));
rblk->nr_invalid_pages++;

spin_unlock(&rblk->lock);

- rrpc->rev_trans_map[a->addr - rrpc->poffset].addr = ADDR_EMPTY;
+ pg_offset = lun_poffset(rlun->parent, rrpc->dev);
+ rlun->rev_trans_map[a->addr - pg_offset].addr = ADDR_EMPTY;
}

static void rrpc_invalidate_range(struct rrpc *rrpc, sector_t slba,
@@ -52,14 +59,15 @@ static void rrpc_invalidate_range(struct rrpc *rrpc, sector_t slba,
{
sector_t i;

- spin_lock(&rrpc->rev_lock);
for (i = slba; i < slba + len; i++) {
struct rrpc_addr *gp = &rrpc->trans_map[i];
+ struct rrpc_lun *rlun = gp->rblk->rlun;

+ spin_lock(&rlun->rev_lock);
rrpc_page_invalidate(rrpc, gp);
+ spin_unlock(&rlun->rev_lock);
gp->rblk = NULL;
}
- spin_unlock(&rrpc->rev_lock);
}

static struct nvm_rq *rrpc_inflight_laddr_acquire(struct rrpc *rrpc,
@@ -116,15 +124,6 @@ static int block_is_full(struct rrpc *rrpc, struct rrpc_block *rblk)
return (rblk->next_page == rrpc->dev->sec_per_blk);
}

-/* Calculate relative addr for the given block, considering instantiated LUNs */
-static u64 block_to_rel_addr(struct rrpc *rrpc, struct rrpc_block *rblk)
-{
- struct nvm_block *blk = rblk->parent;
- int lun_blk = blk->id % (rrpc->dev->blks_per_lun * rrpc->nr_luns);
-
- return lun_blk * rrpc->dev->sec_per_blk;
-}
-
/* Calculate global addr for the given block */
static u64 block_to_addr(struct rrpc *rrpc, struct rrpc_block *rblk)
{
@@ -291,13 +290,14 @@ static void rrpc_end_sync_bio(struct bio *bio)
static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
{
struct request_queue *q = rrpc->dev->q;
+ struct rrpc_lun *rlun = rblk->rlun;
struct rrpc_rev_addr *rev;
struct nvm_rq *rqd;
struct bio *bio;
struct page *page;
int slot;
int nr_sec_per_blk = rrpc->dev->sec_per_blk;
- u64 phys_addr;
+ u64 phys_addr, poffset;
DECLARE_COMPLETION_ONSTACK(wait);

if (bitmap_full(rblk->invalid_pages, nr_sec_per_blk))
@@ -315,6 +315,7 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
return -ENOMEM;
}

+ poffset = lun_poffset(rlun->parent, rrpc->dev);
while ((slot = find_first_zero_bit(rblk->invalid_pages,
nr_sec_per_blk)) < nr_sec_per_blk) {

@@ -322,23 +323,23 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
phys_addr = rblk->parent->id * nr_sec_per_blk + slot;

try:
- spin_lock(&rrpc->rev_lock);
+ spin_lock(&rlun->rev_lock);
/* Get logical address from physical to logical table */
- rev = &rrpc->rev_trans_map[phys_addr - rrpc->poffset];
+ rev = &rlun->rev_trans_map[phys_addr - poffset];
/* already updated by previous regular write */
if (rev->addr == ADDR_EMPTY) {
- spin_unlock(&rrpc->rev_lock);
+ spin_unlock(&rlun->rev_lock);
continue;
}

rqd = rrpc_inflight_laddr_acquire(rrpc, rev->addr, 1);
if (IS_ERR_OR_NULL(rqd)) {
- spin_unlock(&rrpc->rev_lock);
+ spin_unlock(&rlun->rev_lock);
schedule();
goto try;
}

- spin_unlock(&rrpc->rev_lock);
+ spin_unlock(&rlun->rev_lock);

/* Perform read to do GC */
bio->bi_iter.bi_sector = rrpc_get_sector(rev->addr);
@@ -407,7 +408,7 @@ static void rrpc_block_gc(struct work_struct *work)
struct rrpc_block *rblk = gcb->rblk;
struct nvm_dev *dev = rrpc->dev;
struct nvm_lun *lun = rblk->parent->lun;
- struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset];
+ struct rrpc_lun *rlun = lun->private;

mempool_free(gcb, rrpc->gcb_pool);
pr_debug("nvm: block '%lu' being reclaimed\n", rblk->parent->id);
@@ -510,7 +511,7 @@ static void rrpc_gc_queue(struct work_struct *work)
struct rrpc_block *rblk = gcb->rblk;
struct nvm_lun *lun = rblk->parent->lun;
struct nvm_block *blk = rblk->parent;
- struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset];
+ struct rrpc_lun *rlun = lun->private;

spin_lock(&rlun->lock);
list_add_tail(&rblk->prio, &rlun->prio_list);
@@ -561,22 +562,25 @@ static struct rrpc_lun *rrpc_get_lun_rr(struct rrpc *rrpc, int is_gc)
static struct rrpc_addr *rrpc_update_map(struct rrpc *rrpc, sector_t laddr,
struct rrpc_block *rblk, u64 paddr)
{
+ struct rrpc_lun *rlun = rblk->rlun;
struct rrpc_addr *gp;
struct rrpc_rev_addr *rev;
+ u64 poffset = lun_poffset(rlun->parent, rrpc->dev);

BUG_ON(laddr >= rrpc->nr_sects);

gp = &rrpc->trans_map[laddr];
- spin_lock(&rrpc->rev_lock);
+ spin_lock(&rlun->rev_lock);
+
if (gp->rblk)
rrpc_page_invalidate(rrpc, gp);

gp->addr = paddr;
gp->rblk = rblk;

- rev = &rrpc->rev_trans_map[gp->addr - rrpc->poffset];
+ rev = &rlun->rev_trans_map[gp->addr - poffset];
rev->addr = laddr;
- spin_unlock(&rrpc->rev_lock);
+ spin_unlock(&rlun->rev_lock);

return gp;
}
@@ -990,7 +994,6 @@ static int rrpc_gc_init(struct rrpc *rrpc)

static void rrpc_map_free(struct rrpc *rrpc)
{
- vfree(rrpc->rev_trans_map);
vfree(rrpc->trans_map);
}

@@ -998,8 +1001,8 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private)
{
struct rrpc *rrpc = (struct rrpc *)private;
struct nvm_dev *dev = rrpc->dev;
- struct rrpc_addr *addr = rrpc->trans_map + slba;
- struct rrpc_rev_addr *raddr = rrpc->rev_trans_map;
+ struct rrpc_addr *addr;
+ struct rrpc_rev_addr *raddr;
u64 elba = slba + nlb;
u64 i;

@@ -1008,9 +1011,15 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private)
return -EINVAL;
}

+ slba -= rrpc->soffset >> (ilog2(dev->sec_size) - 9);
+ addr = rrpc->trans_map + slba;
for (i = 0; i < nlb; i++) {
+ struct rrpc_lun *rlun;
+ struct nvm_lun *lun;
u64 pba = le64_to_cpu(entries[i]);
- unsigned int mod;
+ u64 poffset;
+ int lunid;
+
/* LNVM treats address-spaces as silos, LBA and PBA are
* equally large and zero-indexed.
*/
@@ -1026,10 +1035,18 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private)
if (!pba)
continue;

- div_u64_rem(pba, rrpc->nr_sects, &mod);
+ lunid = div_u64(pba, dev->sec_per_lun);
+ lun = dev->mt->get_lun(dev, lunid);
+
+ if (unlikely(!lun))
+ return -EINVAL;
+
+ rlun = lun->private;
+ raddr = rlun->rev_trans_map;
+ poffset = lun_poffset(lun, dev);

addr[i].addr = pba;
- raddr[mod].addr = slba + i;
+ raddr[pba - poffset].addr = slba + i;
}

return 0;
@@ -1048,17 +1065,10 @@ static int rrpc_map_init(struct rrpc *rrpc)
if (!rrpc->trans_map)
return -ENOMEM;

- rrpc->rev_trans_map = vmalloc(sizeof(struct rrpc_rev_addr)
- * rrpc->nr_sects);
- if (!rrpc->rev_trans_map)
- return -ENOMEM;
-
for (i = 0; i < rrpc->nr_sects; i++) {
struct rrpc_addr *p = &rrpc->trans_map[i];
- struct rrpc_rev_addr *r = &rrpc->rev_trans_map[i];

p->addr = ADDR_EMPTY;
- r->addr = ADDR_EMPTY;
}

if (!dev->ops->get_l2p_tbl)
@@ -1143,25 +1153,69 @@ static void rrpc_luns_free(struct rrpc *rrpc)
if (!lun)
break;
dev->mt->release_lun(dev, lun->id);
+ vfree(rlun->rev_trans_map);
vfree(rlun->blocks);
}

kfree(rrpc->luns);
+ rrpc->luns = NULL;
+}
+
+static int rrpc_lun_init(struct rrpc *rrpc, struct rrpc_lun *rlun,
+ struct nvm_lun *lun)
+{
+ struct nvm_dev *dev = rrpc->dev;
+ int i;
+
+ rlun->rev_trans_map = vmalloc(sizeof(struct rrpc_rev_addr) *
+ dev->sec_per_lun);
+ if (!rlun->rev_trans_map)
+ return -ENOMEM;
+
+ for (i = 0; i < dev->sec_per_lun; i++) {
+ struct rrpc_rev_addr *r = &rlun->rev_trans_map[i];
+
+ r->addr = ADDR_EMPTY;
+ }
+ rlun->blocks = vzalloc(sizeof(struct rrpc_block) * dev->blks_per_lun);
+ if (!rlun->blocks) {
+ vfree(rlun->rev_trans_map);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < dev->blks_per_lun; i++) {
+ struct rrpc_block *rblk = &rlun->blocks[i];
+ struct nvm_block *blk = &lun->blocks[i];
+
+ rblk->parent = blk;
+ rblk->rlun = rlun;
+ INIT_LIST_HEAD(&rblk->prio);
+ spin_lock_init(&rblk->lock);
+ }
+
+ rlun->rrpc = rrpc;
+ lun->private = rlun;
+ INIT_LIST_HEAD(&rlun->prio_list);
+ INIT_LIST_HEAD(&rlun->open_list);
+ INIT_LIST_HEAD(&rlun->closed_list);
+ INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
+ spin_lock_init(&rlun->lock);
+ spin_lock_init(&rlun->rev_lock);
+
+ return 0;
}

static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
{
struct nvm_dev *dev = rrpc->dev;
struct rrpc_lun *rlun;
- int i, j, ret = -EINVAL;
+ int i, ret = -EINVAL;

if (dev->sec_per_blk > MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) {
pr_err("rrpc: number of pages per block too high.");
return -EINVAL;
}

- spin_lock_init(&rrpc->rev_lock);
-
rrpc->luns = kcalloc(rrpc->nr_luns, sizeof(struct rrpc_lun),
GFP_KERNEL);
if (!rrpc->luns)
@@ -1183,31 +1237,9 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)

rlun = &rrpc->luns[i];
rlun->parent = lun;
- rlun->blocks = vzalloc(sizeof(struct rrpc_block) *
- rrpc->dev->blks_per_lun);
- if (!rlun->blocks) {
- ret = -ENOMEM;
+ ret = rrpc_lun_init(rrpc, rlun, lun);
+ if (ret)
goto err;
- }
-
- for (j = 0; j < rrpc->dev->blks_per_lun; j++) {
- struct rrpc_block *rblk = &rlun->blocks[j];
- struct nvm_block *blk = &lun->blocks[j];
-
- rblk->parent = blk;
- rblk->rlun = rlun;
- INIT_LIST_HEAD(&rblk->prio);
- spin_lock_init(&rblk->lock);
- }
-
- rlun->rrpc = rrpc;
- INIT_LIST_HEAD(&rlun->prio_list);
- INIT_LIST_HEAD(&rlun->open_list);
- INIT_LIST_HEAD(&rlun->closed_list);
-
- INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
- spin_lock_init(&rlun->lock);
-
rrpc->total_blocks += dev->blks_per_lun;
rrpc->nr_sects += dev->sec_per_lun;

@@ -1215,6 +1247,7 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)

return 0;
err:
+ rrpc_luns_free(rrpc);
return ret;
}

@@ -1288,15 +1321,16 @@ static sector_t rrpc_capacity(void *private)
static void rrpc_block_map_update(struct rrpc *rrpc, struct rrpc_block *rblk)
{
struct nvm_dev *dev = rrpc->dev;
+ struct rrpc_lun *rlun = rblk->rlun;
int offset;
struct rrpc_addr *laddr;
- u64 bpaddr, paddr, pladdr;
+ u64 paddr, pladdr, poffset;

- bpaddr = block_to_rel_addr(rrpc, rblk);
+ poffset = lun_poffset(rlun->parent, dev);
for (offset = 0; offset < dev->sec_per_blk; offset++) {
- paddr = bpaddr + offset;
+ paddr = block_to_addr(rrpc, rblk) + offset;

- pladdr = rrpc->rev_trans_map[paddr].addr;
+ pladdr = rlun->rev_trans_map[paddr - poffset].addr;
if (pladdr == ADDR_EMPTY)
continue;

@@ -1405,9 +1439,6 @@ static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk,
goto err;
}

- rrpc->poffset = dev->sec_per_lun * lun_begin;
- rrpc->lun_offset = lun_begin;
-
ret = rrpc_core_init(rrpc);
if (ret) {
pr_err("nvm: rrpc: could not initialize core\n");
diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h
index 2653484..590f1b7 100644
--- a/drivers/lightnvm/rrpc.h
+++ b/drivers/lightnvm/rrpc.h
@@ -87,6 +87,10 @@ struct rrpc_lun {

struct work_struct ws_gc;

+ /* store a reverse map for garbage collection */
+ struct rrpc_rev_addr *rev_trans_map;
+ spinlock_t rev_lock;
+
spinlock_t lock;
};

@@ -98,8 +102,6 @@ struct rrpc {
struct gendisk *disk;

sector_t soffset; /* logical sector offset */
- u64 poffset; /* physical page offset */
- int lun_offset;

int nr_luns;
struct rrpc_lun *luns;
@@ -124,9 +126,6 @@ struct rrpc {
* addresses are used when writing to the disk block device.
*/
struct rrpc_addr *trans_map;
- /* also store a reverse map for garbage collection */
- struct rrpc_rev_addr *rev_trans_map;
- spinlock_t rev_lock;

struct rrpc_inflight inflights;

diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index cdcb2cc..419a3db 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -276,6 +276,7 @@ struct nvm_lun {
spinlock_t lock;

struct nvm_block *blocks;
+ void *private;
};

enum {
--
1.9.1


2016-03-21 10:25:59

by Matias Bjørling

[permalink] [raw]
Subject: Re: [PATCH v2] lightnvm: divide global reverse translation map into per lun

On 03/20/2016 06:51 AM, Wenwei Tao wrote:
> Divide the target's global reverse translation map into per lun,
> to prepare support for the non-continuous lun target creation.
>
> Signed-off-by: Wenwei Tao <[email protected]>
> ---
>
> Changes since v1
> -fix merge/rebase mistake in rrpc_block_map_update().
> -remove variables poffset and lun_offset in rrpc structure
> since no one use them now.
>
> drivers/lightnvm/rrpc.c | 181 +++++++++++++++++++++++++++--------------------
> drivers/lightnvm/rrpc.h | 9 ++-
> include/linux/lightnvm.h | 1 +
> 3 files changed, 111 insertions(+), 80 deletions(-)
>
> diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
> index 3ab6495..88e2ce5 100644
> --- a/drivers/lightnvm/rrpc.c
> +++ b/drivers/lightnvm/rrpc.c
> @@ -23,28 +23,35 @@ static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio,
> struct nvm_rq *rqd, unsigned long flags);
>
> #define rrpc_for_each_lun(rrpc, rlun, i) \
> - for ((i) = 0, rlun = &(rrpc)->luns[0]; \
> - (i) < (rrpc)->nr_luns; (i)++, rlun = &(rrpc)->luns[(i)])
> + for ((i) = 0, rlun = &(rrpc)->luns[0]; \
> + (i) < (rrpc)->nr_luns; (i)++, rlun = &(rrpc)->luns[(i)])
> +
> +static inline u64 lun_poffset(struct nvm_lun *lun, struct nvm_dev *dev)
> +{
> + return lun->id * dev->sec_per_lun;
> +}
>
> static void rrpc_page_invalidate(struct rrpc *rrpc, struct rrpc_addr *a)
> {
> struct rrpc_block *rblk = a->rblk;
> - unsigned int pg_offset;
> + struct rrpc_lun *rlun = rblk->rlun;
> + u64 pg_offset;
>
> - lockdep_assert_held(&rrpc->rev_lock);
> + lockdep_assert_held(&rlun->rev_lock);
>
> if (a->addr == ADDR_EMPTY || !rblk)
> return;
>
> spin_lock(&rblk->lock);
>
> - div_u64_rem(a->addr, rrpc->dev->sec_per_blk, &pg_offset);
> + div_u64_rem(a->addr, rrpc->dev->sec_per_blk, (u32 *)&pg_offset);
> WARN_ON(test_and_set_bit(pg_offset, rblk->invalid_pages));
> rblk->nr_invalid_pages++;
>
> spin_unlock(&rblk->lock);
>
> - rrpc->rev_trans_map[a->addr - rrpc->poffset].addr = ADDR_EMPTY;
> + pg_offset = lun_poffset(rlun->parent, rrpc->dev);
> + rlun->rev_trans_map[a->addr - pg_offset].addr = ADDR_EMPTY;
> }
>
> static void rrpc_invalidate_range(struct rrpc *rrpc, sector_t slba,
> @@ -52,14 +59,15 @@ static void rrpc_invalidate_range(struct rrpc *rrpc, sector_t slba,
> {
> sector_t i;
>
> - spin_lock(&rrpc->rev_lock);
> for (i = slba; i < slba + len; i++) {
> struct rrpc_addr *gp = &rrpc->trans_map[i];
> + struct rrpc_lun *rlun = gp->rblk->rlun;
>
> + spin_lock(&rlun->rev_lock);
> rrpc_page_invalidate(rrpc, gp);
> + spin_unlock(&rlun->rev_lock);
> gp->rblk = NULL;
> }
> - spin_unlock(&rrpc->rev_lock);
> }
>
> static struct nvm_rq *rrpc_inflight_laddr_acquire(struct rrpc *rrpc,
> @@ -116,15 +124,6 @@ static int block_is_full(struct rrpc *rrpc, struct rrpc_block *rblk)
> return (rblk->next_page == rrpc->dev->sec_per_blk);
> }
>
> -/* Calculate relative addr for the given block, considering instantiated LUNs */
> -static u64 block_to_rel_addr(struct rrpc *rrpc, struct rrpc_block *rblk)
> -{
> - struct nvm_block *blk = rblk->parent;
> - int lun_blk = blk->id % (rrpc->dev->blks_per_lun * rrpc->nr_luns);
> -
> - return lun_blk * rrpc->dev->sec_per_blk;
> -}
> -
> /* Calculate global addr for the given block */
> static u64 block_to_addr(struct rrpc *rrpc, struct rrpc_block *rblk)
> {
> @@ -291,13 +290,14 @@ static void rrpc_end_sync_bio(struct bio *bio)
> static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
> {
> struct request_queue *q = rrpc->dev->q;
> + struct rrpc_lun *rlun = rblk->rlun;
> struct rrpc_rev_addr *rev;
> struct nvm_rq *rqd;
> struct bio *bio;
> struct page *page;
> int slot;
> int nr_sec_per_blk = rrpc->dev->sec_per_blk;
> - u64 phys_addr;
> + u64 phys_addr, poffset;
> DECLARE_COMPLETION_ONSTACK(wait);
>
> if (bitmap_full(rblk->invalid_pages, nr_sec_per_blk))
> @@ -315,6 +315,7 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
> return -ENOMEM;
> }
>
> + poffset = lun_poffset(rlun->parent, rrpc->dev);
> while ((slot = find_first_zero_bit(rblk->invalid_pages,
> nr_sec_per_blk)) < nr_sec_per_blk) {
>
> @@ -322,23 +323,23 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
> phys_addr = rblk->parent->id * nr_sec_per_blk + slot;
>
> try:
> - spin_lock(&rrpc->rev_lock);
> + spin_lock(&rlun->rev_lock);
> /* Get logical address from physical to logical table */
> - rev = &rrpc->rev_trans_map[phys_addr - rrpc->poffset];
> + rev = &rlun->rev_trans_map[phys_addr - poffset];
> /* already updated by previous regular write */
> if (rev->addr == ADDR_EMPTY) {
> - spin_unlock(&rrpc->rev_lock);
> + spin_unlock(&rlun->rev_lock);
> continue;
> }
>
> rqd = rrpc_inflight_laddr_acquire(rrpc, rev->addr, 1);
> if (IS_ERR_OR_NULL(rqd)) {
> - spin_unlock(&rrpc->rev_lock);
> + spin_unlock(&rlun->rev_lock);
> schedule();
> goto try;
> }
>
> - spin_unlock(&rrpc->rev_lock);
> + spin_unlock(&rlun->rev_lock);
>
> /* Perform read to do GC */
> bio->bi_iter.bi_sector = rrpc_get_sector(rev->addr);
> @@ -407,7 +408,7 @@ static void rrpc_block_gc(struct work_struct *work)
> struct rrpc_block *rblk = gcb->rblk;
> struct nvm_dev *dev = rrpc->dev;
> struct nvm_lun *lun = rblk->parent->lun;
> - struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset];
> + struct rrpc_lun *rlun = lun->private;
>
> mempool_free(gcb, rrpc->gcb_pool);
> pr_debug("nvm: block '%lu' being reclaimed\n", rblk->parent->id);
> @@ -510,7 +511,7 @@ static void rrpc_gc_queue(struct work_struct *work)
> struct rrpc_block *rblk = gcb->rblk;
> struct nvm_lun *lun = rblk->parent->lun;
> struct nvm_block *blk = rblk->parent;
> - struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset];
> + struct rrpc_lun *rlun = lun->private;
>
> spin_lock(&rlun->lock);
> list_add_tail(&rblk->prio, &rlun->prio_list);
> @@ -561,22 +562,25 @@ static struct rrpc_lun *rrpc_get_lun_rr(struct rrpc *rrpc, int is_gc)
> static struct rrpc_addr *rrpc_update_map(struct rrpc *rrpc, sector_t laddr,
> struct rrpc_block *rblk, u64 paddr)
> {
> + struct rrpc_lun *rlun = rblk->rlun;
> struct rrpc_addr *gp;
> struct rrpc_rev_addr *rev;
> + u64 poffset = lun_poffset(rlun->parent, rrpc->dev);
>
> BUG_ON(laddr >= rrpc->nr_sects);
>
> gp = &rrpc->trans_map[laddr];
> - spin_lock(&rrpc->rev_lock);
> + spin_lock(&rlun->rev_lock);
> +
> if (gp->rblk)
> rrpc_page_invalidate(rrpc, gp);
>
> gp->addr = paddr;
> gp->rblk = rblk;
>
> - rev = &rrpc->rev_trans_map[gp->addr - rrpc->poffset];
> + rev = &rlun->rev_trans_map[gp->addr - poffset];
> rev->addr = laddr;
> - spin_unlock(&rrpc->rev_lock);
> + spin_unlock(&rlun->rev_lock);
>
> return gp;
> }
> @@ -990,7 +994,6 @@ static int rrpc_gc_init(struct rrpc *rrpc)
>
> static void rrpc_map_free(struct rrpc *rrpc)
> {
> - vfree(rrpc->rev_trans_map);
> vfree(rrpc->trans_map);
> }
>
> @@ -998,8 +1001,8 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private)
> {
> struct rrpc *rrpc = (struct rrpc *)private;
> struct nvm_dev *dev = rrpc->dev;
> - struct rrpc_addr *addr = rrpc->trans_map + slba;
> - struct rrpc_rev_addr *raddr = rrpc->rev_trans_map;
> + struct rrpc_addr *addr;
> + struct rrpc_rev_addr *raddr;
> u64 elba = slba + nlb;
> u64 i;
>
> @@ -1008,9 +1011,15 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private)
> return -EINVAL;
> }
>
> + slba -= rrpc->soffset >> (ilog2(dev->sec_size) - 9);
> + addr = rrpc->trans_map + slba;
> for (i = 0; i < nlb; i++) {
> + struct rrpc_lun *rlun;
> + struct nvm_lun *lun;
> u64 pba = le64_to_cpu(entries[i]);
> - unsigned int mod;
> + u64 poffset;
> + int lunid;
> +
> /* LNVM treats address-spaces as silos, LBA and PBA are
> * equally large and zero-indexed.
> */
> @@ -1026,10 +1035,18 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private)
> if (!pba)
> continue;
>
> - div_u64_rem(pba, rrpc->nr_sects, &mod);
> + lunid = div_u64(pba, dev->sec_per_lun);
> + lun = dev->mt->get_lun(dev, lunid);
> +
> + if (unlikely(!lun))
> + return -EINVAL;
> +
> + rlun = lun->private;
> + raddr = rlun->rev_trans_map;
> + poffset = lun_poffset(lun, dev);
>
> addr[i].addr = pba;
> - raddr[mod].addr = slba + i;
> + raddr[pba - poffset].addr = slba + i;
> }
>
> return 0;
> @@ -1048,17 +1065,10 @@ static int rrpc_map_init(struct rrpc *rrpc)
> if (!rrpc->trans_map)
> return -ENOMEM;
>
> - rrpc->rev_trans_map = vmalloc(sizeof(struct rrpc_rev_addr)
> - * rrpc->nr_sects);
> - if (!rrpc->rev_trans_map)
> - return -ENOMEM;
> -
> for (i = 0; i < rrpc->nr_sects; i++) {
> struct rrpc_addr *p = &rrpc->trans_map[i];
> - struct rrpc_rev_addr *r = &rrpc->rev_trans_map[i];
>
> p->addr = ADDR_EMPTY;
> - r->addr = ADDR_EMPTY;
> }
>
> if (!dev->ops->get_l2p_tbl)
> @@ -1143,25 +1153,69 @@ static void rrpc_luns_free(struct rrpc *rrpc)
> if (!lun)
> break;
> dev->mt->release_lun(dev, lun->id);
> + vfree(rlun->rev_trans_map);
> vfree(rlun->blocks);
> }
>
> kfree(rrpc->luns);
> + rrpc->luns = NULL;
> +}
> +
> +static int rrpc_lun_init(struct rrpc *rrpc, struct rrpc_lun *rlun,
> + struct nvm_lun *lun)
> +{
> + struct nvm_dev *dev = rrpc->dev;
> + int i;
> +
> + rlun->rev_trans_map = vmalloc(sizeof(struct rrpc_rev_addr) *
> + dev->sec_per_lun);
> + if (!rlun->rev_trans_map)
> + return -ENOMEM;
> +
> + for (i = 0; i < dev->sec_per_lun; i++) {
> + struct rrpc_rev_addr *r = &rlun->rev_trans_map[i];
> +
> + r->addr = ADDR_EMPTY;
> + }
> + rlun->blocks = vzalloc(sizeof(struct rrpc_block) * dev->blks_per_lun);
> + if (!rlun->blocks) {
> + vfree(rlun->rev_trans_map);
> + return -ENOMEM;
> + }
> +
> + for (i = 0; i < dev->blks_per_lun; i++) {
> + struct rrpc_block *rblk = &rlun->blocks[i];
> + struct nvm_block *blk = &lun->blocks[i];
> +
> + rblk->parent = blk;
> + rblk->rlun = rlun;
> + INIT_LIST_HEAD(&rblk->prio);
> + spin_lock_init(&rblk->lock);
> + }
> +
> + rlun->rrpc = rrpc;
> + lun->private = rlun;
> + INIT_LIST_HEAD(&rlun->prio_list);
> + INIT_LIST_HEAD(&rlun->open_list);
> + INIT_LIST_HEAD(&rlun->closed_list);
> + INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
> + spin_lock_init(&rlun->lock);
> + spin_lock_init(&rlun->rev_lock);
> +
> + return 0;
> }
>
> static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
> {
> struct nvm_dev *dev = rrpc->dev;
> struct rrpc_lun *rlun;
> - int i, j, ret = -EINVAL;
> + int i, ret = -EINVAL;
>
> if (dev->sec_per_blk > MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) {
> pr_err("rrpc: number of pages per block too high.");
> return -EINVAL;
> }
>
> - spin_lock_init(&rrpc->rev_lock);
> -
> rrpc->luns = kcalloc(rrpc->nr_luns, sizeof(struct rrpc_lun),
> GFP_KERNEL);
> if (!rrpc->luns)
> @@ -1183,31 +1237,9 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
>
> rlun = &rrpc->luns[i];
> rlun->parent = lun;
> - rlun->blocks = vzalloc(sizeof(struct rrpc_block) *
> - rrpc->dev->blks_per_lun);
> - if (!rlun->blocks) {
> - ret = -ENOMEM;
> + ret = rrpc_lun_init(rrpc, rlun, lun);
> + if (ret)
> goto err;
> - }
> -
> - for (j = 0; j < rrpc->dev->blks_per_lun; j++) {
> - struct rrpc_block *rblk = &rlun->blocks[j];
> - struct nvm_block *blk = &lun->blocks[j];
> -
> - rblk->parent = blk;
> - rblk->rlun = rlun;
> - INIT_LIST_HEAD(&rblk->prio);
> - spin_lock_init(&rblk->lock);
> - }
> -
> - rlun->rrpc = rrpc;
> - INIT_LIST_HEAD(&rlun->prio_list);
> - INIT_LIST_HEAD(&rlun->open_list);
> - INIT_LIST_HEAD(&rlun->closed_list);
> -
> - INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
> - spin_lock_init(&rlun->lock);
> -
> rrpc->total_blocks += dev->blks_per_lun;
> rrpc->nr_sects += dev->sec_per_lun;
>
> @@ -1215,6 +1247,7 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
>
> return 0;
> err:
> + rrpc_luns_free(rrpc);
> return ret;
> }
>
> @@ -1288,15 +1321,16 @@ static sector_t rrpc_capacity(void *private)
> static void rrpc_block_map_update(struct rrpc *rrpc, struct rrpc_block *rblk)
> {
> struct nvm_dev *dev = rrpc->dev;
> + struct rrpc_lun *rlun = rblk->rlun;
> int offset;
> struct rrpc_addr *laddr;
> - u64 bpaddr, paddr, pladdr;
> + u64 paddr, pladdr, poffset;
>
> - bpaddr = block_to_rel_addr(rrpc, rblk);
> + poffset = lun_poffset(rlun->parent, dev);
> for (offset = 0; offset < dev->sec_per_blk; offset++) {
> - paddr = bpaddr + offset;
> + paddr = block_to_addr(rrpc, rblk) + offset;
>
> - pladdr = rrpc->rev_trans_map[paddr].addr;
> + pladdr = rlun->rev_trans_map[paddr - poffset].addr;
> if (pladdr == ADDR_EMPTY)
> continue;
>
> @@ -1405,9 +1439,6 @@ static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk,
> goto err;
> }
>
> - rrpc->poffset = dev->sec_per_lun * lun_begin;
> - rrpc->lun_offset = lun_begin;
> -
> ret = rrpc_core_init(rrpc);
> if (ret) {
> pr_err("nvm: rrpc: could not initialize core\n");
> diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h
> index 2653484..590f1b7 100644
> --- a/drivers/lightnvm/rrpc.h
> +++ b/drivers/lightnvm/rrpc.h
> @@ -87,6 +87,10 @@ struct rrpc_lun {
>
> struct work_struct ws_gc;
>
> + /* store a reverse map for garbage collection */
> + struct rrpc_rev_addr *rev_trans_map;
> + spinlock_t rev_lock;
> +
> spinlock_t lock;
> };
>
> @@ -98,8 +102,6 @@ struct rrpc {
> struct gendisk *disk;
>
> sector_t soffset; /* logical sector offset */
> - u64 poffset; /* physical page offset */
> - int lun_offset;
>
> int nr_luns;
> struct rrpc_lun *luns;
> @@ -124,9 +126,6 @@ struct rrpc {
> * addresses are used when writing to the disk block device.
> */
> struct rrpc_addr *trans_map;
> - /* also store a reverse map for garbage collection */
> - struct rrpc_rev_addr *rev_trans_map;
> - spinlock_t rev_lock;
>
> struct rrpc_inflight inflights;
>
> diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
> index cdcb2cc..419a3db 100644
> --- a/include/linux/lightnvm.h
> +++ b/include/linux/lightnvm.h
> @@ -276,6 +276,7 @@ struct nvm_lun {
> spinlock_t lock;
>
> struct nvm_block *blocks;
> + void *private;
> };
>
> enum {
>

It seems like there is a race-condition when GC hits. It panics on
page_invalidation. You can test with the following fio script:

[global]
bs=4k
ioengine=libaio
iodepth=1
size=1G
direct=1
rw=randwrite
time_based
runtime=300

[p1]
numjobs=1
filename=/dev/test2

2016-03-22 10:17:14

by Wenwei Tao

[permalink] [raw]
Subject: Re: [PATCH v2] lightnvm: divide global reverse translation map into per lun

Seems it is the variable pg_offset in rrpc_page_invalidate that cause the panic.
It's a u64 variable we use its low 32bit to store the offset value,
leave the high 32bit in a undetermined status, this will cause
test_and_set_bit access unexpected memory, cause kernel panic.
It's should be initialized to zero before use.

I add some log to verify this assuption, bleow is the log

[ 485.260549] rrpc: paddr: 400 sec_per_blk: 100 offset: ffff880000000000
[ 485.260563] BUG: unable to handle kernel paging request at ffffba0000d4f230

offset value ffff880000000000 may be passed to test_and_set_bit instead of 0.

2016-03-21 18:25 GMT+08:00 Matias Bjørling <[email protected]>:
> On 03/20/2016 06:51 AM, Wenwei Tao wrote:
>>
>> Divide the target's global reverse translation map into per lun,
>> to prepare support for the non-continuous lun target creation.
>>
>> Signed-off-by: Wenwei Tao <[email protected]>
>> ---
>>
>> Changes since v1
>> -fix merge/rebase mistake in rrpc_block_map_update().
>> -remove variables poffset and lun_offset in rrpc structure
>> since no one use them now.
>>
>> drivers/lightnvm/rrpc.c | 181
>> +++++++++++++++++++++++++++--------------------
>> drivers/lightnvm/rrpc.h | 9 ++-
>> include/linux/lightnvm.h | 1 +
>> 3 files changed, 111 insertions(+), 80 deletions(-)
>>
>> diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
>> index 3ab6495..88e2ce5 100644
>> --- a/drivers/lightnvm/rrpc.c
>> +++ b/drivers/lightnvm/rrpc.c
>> @@ -23,28 +23,35 @@ static int rrpc_submit_io(struct rrpc *rrpc, struct
>> bio *bio,
>> struct nvm_rq *rqd, unsigned long flags);
>>
>> #define rrpc_for_each_lun(rrpc, rlun, i) \
>> - for ((i) = 0, rlun = &(rrpc)->luns[0]; \
>> - (i) < (rrpc)->nr_luns; (i)++, rlun =
>> &(rrpc)->luns[(i)])
>> + for ((i) = 0, rlun = &(rrpc)->luns[0]; \
>> + (i) < (rrpc)->nr_luns; (i)++, rlun = &(rrpc)->luns[(i)])
>> +
>> +static inline u64 lun_poffset(struct nvm_lun *lun, struct nvm_dev *dev)
>> +{
>> + return lun->id * dev->sec_per_lun;
>> +}
>>
>> static void rrpc_page_invalidate(struct rrpc *rrpc, struct rrpc_addr *a)
>> {
>> struct rrpc_block *rblk = a->rblk;
>> - unsigned int pg_offset;
>> + struct rrpc_lun *rlun = rblk->rlun;
>> + u64 pg_offset;
>>
>> - lockdep_assert_held(&rrpc->rev_lock);
>> + lockdep_assert_held(&rlun->rev_lock);
>>
>> if (a->addr == ADDR_EMPTY || !rblk)
>> return;
>>
>> spin_lock(&rblk->lock);
>>
>> - div_u64_rem(a->addr, rrpc->dev->sec_per_blk, &pg_offset);
>> + div_u64_rem(a->addr, rrpc->dev->sec_per_blk, (u32 *)&pg_offset);
>> WARN_ON(test_and_set_bit(pg_offset, rblk->invalid_pages));
>> rblk->nr_invalid_pages++;
>>
>> spin_unlock(&rblk->lock);
>>
>> - rrpc->rev_trans_map[a->addr - rrpc->poffset].addr = ADDR_EMPTY;
>> + pg_offset = lun_poffset(rlun->parent, rrpc->dev);
>> + rlun->rev_trans_map[a->addr - pg_offset].addr = ADDR_EMPTY;
>> }
>>
>> static void rrpc_invalidate_range(struct rrpc *rrpc, sector_t slba,
>> @@ -52,14 +59,15 @@ static void rrpc_invalidate_range(struct rrpc *rrpc,
>> sector_t slba,
>> {
>> sector_t i;
>>
>> - spin_lock(&rrpc->rev_lock);
>> for (i = slba; i < slba + len; i++) {
>> struct rrpc_addr *gp = &rrpc->trans_map[i];
>> + struct rrpc_lun *rlun = gp->rblk->rlun;
>>
>> + spin_lock(&rlun->rev_lock);
>> rrpc_page_invalidate(rrpc, gp);
>> + spin_unlock(&rlun->rev_lock);
>> gp->rblk = NULL;
>> }
>> - spin_unlock(&rrpc->rev_lock);
>> }
>>
>> static struct nvm_rq *rrpc_inflight_laddr_acquire(struct rrpc *rrpc,
>> @@ -116,15 +124,6 @@ static int block_is_full(struct rrpc *rrpc, struct
>> rrpc_block *rblk)
>> return (rblk->next_page == rrpc->dev->sec_per_blk);
>> }
>>
>> -/* Calculate relative addr for the given block, considering instantiated
>> LUNs */
>> -static u64 block_to_rel_addr(struct rrpc *rrpc, struct rrpc_block *rblk)
>> -{
>> - struct nvm_block *blk = rblk->parent;
>> - int lun_blk = blk->id % (rrpc->dev->blks_per_lun * rrpc->nr_luns);
>> -
>> - return lun_blk * rrpc->dev->sec_per_blk;
>> -}
>> -
>> /* Calculate global addr for the given block */
>> static u64 block_to_addr(struct rrpc *rrpc, struct rrpc_block *rblk)
>> {
>> @@ -291,13 +290,14 @@ static void rrpc_end_sync_bio(struct bio *bio)
>> static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block
>> *rblk)
>> {
>> struct request_queue *q = rrpc->dev->q;
>> + struct rrpc_lun *rlun = rblk->rlun;
>> struct rrpc_rev_addr *rev;
>> struct nvm_rq *rqd;
>> struct bio *bio;
>> struct page *page;
>> int slot;
>> int nr_sec_per_blk = rrpc->dev->sec_per_blk;
>> - u64 phys_addr;
>> + u64 phys_addr, poffset;
>> DECLARE_COMPLETION_ONSTACK(wait);
>>
>> if (bitmap_full(rblk->invalid_pages, nr_sec_per_blk))
>> @@ -315,6 +315,7 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc,
>> struct rrpc_block *rblk)
>> return -ENOMEM;
>> }
>>
>> + poffset = lun_poffset(rlun->parent, rrpc->dev);
>> while ((slot = find_first_zero_bit(rblk->invalid_pages,
>> nr_sec_per_blk)) <
>> nr_sec_per_blk) {
>>
>> @@ -322,23 +323,23 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc,
>> struct rrpc_block *rblk)
>> phys_addr = rblk->parent->id * nr_sec_per_blk + slot;
>>
>> try:
>> - spin_lock(&rrpc->rev_lock);
>> + spin_lock(&rlun->rev_lock);
>> /* Get logical address from physical to logical table */
>> - rev = &rrpc->rev_trans_map[phys_addr - rrpc->poffset];
>> + rev = &rlun->rev_trans_map[phys_addr - poffset];
>> /* already updated by previous regular write */
>> if (rev->addr == ADDR_EMPTY) {
>> - spin_unlock(&rrpc->rev_lock);
>> + spin_unlock(&rlun->rev_lock);
>> continue;
>> }
>>
>> rqd = rrpc_inflight_laddr_acquire(rrpc, rev->addr, 1);
>> if (IS_ERR_OR_NULL(rqd)) {
>> - spin_unlock(&rrpc->rev_lock);
>> + spin_unlock(&rlun->rev_lock);
>> schedule();
>> goto try;
>> }
>>
>> - spin_unlock(&rrpc->rev_lock);
>> + spin_unlock(&rlun->rev_lock);
>>
>> /* Perform read to do GC */
>> bio->bi_iter.bi_sector = rrpc_get_sector(rev->addr);
>> @@ -407,7 +408,7 @@ static void rrpc_block_gc(struct work_struct *work)
>> struct rrpc_block *rblk = gcb->rblk;
>> struct nvm_dev *dev = rrpc->dev;
>> struct nvm_lun *lun = rblk->parent->lun;
>> - struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset];
>> + struct rrpc_lun *rlun = lun->private;
>>
>> mempool_free(gcb, rrpc->gcb_pool);
>> pr_debug("nvm: block '%lu' being reclaimed\n", rblk->parent->id);
>> @@ -510,7 +511,7 @@ static void rrpc_gc_queue(struct work_struct *work)
>> struct rrpc_block *rblk = gcb->rblk;
>> struct nvm_lun *lun = rblk->parent->lun;
>> struct nvm_block *blk = rblk->parent;
>> - struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset];
>> + struct rrpc_lun *rlun = lun->private;
>>
>> spin_lock(&rlun->lock);
>> list_add_tail(&rblk->prio, &rlun->prio_list);
>> @@ -561,22 +562,25 @@ static struct rrpc_lun *rrpc_get_lun_rr(struct rrpc
>> *rrpc, int is_gc)
>> static struct rrpc_addr *rrpc_update_map(struct rrpc *rrpc, sector_t
>> laddr,
>> struct rrpc_block *rblk, u64
>> paddr)
>> {
>> + struct rrpc_lun *rlun = rblk->rlun;
>> struct rrpc_addr *gp;
>> struct rrpc_rev_addr *rev;
>> + u64 poffset = lun_poffset(rlun->parent, rrpc->dev);
>>
>> BUG_ON(laddr >= rrpc->nr_sects);
>>
>> gp = &rrpc->trans_map[laddr];
>> - spin_lock(&rrpc->rev_lock);
>> + spin_lock(&rlun->rev_lock);
>> +
>> if (gp->rblk)
>> rrpc_page_invalidate(rrpc, gp);
>>
>> gp->addr = paddr;
>> gp->rblk = rblk;
>>
>> - rev = &rrpc->rev_trans_map[gp->addr - rrpc->poffset];
>> + rev = &rlun->rev_trans_map[gp->addr - poffset];
>> rev->addr = laddr;
>> - spin_unlock(&rrpc->rev_lock);
>> + spin_unlock(&rlun->rev_lock);
>>
>> return gp;
>> }
>> @@ -990,7 +994,6 @@ static int rrpc_gc_init(struct rrpc *rrpc)
>>
>> static void rrpc_map_free(struct rrpc *rrpc)
>> {
>> - vfree(rrpc->rev_trans_map);
>> vfree(rrpc->trans_map);
>> }
>>
>> @@ -998,8 +1001,8 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64
>> *entries, void *private)
>> {
>> struct rrpc *rrpc = (struct rrpc *)private;
>> struct nvm_dev *dev = rrpc->dev;
>> - struct rrpc_addr *addr = rrpc->trans_map + slba;
>> - struct rrpc_rev_addr *raddr = rrpc->rev_trans_map;
>> + struct rrpc_addr *addr;
>> + struct rrpc_rev_addr *raddr;
>> u64 elba = slba + nlb;
>> u64 i;
>>
>> @@ -1008,9 +1011,15 @@ static int rrpc_l2p_update(u64 slba, u32 nlb,
>> __le64 *entries, void *private)
>> return -EINVAL;
>> }
>>
>> + slba -= rrpc->soffset >> (ilog2(dev->sec_size) - 9);
>> + addr = rrpc->trans_map + slba;
>> for (i = 0; i < nlb; i++) {
>> + struct rrpc_lun *rlun;
>> + struct nvm_lun *lun;
>> u64 pba = le64_to_cpu(entries[i]);
>> - unsigned int mod;
>> + u64 poffset;
>> + int lunid;
>> +
>> /* LNVM treats address-spaces as silos, LBA and PBA are
>> * equally large and zero-indexed.
>> */
>> @@ -1026,10 +1035,18 @@ static int rrpc_l2p_update(u64 slba, u32 nlb,
>> __le64 *entries, void *private)
>> if (!pba)
>> continue;
>>
>> - div_u64_rem(pba, rrpc->nr_sects, &mod);
>> + lunid = div_u64(pba, dev->sec_per_lun);
>> + lun = dev->mt->get_lun(dev, lunid);
>> +
>> + if (unlikely(!lun))
>> + return -EINVAL;
>> +
>> + rlun = lun->private;
>> + raddr = rlun->rev_trans_map;
>> + poffset = lun_poffset(lun, dev);
>>
>> addr[i].addr = pba;
>> - raddr[mod].addr = slba + i;
>> + raddr[pba - poffset].addr = slba + i;
>> }
>>
>> return 0;
>> @@ -1048,17 +1065,10 @@ static int rrpc_map_init(struct rrpc *rrpc)
>> if (!rrpc->trans_map)
>> return -ENOMEM;
>>
>> - rrpc->rev_trans_map = vmalloc(sizeof(struct rrpc_rev_addr)
>> - * rrpc->nr_sects);
>> - if (!rrpc->rev_trans_map)
>> - return -ENOMEM;
>> -
>> for (i = 0; i < rrpc->nr_sects; i++) {
>> struct rrpc_addr *p = &rrpc->trans_map[i];
>> - struct rrpc_rev_addr *r = &rrpc->rev_trans_map[i];
>>
>> p->addr = ADDR_EMPTY;
>> - r->addr = ADDR_EMPTY;
>> }
>>
>> if (!dev->ops->get_l2p_tbl)
>> @@ -1143,25 +1153,69 @@ static void rrpc_luns_free(struct rrpc *rrpc)
>> if (!lun)
>> break;
>> dev->mt->release_lun(dev, lun->id);
>> + vfree(rlun->rev_trans_map);
>> vfree(rlun->blocks);
>> }
>>
>> kfree(rrpc->luns);
>> + rrpc->luns = NULL;
>> +}
>> +
>> +static int rrpc_lun_init(struct rrpc *rrpc, struct rrpc_lun *rlun,
>> + struct nvm_lun *lun)
>> +{
>> + struct nvm_dev *dev = rrpc->dev;
>> + int i;
>> +
>> + rlun->rev_trans_map = vmalloc(sizeof(struct rrpc_rev_addr) *
>> + dev->sec_per_lun);
>> + if (!rlun->rev_trans_map)
>> + return -ENOMEM;
>> +
>> + for (i = 0; i < dev->sec_per_lun; i++) {
>> + struct rrpc_rev_addr *r = &rlun->rev_trans_map[i];
>> +
>> + r->addr = ADDR_EMPTY;
>> + }
>> + rlun->blocks = vzalloc(sizeof(struct rrpc_block) *
>> dev->blks_per_lun);
>> + if (!rlun->blocks) {
>> + vfree(rlun->rev_trans_map);
>> + return -ENOMEM;
>> + }
>> +
>> + for (i = 0; i < dev->blks_per_lun; i++) {
>> + struct rrpc_block *rblk = &rlun->blocks[i];
>> + struct nvm_block *blk = &lun->blocks[i];
>> +
>> + rblk->parent = blk;
>> + rblk->rlun = rlun;
>> + INIT_LIST_HEAD(&rblk->prio);
>> + spin_lock_init(&rblk->lock);
>> + }
>> +
>> + rlun->rrpc = rrpc;
>> + lun->private = rlun;
>> + INIT_LIST_HEAD(&rlun->prio_list);
>> + INIT_LIST_HEAD(&rlun->open_list);
>> + INIT_LIST_HEAD(&rlun->closed_list);
>> + INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
>> + spin_lock_init(&rlun->lock);
>> + spin_lock_init(&rlun->rev_lock);
>> +
>> + return 0;
>> }
>>
>> static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
>> {
>> struct nvm_dev *dev = rrpc->dev;
>> struct rrpc_lun *rlun;
>> - int i, j, ret = -EINVAL;
>> + int i, ret = -EINVAL;
>>
>> if (dev->sec_per_blk > MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG)
>> {
>> pr_err("rrpc: number of pages per block too high.");
>> return -EINVAL;
>> }
>>
>> - spin_lock_init(&rrpc->rev_lock);
>> -
>> rrpc->luns = kcalloc(rrpc->nr_luns, sizeof(struct rrpc_lun),
>>
>> GFP_KERNEL);
>> if (!rrpc->luns)
>> @@ -1183,31 +1237,9 @@ static int rrpc_luns_init(struct rrpc *rrpc, int
>> lun_begin, int lun_end)
>>
>> rlun = &rrpc->luns[i];
>> rlun->parent = lun;
>> - rlun->blocks = vzalloc(sizeof(struct rrpc_block) *
>> - rrpc->dev->blks_per_lun);
>> - if (!rlun->blocks) {
>> - ret = -ENOMEM;
>> + ret = rrpc_lun_init(rrpc, rlun, lun);
>> + if (ret)
>> goto err;
>> - }
>> -
>> - for (j = 0; j < rrpc->dev->blks_per_lun; j++) {
>> - struct rrpc_block *rblk = &rlun->blocks[j];
>> - struct nvm_block *blk = &lun->blocks[j];
>> -
>> - rblk->parent = blk;
>> - rblk->rlun = rlun;
>> - INIT_LIST_HEAD(&rblk->prio);
>> - spin_lock_init(&rblk->lock);
>> - }
>> -
>> - rlun->rrpc = rrpc;
>> - INIT_LIST_HEAD(&rlun->prio_list);
>> - INIT_LIST_HEAD(&rlun->open_list);
>> - INIT_LIST_HEAD(&rlun->closed_list);
>> -
>> - INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
>> - spin_lock_init(&rlun->lock);
>> -
>> rrpc->total_blocks += dev->blks_per_lun;
>> rrpc->nr_sects += dev->sec_per_lun;
>>
>> @@ -1215,6 +1247,7 @@ static int rrpc_luns_init(struct rrpc *rrpc, int
>> lun_begin, int lun_end)
>>
>> return 0;
>> err:
>> + rrpc_luns_free(rrpc);
>> return ret;
>> }
>>
>> @@ -1288,15 +1321,16 @@ static sector_t rrpc_capacity(void *private)
>> static void rrpc_block_map_update(struct rrpc *rrpc, struct rrpc_block
>> *rblk)
>> {
>> struct nvm_dev *dev = rrpc->dev;
>> + struct rrpc_lun *rlun = rblk->rlun;
>> int offset;
>> struct rrpc_addr *laddr;
>> - u64 bpaddr, paddr, pladdr;
>> + u64 paddr, pladdr, poffset;
>>
>> - bpaddr = block_to_rel_addr(rrpc, rblk);
>> + poffset = lun_poffset(rlun->parent, dev);
>> for (offset = 0; offset < dev->sec_per_blk; offset++) {
>> - paddr = bpaddr + offset;
>> + paddr = block_to_addr(rrpc, rblk) + offset;
>>
>> - pladdr = rrpc->rev_trans_map[paddr].addr;
>> + pladdr = rlun->rev_trans_map[paddr - poffset].addr;
>> if (pladdr == ADDR_EMPTY)
>> continue;
>>
>> @@ -1405,9 +1439,6 @@ static void *rrpc_init(struct nvm_dev *dev, struct
>> gendisk *tdisk,
>> goto err;
>> }
>>
>> - rrpc->poffset = dev->sec_per_lun * lun_begin;
>> - rrpc->lun_offset = lun_begin;
>> -
>> ret = rrpc_core_init(rrpc);
>> if (ret) {
>> pr_err("nvm: rrpc: could not initialize core\n");
>> diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h
>> index 2653484..590f1b7 100644
>> --- a/drivers/lightnvm/rrpc.h
>> +++ b/drivers/lightnvm/rrpc.h
>> @@ -87,6 +87,10 @@ struct rrpc_lun {
>>
>> struct work_struct ws_gc;
>>
>> + /* store a reverse map for garbage collection */
>> + struct rrpc_rev_addr *rev_trans_map;
>> + spinlock_t rev_lock;
>> +
>> spinlock_t lock;
>> };
>>
>> @@ -98,8 +102,6 @@ struct rrpc {
>> struct gendisk *disk;
>>
>> sector_t soffset; /* logical sector offset */
>> - u64 poffset; /* physical page offset */
>> - int lun_offset;
>>
>> int nr_luns;
>> struct rrpc_lun *luns;
>> @@ -124,9 +126,6 @@ struct rrpc {
>> * addresses are used when writing to the disk block device.
>> */
>> struct rrpc_addr *trans_map;
>> - /* also store a reverse map for garbage collection */
>> - struct rrpc_rev_addr *rev_trans_map;
>> - spinlock_t rev_lock;
>>
>> struct rrpc_inflight inflights;
>>
>> diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
>> index cdcb2cc..419a3db 100644
>> --- a/include/linux/lightnvm.h
>> +++ b/include/linux/lightnvm.h
>> @@ -276,6 +276,7 @@ struct nvm_lun {
>> spinlock_t lock;
>>
>> struct nvm_block *blocks;
>> + void *private;
>> };
>>
>> enum {
>>
>
> It seems like there is a race-condition when GC hits. It panics on
> page_invalidation. You can test with the following fio script:
>
> [global]
> bs=4k
> ioengine=libaio
> iodepth=1
> size=1G
> direct=1
> rw=randwrite
> time_based
> runtime=300
>
> [p1]
> numjobs=1
> filename=/dev/test2
>