Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753931AbbHGOaK (ORCPT ); Fri, 7 Aug 2015 10:30:10 -0400 Received: from mail-wi0-f180.google.com ([209.85.212.180]:38509 "EHLO mail-wi0-f180.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753894AbbHGOaD (ORCPT ); Fri, 7 Aug 2015 10:30:03 -0400 From: "=?UTF-8?q?Matias=20Bj=C3=B8rling?=" X-Google-Original-From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= To: hch@infradead.org, axboe@fb.com, linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, linux-nvme@lists.infradead.org Cc: jg@lightnvm.io, Stephen.Bates@pmcs.com, keith.busch@intel.com, =?UTF-8?q?Matias=20Bj=C3=B8rling?= Subject: [PATCH v7 4/5] null_nvm: Lightnvm test driver Date: Fri, 7 Aug 2015 16:29:50 +0200 Message-Id: <1438957791-24632-5-git-send-email-mb@lightnvm.io> X-Mailer: git-send-email 2.1.4 In-Reply-To: <1438957791-24632-1-git-send-email-mb@lightnvm.io> References: <1438957791-24632-1-git-send-email-mb@lightnvm.io> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 13182 Lines: 530 This driver implements the I/O flow for a LightNVM device driver. It does no transfers. It can be used to test setup/teardown of devices and to evaluate the performance of block managers and targets. The framework of the driver is derived from the null_blk module. 
Signed-off-by: Matias Bjørling --- drivers/lightnvm/Kconfig | 3 + drivers/lightnvm/Makefile | 1 + drivers/lightnvm/null_nvm.c | 481 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 485 insertions(+) create mode 100644 drivers/lightnvm/null_nvm.c diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig index 37b00ae..a80eef5 100644 --- a/drivers/lightnvm/Kconfig +++ b/drivers/lightnvm/Kconfig @@ -30,4 +30,7 @@ config NVM_BM_HB while keeping data placement and garbage collection decisions on the host. +config NVM_NULL_NVM + tristate "Null test LightNVM driver" + endif # NVM diff --git a/drivers/lightnvm/Makefile b/drivers/lightnvm/Makefile index 9ff4669..1892d73 100644 --- a/drivers/lightnvm/Makefile +++ b/drivers/lightnvm/Makefile @@ -5,3 +5,4 @@ obj-$(CONFIG_NVM) := core.o obj-$(CONFIG_NVM_RRPC) += rrpc.o obj-$(CONFIG_NVM_BM_HB) += bm_hb.o +obj-$(CONFIG_NVM_NULL_NVM) += null_nvm.o diff --git a/drivers/lightnvm/null_nvm.c b/drivers/lightnvm/null_nvm.c new file mode 100644 index 0000000..05f1cbd --- /dev/null +++ b/drivers/lightnvm/null_nvm.c @@ -0,0 +1,481 @@ +/* + * derived from Jens Axboe's block/null_blk.c + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +static struct kmem_cache *ppa_cache; +struct nulln_cmd { + struct llist_node ll_list; + struct request *rq; +}; + +struct nulln { + struct list_head list; + unsigned int index; + struct request_queue *q; + struct blk_mq_tag_set tag_set; + struct hrtimer timer; + char disk_name[DISK_NAME_LEN]; +}; + +static LIST_HEAD(nulln_list); +static struct mutex nulln_lock; +static int nulln_indexes; + +struct completion_queue { + struct llist_head list; + struct hrtimer timer; +}; + +/* + * These are per-cpu for now, they will need to be configured by the + * complete_queues parameter and appropriately mapped. 
+ */ +static DEFINE_PER_CPU(struct completion_queue, completion_queues); + +enum { + NULL_IRQ_NONE = 0, + NULL_IRQ_SOFTIRQ = 1, + NULL_IRQ_TIMER = 2, +}; + +static int submit_queues; +module_param(submit_queues, int, S_IRUGO); +MODULE_PARM_DESC(submit_queues, "Number of submission queues"); + +static int home_node = NUMA_NO_NODE; +module_param(home_node, int, S_IRUGO); +MODULE_PARM_DESC(home_node, "Home node for the device"); + +static int null_param_store_val(const char *str, int *val, int min, int max) +{ + int ret, new_val; + + ret = kstrtoint(str, 10, &new_val); + if (ret) + return -EINVAL; + + if (new_val < min || new_val > max) + return -EINVAL; + + *val = new_val; + return 0; +} + +static int gb = 250; +module_param(gb, int, S_IRUGO); +MODULE_PARM_DESC(gb, "Size in GB"); + +static int bs = 4096; +module_param(bs, int, S_IRUGO); +MODULE_PARM_DESC(bs, "Block size (in bytes)"); + +static int nr_devices = 1; +module_param(nr_devices, int, S_IRUGO); +MODULE_PARM_DESC(nr_devices, "Number of devices to register"); + +static int irqmode = NULL_IRQ_SOFTIRQ; + +static int null_set_irqmode(const char *str, const struct kernel_param *kp) +{ + return null_param_store_val(str, &irqmode, NULL_IRQ_NONE, + NULL_IRQ_TIMER); +} + +static const struct kernel_param_ops null_irqmode_param_ops = { + .set = null_set_irqmode, + .get = param_get_int, +}; + +device_param_cb(irqmode, &null_irqmode_param_ops, &irqmode, S_IRUGO); +MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer"); + +static int completion_nsec = 10000; +module_param(completion_nsec, int, S_IRUGO); +MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns"); + +static int hw_queue_depth = 64; +module_param(hw_queue_depth, int, S_IRUGO); +MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. 
Default: 64"); + +static bool use_per_node_hctx; +module_param(use_per_node_hctx, bool, S_IRUGO); +MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false"); + +static int num_channels = 1; +module_param(num_channels, int, S_IRUGO); +MODULE_PARM_DESC(num_channels, "Number of channels to be exposed. Default: 1"); + +static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer) +{ + struct completion_queue *cq; + struct llist_node *entry; + struct nulln_cmd *cmd; + + cq = &per_cpu(completion_queues, smp_processor_id()); + + while ((entry = llist_del_all(&cq->list)) != NULL) { + entry = llist_reverse_order(entry); + do { + cmd = container_of(entry, struct nulln_cmd, ll_list); + entry = entry->next; + blk_mq_end_request(cmd->rq, 0); + + if (cmd->rq) { + struct request_queue *q = cmd->rq->q; + + if (!q->mq_ops && blk_queue_stopped(q)) { + spin_lock(q->queue_lock); + if (blk_queue_stopped(q)) + blk_start_queue(q); + spin_unlock(q->queue_lock); + } + } + } while (entry); + } + + return HRTIMER_NORESTART; +} + +static void null_cmd_end_timer(struct nulln_cmd *cmd) +{ + struct completion_queue *cq = &per_cpu(completion_queues, get_cpu()); + + cmd->ll_list.next = NULL; + if (llist_add(&cmd->ll_list, &cq->list)) { + ktime_t kt = ktime_set(0, completion_nsec); + + hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL_PINNED); + } + + put_cpu(); +} + +static void null_softirq_done_fn(struct request *rq) +{ + blk_mq_end_request(rq, 0); +} + +static inline void null_handle_cmd(struct nulln_cmd *cmd) +{ + /* Complete IO by inline, softirq or timer */ + switch (irqmode) { + case NULL_IRQ_SOFTIRQ: + case NULL_IRQ_NONE: + blk_mq_complete_request(cmd->rq); + break; + case NULL_IRQ_TIMER: + null_cmd_end_timer(cmd); + break; + } +} + +static int null_id(struct request_queue *q, struct nvm_id *id) +{ + sector_t size = gb * 1024 * 1024 * 1024ULL; + unsigned long per_chnl_size = + size / bs / num_channels; + struct nvm_id_chnl *chnl; + 
int i; + + id->ver_id = 0x1; + id->nvm_type = NVM_NVMT_BLK; + id->nchannels = num_channels; + + id->chnls = kmalloc_array(id->nchannels, sizeof(struct nvm_id_chnl), + GFP_KERNEL); + if (!id->chnls) + return -ENOMEM; + + for (i = 0; i < id->nchannels; i++) { + chnl = &id->chnls[i]; + chnl->queue_size = hw_queue_depth; + chnl->gran_read = bs; + chnl->gran_write = bs; + chnl->gran_erase = bs * 256; + chnl->oob_size = 0; + chnl->t_r = chnl->t_sqr = 25000; /* 25us */ + chnl->t_w = chnl->t_sqw = 500000; /* 500us */ + chnl->t_e = 1500000; /* 1.500us */ + chnl->io_sched = NVM_IOSCHED_CHANNEL; + chnl->laddr_begin = per_chnl_size * i; + chnl->laddr_end = per_chnl_size * (i + 1) - 1; + } + + return 0; +} + +static int null_get_features(struct request_queue *q, + struct nvm_get_features *gf) +{ + gf->rsp = NVM_RSP_L2P; + gf->ext = 0; + + return 0; +} + +static void null_end_io(struct request *rq, int error) +{ + struct nvm_rq *rqd = rq->end_io_data; + struct nvm_tgt_instance *ins = rqd->ins; + + ins->tt->end_io(rq->end_io_data, error); + + blk_put_request(rq); +} + +static int null_submit_io(struct request_queue *q, struct nvm_rq *rqd) +{ + struct request *rq; + struct bio *bio = rqd->bio; + + rq = blk_mq_alloc_request(q, bio_rw(bio), GFP_KERNEL, 0); + if (IS_ERR(rq)) + return -ENOMEM; + + rq->cmd_type = REQ_TYPE_DRV_PRIV; + rq->__sector = bio->bi_iter.bi_sector; + rq->ioprio = bio_prio(bio); + + if (bio_has_data(bio)) + rq->nr_phys_segments = bio_phys_segments(q, bio); + + rq->__data_len = bio->bi_iter.bi_size; + rq->bio = rq->biotail = bio; + + rq->end_io_data = rqd; + + blk_execute_rq_nowait(q, NULL, rq, 0, null_end_io); + + return 0; +} + +static void *null_create_ppa_pool(struct request_queue *q) +{ + mempool_t *virtmem_pool; + + ppa_cache = kmem_cache_create("ppa_list", PAGE_SIZE, 0, 0, NULL); + if (!ppa_cache) { + pr_err("null_nvm: Unable to craete kmem cache\n"); + return NULL; + } + + virtmem_pool = mempool_create_slab_pool(64, ppa_cache); + if (!virtmem_pool) { + 
pr_err("null_nvm: Unable to create virtual memory pool\n"); + return NULL; + } + + return virtmem_pool; +} + +static void null_destroy_ppa_pool(void *pool) +{ + mempool_t *virtmem_pool = pool; + + mempool_destroy(virtmem_pool); +} + +static void *null_alloc_ppalist(struct request_queue *q, void *pool, + gfp_t mem_flags, dma_addr_t *dma_handler) +{ + + struct sector_t *ppa_list; + mempool_t *virtmem_pool = pool; + + ppa_list = mempool_alloc(virtmem_pool, mem_flags); + if (!ppa_list) { + pr_err("null_nvm: Unable to allocate virtual memory\n"); + return NULL; + } + + return ppa_list; +} + +static void null_free_ppalist(void *pool, void *ppa_list, + dma_addr_t dma_handler) +{ + mempool_t *virtmem_pool = pool; + + mempool_free(ppa_list, virtmem_pool); +} + +static struct nvm_dev_ops nulln_dev_ops = { + .identify = null_id, + + .get_features = null_get_features, + + .submit_io = null_submit_io, + + .create_ppa_pool = null_create_ppa_pool, + .destroy_ppa_pool = null_destroy_ppa_pool, + .alloc_ppalist = null_alloc_ppalist, + .free_ppalist = null_free_ppalist, + + /* Emulate nvme protocol */ + .max_phys_sect = 64, +}; + +static int null_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + struct nulln_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); + + cmd->rq = bd->rq; + + blk_mq_start_request(bd->rq); + + null_handle_cmd(cmd); + return BLK_MQ_RQ_QUEUE_OK; +} + +static struct blk_mq_ops null_mq_ops = { + .queue_rq = null_queue_rq, + .map_queue = blk_mq_map_queue, + .complete = null_softirq_done_fn, +}; + +static void null_del_dev(struct nulln *nulln) +{ + list_del_init(&nulln->list); + + nvm_unregister(nulln->disk_name); + + blk_cleanup_queue(nulln->q); + blk_mq_free_tag_set(&nulln->tag_set); + kfree(nulln); +} + +static int null_add_dev(void) +{ + struct nulln *nulln; + int rv; + + nulln = kzalloc_node(sizeof(*nulln), GFP_KERNEL, home_node); + if (!nulln) { + rv = -ENOMEM; + goto out; + } + + if (use_per_node_hctx) + submit_queues = nr_online_nodes; + + 
nulln->tag_set.ops = &null_mq_ops; + nulln->tag_set.nr_hw_queues = submit_queues; + nulln->tag_set.queue_depth = hw_queue_depth; + nulln->tag_set.numa_node = home_node; + nulln->tag_set.cmd_size = sizeof(struct nulln_cmd); + nulln->tag_set.driver_data = nulln; + + rv = blk_mq_alloc_tag_set(&nulln->tag_set); + if (rv) + goto out_free_nulln; + + nulln->q = blk_mq_init_queue(&nulln->tag_set); + if (IS_ERR(nulln->q)) { + rv = -ENOMEM; + goto out_cleanup_tags; + } + + nulln->q->queuedata = nulln; + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nulln->q); + queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nulln->q); + + mutex_lock(&nulln_lock); + list_add_tail(&nulln->list, &nulln_list); + nulln->index = nulln_indexes++; + mutex_unlock(&nulln_lock); + + blk_queue_logical_block_size(nulln->q, bs); + blk_queue_physical_block_size(nulln->q, bs); + + sprintf(nulln->disk_name, "nulln%d", nulln->index); + + rv = nvm_register(nulln->q, nulln->disk_name, &nulln_dev_ops); + if (rv) + goto out_cleanup_blk_queue; + + return 0; + +out_cleanup_blk_queue: + blk_cleanup_queue(nulln->q); +out_cleanup_tags: + blk_mq_free_tag_set(&nulln->tag_set); +out_free_nulln: + kfree(nulln); +out: + return rv; +} + +static int __init null_init(void) +{ + unsigned int i; + + if (bs > PAGE_SIZE) { + pr_warn("null_nvm: invalid block size\n"); + pr_warn("null_nvm: defaults block size to %lu\n", PAGE_SIZE); + bs = PAGE_SIZE; + } + + if (use_per_node_hctx) { + if (submit_queues < nr_online_nodes) { + pr_warn("null_nvm: submit_queues param is set to %u.", + nr_online_nodes); + submit_queues = nr_online_nodes; + } + } else if (submit_queues > nr_cpu_ids) + submit_queues = nr_cpu_ids; + else if (!submit_queues) + submit_queues = 1; + + mutex_init(&nulln_lock); + + /* Initialize a separate list for each CPU for issuing softirqs */ + for_each_possible_cpu(i) { + struct completion_queue *cq = &per_cpu(completion_queues, i); + + init_llist_head(&cq->list); + + if (irqmode != NULL_IRQ_TIMER) + continue; + + 
hrtimer_init(&cq->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + cq->timer.function = null_cmd_timer_expired; + } + + for (i = 0; i < nr_devices; i++) { + if (null_add_dev()) + return -EINVAL; + } + + pr_info("null_nvm: module loaded\n"); + return 0; +} + +static void __exit null_exit(void) +{ + struct nulln *nulln; + + mutex_lock(&nulln_lock); + while (!list_empty(&nulln_list)) { + nulln = list_entry(nulln_list.next, struct nulln, list); + null_del_dev(nulln); + } + mutex_unlock(&nulln_lock); +} + +module_init(null_init); +module_exit(null_exit); + +MODULE_AUTHOR("Matias Bjorling "); +MODULE_LICENSE("GPL"); -- 2.1.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/