Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S934937AbdC3WN3 (ORCPT ); Thu, 30 Mar 2017 18:13:29 -0400 Received: from ale.deltatee.com ([207.54.116.67]:44304 "EHLO ale.deltatee.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755143AbdC3WNX (ORCPT ); Thu, 30 Mar 2017 18:13:23 -0400 From: Logan Gunthorpe To: Christoph Hellwig , Sagi Grimberg , "James E.J. Bottomley" , "Martin K. Petersen" , Jens Axboe , Steve Wise , Stephen Bates , Max Gurtovoy , Dan Williams , Keith Busch , Jason Gunthorpe Cc: linux-pci@vger.kernel.org, linux-scsi@vger.kernel.org, linux-nvme@lists.infradead.org, linux-rdma@vger.kernel.org, linux-nvdimm@ml01.01.org, linux-kernel@vger.kernel.org, Logan Gunthorpe Date: Thu, 30 Mar 2017 16:12:38 -0600 Message-Id: <1490911959-5146-8-git-send-email-logang@deltatee.com> X-Mailer: git-send-email 2.1.4 In-Reply-To: <1490911959-5146-1-git-send-email-logang@deltatee.com> References: <1490911959-5146-1-git-send-email-logang@deltatee.com> X-SA-Exim-Connect-IP: 172.16.1.31 X-SA-Exim-Rcpt-To: hch@lst.de, sagi@grimberg.me, jejb@linux.vnet.ibm.com, martin.petersen@oracle.com, axboe@kernel.dk, swise@opengridcomputing.com, sbates@raithlin.com, maxg@mellanox.com, dan.j.williams@intel.com, keith.busch@intel.com, jgunthorpe@obsidianresearch.com, linux-nvme@lists.infradead.org, linux-nvdimm@lists.01.org, linux-pci@vger.kernel.org, linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org, logang@deltatee.com X-SA-Exim-Mail-From: gunthorp@deltatee.com Subject: [RFC 7/8] p2pmem: Support device removal X-SA-Exim-Version: 4.2.1 (built Mon, 26 Dec 2011 16:24:06 +0000) X-SA-Exim-Scanned: Yes (on ale.deltatee.com) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 9897 Lines: 351 This patch creates a list of callbacks to notify users of this memory that the p2pmem device is going away or gone. 
In nvmet-rdma, we disconnect any queue using p2p memory. The remote side will then automatically reconnect in a couple of seconds, and regular system memory (or a different p2pmem device) will be used. Signed-off-by: Logan Gunthorpe Signed-off-by: Stephen Bates Signed-off-by: Steve Wise --- drivers/memory/p2pmem.c | 75 ++++++++++++++++++++++++++++++++--- drivers/nvme/target/rdma.c | 98 ++++++++++++++++++++++++++-------------------- include/linux/p2pmem.h | 19 +++++++-- 3 files changed, 140 insertions(+), 52 deletions(-) diff --git a/drivers/memory/p2pmem.c b/drivers/memory/p2pmem.c index 71741c2..499d42c 100644 --- a/drivers/memory/p2pmem.c +++ b/drivers/memory/p2pmem.c @@ -105,6 +105,21 @@ static void p2pmem_release(struct device *dev) kfree(p); } +struct remove_callback { + struct list_head list; + void (*callback)(void *context); + void *context; +}; + +static void p2pmem_remove(struct p2pmem_dev *p) +{ + struct remove_callback *remove_call, *tmp; + + p->alive = false; + list_for_each_entry_safe(remove_call, tmp, &p->remove_list, list) + remove_call->callback(remove_call->context); +} + /** * p2pmem_create() - create a new p2pmem device * @parent: the parent device to create it under @@ -123,6 +138,10 @@ struct p2pmem_dev *p2pmem_create(struct device *parent) return ERR_PTR(-ENOMEM); init_completion(&p->cmp); + mutex_init(&p->remove_mutex); + INIT_LIST_HEAD(&p->remove_list); + p->alive = true; + device_initialize(&p->dev); p->dev.class = p2pmem_class; p->dev.parent = parent; @@ -187,6 +206,7 @@ void p2pmem_unregister(struct p2pmem_dev *p) dev_info(&p->dev, "unregistered"); device_del(&p->dev); + p2pmem_remove(p); ida_simple_remove(&p2pmem_ida, p->id); put_device(&p->dev); } @@ -291,6 +311,9 @@ EXPORT_SYMBOL(p2pmem_add_pci_region); */ void *p2pmem_alloc(struct p2pmem_dev *p, size_t size) { + if (!p->alive) + return NULL; + return (void *)gen_pool_alloc(p->pool, size); } EXPORT_SYMBOL(p2pmem_alloc); @@ -349,6 +372,9 @@ static int upstream_bridges_match(struct device 
*p2pmem, struct pci_dev *p2p_up; struct pci_dev *dma_up; + if (!to_p2pmem(p2pmem)->alive) + return false; + p2p_up = get_upstream_switch_port(p2pmem); if (!p2p_up) { dev_warn(p2pmem, "p2pmem is not behind a pci switch"); @@ -383,6 +409,8 @@ static int upstream_bridges_match(struct device *p2pmem, * specified devices * @dma_devices: a null terminated array of device pointers which * all must be compatible with the returned p2pmem device + * @remove_callback: this callback will be called if the p2pmem + * device is removed. * * For now, we only support cases where all the devices that * will transfer to the p2pmem device are on the same switch. @@ -400,9 +428,13 @@ static int upstream_bridges_match(struct device *p2pmem, * (use p2pmem_put to return the reference) or NULL if no compatible * p2pmem device is found. */ -struct p2pmem_dev *p2pmem_find_compat(struct device **dma_devices) +struct p2pmem_dev *p2pmem_find_compat(struct device **dma_devices, + void (*remove_callback)(void *context), + void *context) { struct device *dev; + struct p2pmem_dev *p; + struct remove_callback *remove_call; dev = class_find_device(p2pmem_class, NULL, dma_devices, upstream_bridges_match); @@ -410,21 +442,54 @@ struct p2pmem_dev *p2pmem_find_compat(struct device **dma_devices) if (!dev) return NULL; - return to_p2pmem(dev); + p = to_p2pmem(dev); + mutex_lock(&p->remove_mutex); + + if (!p->alive) { + p = NULL; + goto out; + } + + remove_call = kzalloc(sizeof(*remove_call), GFP_KERNEL); + remove_call->callback = remove_callback; + remove_call->context = context; + INIT_LIST_HEAD(&remove_call->list); + list_add(&remove_call->list, &p->remove_list); + +out: + mutex_unlock(&p->remove_mutex); + return p; } EXPORT_SYMBOL(p2pmem_find_compat); /** * p2pmem_put() - decrement a p2pmem device reference * @p: p2pmem device to return + * @context: context pointer that was passed to p2pmem_find_compat * * Dereference and free (if last) the device's reference counter. 
* It's safe to pass a NULL pointer to this function. */ -void p2pmem_put(struct p2pmem_dev *p) +void p2pmem_put(struct p2pmem_dev *p, void *context) { - if (p) - put_device(&p->dev); + struct remove_callback *remove_call; + + if (!p) + return; + + mutex_lock(&p->remove_mutex); + + list_for_each_entry(remove_call, &p->remove_list, list) { + if (remove_call->context != context) + continue; + + list_del(&remove_call->list); + kfree(remove_call); + break; + } + + mutex_unlock(&p->remove_mutex); + put_device(&p->dev); } EXPORT_SYMBOL(p2pmem_put); diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index abab544..9ebcda6 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1008,7 +1008,7 @@ static void nvmet_rdma_free_queue(struct nvmet_rdma_queue *queue) !queue->host_qid); } nvmet_rdma_free_rsps(queue); - p2pmem_put(queue->p2pmem); + p2pmem_put(queue->p2pmem, queue); ida_simple_remove(&nvmet_rdma_queue_ida, queue->idx); kfree(queue); } @@ -1204,6 +1204,58 @@ static int nvmet_rdma_cm_accept(struct rdma_cm_id *cm_id, return ret; } +static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue) +{ + bool disconnect = false; + unsigned long flags; + + pr_debug("cm_id= %p queue->state= %d\n", queue->cm_id, queue->state); + + spin_lock_irqsave(&queue->state_lock, flags); + switch (queue->state) { + case NVMET_RDMA_Q_CONNECTING: + case NVMET_RDMA_Q_LIVE: + queue->state = NVMET_RDMA_Q_DISCONNECTING; + case NVMET_RDMA_IN_DEVICE_REMOVAL: + disconnect = true; + break; + case NVMET_RDMA_Q_DISCONNECTING: + break; + } + spin_unlock_irqrestore(&queue->state_lock, flags); + + if (disconnect) { + rdma_disconnect(queue->cm_id); + schedule_work(&queue->release_work); + } +} + +static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue) +{ + bool disconnect = false; + + mutex_lock(&nvmet_rdma_queue_mutex); + if (!list_empty(&queue->queue_list)) { + list_del_init(&queue->queue_list); + disconnect = true; + } + 
mutex_unlock(&nvmet_rdma_queue_mutex); + + if (disconnect) + __nvmet_rdma_queue_disconnect(queue); +} + +static void nvmet_rdma_p2pmem_remove(void *context) +{ + struct nvmet_rdma_queue *queue = context; + + if (!queue->p2pmem) + return; + + nvmet_rdma_queue_disconnect(queue); + flush_scheduled_work(); +} + /* * If allow_p2pmem is set, we will try to use P2P memory for our * sgl lists. This requires the p2pmem device to be compatible with @@ -1241,7 +1293,8 @@ static void nvmet_rdma_queue_setup_p2pmem(struct nvmet_rdma_queue *queue) dma_devs[i++] = NULL; - queue->p2pmem = p2pmem_find_compat(dma_devs); + queue->p2pmem = p2pmem_find_compat(dma_devs, nvmet_rdma_p2pmem_remove, + queue); if (queue->p2pmem) pr_debug("using %s for rdma nvme target queue", @@ -1317,47 +1370,6 @@ static void nvmet_rdma_queue_established(struct nvmet_rdma_queue *queue) spin_unlock_irqrestore(&queue->state_lock, flags); } -static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue) -{ - bool disconnect = false; - unsigned long flags; - - pr_debug("cm_id= %p queue->state= %d\n", queue->cm_id, queue->state); - - spin_lock_irqsave(&queue->state_lock, flags); - switch (queue->state) { - case NVMET_RDMA_Q_CONNECTING: - case NVMET_RDMA_Q_LIVE: - queue->state = NVMET_RDMA_Q_DISCONNECTING; - case NVMET_RDMA_IN_DEVICE_REMOVAL: - disconnect = true; - break; - case NVMET_RDMA_Q_DISCONNECTING: - break; - } - spin_unlock_irqrestore(&queue->state_lock, flags); - - if (disconnect) { - rdma_disconnect(queue->cm_id); - schedule_work(&queue->release_work); - } -} - -static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue) -{ - bool disconnect = false; - - mutex_lock(&nvmet_rdma_queue_mutex); - if (!list_empty(&queue->queue_list)) { - list_del_init(&queue->queue_list); - disconnect = true; - } - mutex_unlock(&nvmet_rdma_queue_mutex); - - if (disconnect) - __nvmet_rdma_queue_disconnect(queue); -} - static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id, struct 
nvmet_rdma_queue *queue) { diff --git a/include/linux/p2pmem.h b/include/linux/p2pmem.h index 4cd6f35..9365b02 100644 --- a/include/linux/p2pmem.h +++ b/include/linux/p2pmem.h @@ -22,12 +22,16 @@ struct p2pmem_dev { struct device dev; int id; + bool alive; struct percpu_ref ref; struct completion cmp; struct gen_pool *pool; struct dentry *debugfs_root; + + struct mutex remove_mutex; /* protects the remove callback list */ + struct list_head remove_list; }; #ifdef CONFIG_P2PMEM @@ -41,8 +45,12 @@ int p2pmem_add_pci_region(struct p2pmem_dev *p, struct pci_dev *pdev, int bar); void *p2pmem_alloc(struct p2pmem_dev *p, size_t size); void p2pmem_free(struct p2pmem_dev *p, void *addr, size_t size); -struct p2pmem_dev *p2pmem_find_compat(struct device **dma_devices); -void p2pmem_put(struct p2pmem_dev *p); +struct p2pmem_dev * +p2pmem_find_compat(struct device **dma_devices, + void (*unregister_callback)(void *context), + void *context); + +void p2pmem_put(struct p2pmem_dev *p, void *context); #else @@ -76,12 +84,15 @@ static inline void p2pmem_free(struct p2pmem_dev *p, void *addr, size_t size) { } -static inline struct p2pmem_dev *p2pmem_find_compat(struct device **dma_devs) +static inline struct p2pmem_dev * +p2pmem_find_compat(struct device **dma_devices, + void (*unregister_callback)(void *context), + void *context) { return NULL; } -static inline void p2pmem_put(struct p2pmem_dev *p) +static inline void p2pmem_put(struct p2pmem_dev *p, void *context) { } -- 2.1.4