Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751434AbbGMEIq (ORCPT ); Mon, 13 Jul 2015 00:08:46 -0400 Received: from mx1.redhat.com ([209.132.183.28]:55460 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751247AbbGMEIo (ORCPT ); Mon, 13 Jul 2015 00:08:44 -0400 From: Bandan Das To: kvm@vger.kernel.org Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org, mst@redhat.com, Eyal Moscovici , Razya Ladelsky , cgroups@vger.kernel.org, jasowang@redhat.com Subject: [RFC PATCH 2/4] vhost: Limit the number of devices served by a single worker thread Date: Mon, 13 Jul 2015 00:07:33 -0400 Message-Id: <1436760455-5686-3-git-send-email-bsd@redhat.com> In-Reply-To: <1436760455-5686-1-git-send-email-bsd@redhat.com> References: <1436760455-5686-1-git-send-email-bsd@redhat.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 9185 Lines: 320 When the number of devices increase, the universal thread model (introduced in the preceding patch) may end up being the bottleneck. Moreover, a single worker thread also forces us to change cgroups based on the device we are serving. We introduce a worker pool struct that starts with one worker thread and we keep adding more threads when the numbers of devs reaches a certain threshold. The default value is set at 7 but is not based on any empirical data. The value can also be changed by the user with the devs_per_worker module parameter. Note that this patch doesn't change how cgroups work. We still keep moving around the worker thread to the cgroups of the device we are serving at the moment. Signed-off-by: Razya Ladelsky Signed-off-by: Bandan Das --- drivers/vhost/net.c | 6 +-- drivers/vhost/scsi.c | 3 +- drivers/vhost/vhost.c | 135 +++++++++++++++++++++++++++++++++++++++++--------- drivers/vhost/vhost.h | 13 ++++- 4 files changed, 128 insertions(+), 29 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 7d137a4..7bfa019 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -705,7 +705,8 @@ static int vhost_net_open(struct inode *inode, struct file *f) n->vqs[i].vhost_hlen = 0; n->vqs[i].sock_hlen = 0; } - vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX); + if (vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX)) + return dev->err; vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev); vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev); @@ -801,9 +802,6 @@ static int vhost_net_release(struct inode *inode, struct file *f) sockfd_put(rx_sock); /* Make sure no callbacks are outstanding */ synchronize_rcu_bh(); - /* We do an extra flush before freeing memory, - * since jobs can re-queue themselves. */ - vhost_net_flush(n); kfree(n->dev.vqs); kvfree(n); return 0; diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 6c42936..97de2db 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1601,7 +1601,8 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) vqs[i] = &vs->vqs[i].vq; vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick; } - vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ); + if (vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ)) + return vs->dev.err; vhost_scsi_init_inflight(vs, NULL); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 951c96b..6a5d4c0 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -27,11 +27,19 @@ #include #include #include +#include #include "vhost.h" -/* Just one worker thread to service all devices */ -static struct vhost_worker *worker; +static int __read_mostly devs_per_worker = 7; +module_param(devs_per_worker, int, S_IRUGO); +MODULE_PARM_DESC(devs_per_worker, "Setup the number of devices being served by a worker thread"); + +/* Only used to give a unique id to a vhost thread at the moment */ +static unsigned int total_vhost_workers; + +/* Pool of vhost threads */ +static struct vhost_pool *vhost_pool; enum { VHOST_MEMORY_MAX_NREGIONS = 64, @@ -270,6 +278,63 @@ static int vhost_worker(void *data) return 0; } +static void vhost_create_worker(struct vhost_dev *dev) +{ + struct vhost_worker *worker; + struct vhost_pool *pool = vhost_pool; + + worker = kzalloc(sizeof(*worker), GFP_KERNEL); + if (!worker) { + dev->err = -ENOMEM; + return; + } + + worker->thread = kthread_create(vhost_worker, + worker, + "vhost-%d", + total_vhost_workers); + if (IS_ERR(worker->thread)) { + dev->err = PTR_ERR(worker->thread); + goto therror; + } + + spin_lock_init(&worker->work_lock); + INIT_LIST_HEAD(&worker->work_list); + list_add(&worker->node, &pool->workers); + worker->owner = NULL; + worker->num_devices++; + total_vhost_workers++; + dev->worker = worker; + dev->worker_assigned = true; + return; + +therror: + if (worker->thread) + kthread_stop(worker->thread); + kfree(worker); +} + +static int vhost_dev_assign_worker(struct vhost_dev *dev) +{ + struct vhost_worker *worker; + + mutex_lock(&vhost_pool->pool_lock); + list_for_each_entry(worker, &vhost_pool->workers, node) { + if (worker->num_devices < devs_per_worker) { + dev->worker = worker; + dev->worker_assigned = true; + worker->num_devices++; + break; + } + } + if (!dev->worker_assigned) + /* create a new worker */ + vhost_create_worker(dev); + mutex_unlock(&vhost_pool->pool_lock); + + return dev->err; +} + static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq) { kfree(vq->indirect); @@ -311,7 +376,7 @@ static void vhost_dev_free_iovecs(struct vhost_dev *dev) vhost_vq_free_iovecs(dev->vqs[i]); } -void vhost_dev_init(struct vhost_dev *dev, +int vhost_dev_init(struct vhost_dev *dev, struct vhost_virtqueue **vqs, int nvqs) { struct vhost_virtqueue *vq; @@ -324,9 +389,14 @@ void vhost_dev_init(struct vhost_dev *dev, dev->log_file = NULL; dev->memory = NULL; dev->mm = NULL; - dev->worker = worker; + dev->worker = NULL; + dev->err = 0; + dev->worker_assigned = false; dev->owner = current; + if (vhost_dev_assign_worker(dev)) + goto done; + for (i = 0; i < dev->nvqs; ++i) { vq = dev->vqs[i]; vq->log = NULL; @@ -339,6 +409,9 @@ void vhost_dev_init(struct vhost_dev *dev, vhost_poll_init(&vq->poll, vq->handle_kick, POLLIN, dev); } + +done: + return dev->err; } EXPORT_SYMBOL_GPL(vhost_dev_init); @@ -370,7 +443,6 @@ long vhost_dev_set_owner(struct vhost_dev *dev) /* No owner, become one */ dev->mm = get_task_mm(current); - dev->worker = worker; err = vhost_dev_alloc_iovecs(dev); if (err) @@ -424,6 +496,24 @@ void vhost_dev_stop(struct vhost_dev *dev) } EXPORT_SYMBOL_GPL(vhost_dev_stop); +static void vhost_deassign_worker(struct vhost_dev *dev) +{ + if (dev->worker) { + mutex_lock(&vhost_pool->pool_lock); + WARN_ON(dev->worker->num_devices <= 0); + if (!--dev->worker->num_devices) { + WARN_ON(!list_empty(&dev->worker->work_list)); + list_del(&dev->worker->node); + kthread_stop(dev->worker->thread); + dev->worker->thread = NULL; + kfree(dev->worker); + } + mutex_unlock(&vhost_pool->pool_lock); + } + + dev->worker = NULL; +} + /* Caller should have device mutex if and only if locked is set */ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked) { @@ -452,6 +542,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked) /* No one will access memory at this point */ kfree(dev->memory); dev->memory = NULL; + vhost_deassign_worker(dev); if (dev->mm) mmput(dev->mm); dev->mm = NULL; @@ -1542,31 +1633,29 @@ EXPORT_SYMBOL_GPL(vhost_disable_notify); static int __init vhost_init(void) { - struct vhost_worker *w = - kzalloc(sizeof(*w), GFP_KERNEL); - if (!w) + struct vhost_pool *pool = + kzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) return -ENOMEM; - - w->thread = kthread_create(vhost_worker, - w, "vhost-worker"); - if (IS_ERR(w->thread)) - return PTR_ERR(w->thread); - - worker = w; - spin_lock_init(&worker->work_lock); - INIT_LIST_HEAD(&worker->work_list); - wake_up_process(worker->thread); - pr_info("Created universal thread to service requests\n"); + mutex_init(&pool->pool_lock); + INIT_LIST_HEAD(&pool->workers); + vhost_pool = pool; return 0; } static void __exit vhost_exit(void) { - if (worker) { - kthread_stop(worker->thread); - WARN_ON(!list_empty(&worker->work_list)); - kfree(worker); + struct vhost_worker *worker; + + if (vhost_pool) { + list_for_each_entry(worker, &vhost_pool->workers, node) { + kthread_stop(worker->thread); + WARN_ON(!list_empty(&worker->work_list)); + list_del(&worker->node); + kfree(worker); + } + kfree(vhost_pool); } } diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 2f204ce..a45193b 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -120,19 +120,30 @@ struct vhost_dev { struct vhost_worker *worker; /* for cgroup support */ struct task_struct *owner; + bool worker_assigned; + int err; }; struct vhost_worker { spinlock_t work_lock; + unsigned id; struct list_head work_list; struct task_struct *thread; struct task_struct *owner; + int num_devices; + struct list_head node; +}; + +struct vhost_pool { + struct work_struct work; + struct mutex pool_lock; + struct list_head workers; }; void vhost_work_queue(struct vhost_worker *worker, struct vhost_work *work); void vhost_work_flush(struct vhost_worker *worker, struct vhost_work *work); -void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs); +int vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs); long vhost_dev_set_owner(struct vhost_dev *dev); bool vhost_dev_has_owner(struct vhost_dev *dev); long vhost_dev_check_owner(struct vhost_dev *); -- 2.4.3 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/