This series implements virtio-scsi queue steering, which gives
performance improvements of up to 50% (measured both with QEMU and
tcm_vhost backends).
This version is rebased on Rusty's virtio ring rework patches.
We hope this can go into virtio-next together with the virtio ring
rework patches.
V5: improving the grammar of 1/5 (Paolo)
move the dropping of sg_elems to 'virtio-scsi: use virtqueue_add_sgs for command buffers'. (Asias)
V4: rebase on virtio ring rework patches (rusty's pending-rebases branch)
V3 can be found at http://marc.info/?l=linux-virtualization&m=136067440717154&w=2
It would probably be easier to get it in via Rusty's tree
because of the prerequisites. James, can we get your Acked-by?
Paolo Bonzini (4):
virtio-scsi: redo allocation of target data
virtio-scsi: pass struct virtio_scsi to virtqueue completion function
virtio-scsi: push vq lock/unlock into virtscsi_vq_done
virtio-scsi: introduce multiqueue support
Wanlong Gao (1):
virtio-scsi: reset virtqueue affinity when doing cpu hotplug
drivers/scsi/virtio_scsi.c | 359 ++++++++++++++++++++++++++++++++++++---------
1 file changed, 290 insertions(+), 69 deletions(-)
--
1.8.2.rc2
From: Paolo Bonzini <[email protected]>
Avoid duplicated code in all of the callers.
Cc: [email protected]
Signed-off-by: Paolo Bonzini <[email protected]>
Signed-off-by: Wanlong Gao <[email protected]>
Reviewed-by: Asias He <[email protected]>
---
drivers/scsi/virtio_scsi.c | 22 +++++++++-------------
1 file changed, 9 insertions(+), 13 deletions(-)
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index bcab9d7..94a64ad 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -167,28 +167,30 @@ static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf)
sc->scsi_done(sc);
}
-static void virtscsi_vq_done(struct virtio_scsi *vscsi, struct virtqueue *vq,
+static void virtscsi_vq_done(struct virtio_scsi *vscsi,
+ struct virtio_scsi_vq *virtscsi_vq,
void (*fn)(struct virtio_scsi *vscsi, void *buf))
{
void *buf;
unsigned int len;
+ unsigned long flags;
+ struct virtqueue *vq = virtscsi_vq->vq;
+ spin_lock_irqsave(&virtscsi_vq->vq_lock, flags);
do {
virtqueue_disable_cb(vq);
while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
fn(vscsi, buf);
} while (!virtqueue_enable_cb(vq));
+ spin_unlock_irqrestore(&virtscsi_vq->vq_lock, flags);
}
static void virtscsi_req_done(struct virtqueue *vq)
{
struct Scsi_Host *sh = virtio_scsi_host(vq->vdev);
struct virtio_scsi *vscsi = shost_priv(sh);
- unsigned long flags;
- spin_lock_irqsave(&vscsi->req_vq.vq_lock, flags);
- virtscsi_vq_done(vscsi, vq, virtscsi_complete_cmd);
- spin_unlock_irqrestore(&vscsi->req_vq.vq_lock, flags);
+ virtscsi_vq_done(vscsi, &vscsi->req_vq, virtscsi_complete_cmd);
};
static void virtscsi_complete_free(struct virtio_scsi *vscsi, void *buf)
@@ -205,11 +207,8 @@ static void virtscsi_ctrl_done(struct virtqueue *vq)
{
struct Scsi_Host *sh = virtio_scsi_host(vq->vdev);
struct virtio_scsi *vscsi = shost_priv(sh);
- unsigned long flags;
- spin_lock_irqsave(&vscsi->ctrl_vq.vq_lock, flags);
- virtscsi_vq_done(vscsi, vq, virtscsi_complete_free);
- spin_unlock_irqrestore(&vscsi->ctrl_vq.vq_lock, flags);
+ virtscsi_vq_done(vscsi, &vscsi->ctrl_vq, virtscsi_complete_free);
};
static int virtscsi_kick_event(struct virtio_scsi *vscsi,
@@ -344,11 +343,8 @@ static void virtscsi_event_done(struct virtqueue *vq)
{
struct Scsi_Host *sh = virtio_scsi_host(vq->vdev);
struct virtio_scsi *vscsi = shost_priv(sh);
- unsigned long flags;
- spin_lock_irqsave(&vscsi->event_vq.vq_lock, flags);
- virtscsi_vq_done(vscsi, vq, virtscsi_complete_event);
- spin_unlock_irqrestore(&vscsi->event_vq.vq_lock, flags);
+ virtscsi_vq_done(vscsi, &vscsi->event_vq, virtscsi_complete_event);
};
/**
--
1.8.2.rc2
From: Paolo Bonzini <[email protected]>
virtio_scsi_target_state is now empty. We will find new uses for it in
the next few patches, so this patch does not drop it completely.
However, having dropped the sglist flexible array member, we can turn
the tgt array-of-pointers into a simple array. This simplifies the
allocation.
Even simpler would be to place the virtio_scsi_target_state structs in a
flexible array member at the end of struct virtio_scsi. But we do not
do that, because we will place the virtqueues there in the next patches.
Cc: [email protected]
Signed-off-by: Paolo Bonzini <[email protected]>
Signed-off-by: Wanlong Gao <[email protected]>
---
drivers/scsi/virtio_scsi.c | 40 +++++++++++-----------------------------
1 file changed, 11 insertions(+), 29 deletions(-)
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index b53ba9e..3256c51 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -76,7 +76,7 @@ struct virtio_scsi {
/* Get some buffers ready for event vq */
struct virtio_scsi_event_node event_list[VIRTIO_SCSI_EVENT_LEN];
- struct virtio_scsi_target_state *tgt[];
+ struct virtio_scsi_target_state *tgt;
};
static struct kmem_cache *virtscsi_cmd_cache;
@@ -568,18 +568,9 @@ static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq,
virtscsi_vq->vq = vq;
}
-static struct virtio_scsi_target_state *virtscsi_alloc_tgt(
- struct virtio_device *vdev)
+static void virtscsi_init_tgt(struct virtio_scsi_target_state *tgt)
{
- struct virtio_scsi_target_state *tgt;
- gfp_t gfp_mask = GFP_KERNEL;
-
- tgt = kmalloc(sizeof(*tgt), gfp_mask);
- if (!tgt)
- return NULL;
-
spin_lock_init(&tgt->tgt_lock);
- return tgt;
}
static void virtscsi_scan(struct virtio_device *vdev)
@@ -593,17 +584,10 @@ static void virtscsi_remove_vqs(struct virtio_device *vdev)
{
struct Scsi_Host *sh = virtio_scsi_host(vdev);
struct virtio_scsi *vscsi = shost_priv(sh);
- u32 i, num_targets;
/* Stop all the virtqueues. */
vdev->config->reset(vdev);
-
- num_targets = sh->max_id;
- for (i = 0; i < num_targets; i++) {
- kfree(vscsi->tgt[i]);
- vscsi->tgt[i] = NULL;
- }
-
+ kfree(vscsi->tgt);
vdev->config->del_vqs(vdev);
}
@@ -640,13 +624,14 @@ static int virtscsi_init(struct virtio_device *vdev,
if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG))
virtscsi_kick_event_all(vscsi);
- for (i = 0; i < num_targets; i++) {
- vscsi->tgt[i] = virtscsi_alloc_tgt(vdev);
- if (!vscsi->tgt[i]) {
- err = -ENOMEM;
- goto out;
- }
+ vscsi->tgt = kmalloc(num_targets * sizeof(vscsi->tgt[0]), GFP_KERNEL);
+ if (!vscsi->tgt) {
+ err = -ENOMEM;
+ goto out;
}
+ for (i = 0; i < num_targets; i++)
+ virtscsi_init_tgt(&vscsi->tgt[i]);
+
err = 0;
out:
@@ -665,10 +650,7 @@ static int virtscsi_probe(struct virtio_device *vdev)
/* Allocate memory and link the structs together. */
num_targets = virtscsi_config_get(vdev, max_target) + 1;
- shost = scsi_host_alloc(&virtscsi_host_template,
- sizeof(*vscsi)
- + num_targets * sizeof(struct virtio_scsi_target_state));
-
+ shost = scsi_host_alloc(&virtscsi_host_template, sizeof(*vscsi));
if (!shost)
return -ENOMEM;
--
1.8.2.rc2
From: Paolo Bonzini <[email protected]>
This patch adds queue steering to virtio-scsi. When a target is sent
multiple requests, we always drive them to the same queue so that FIFO
processing order is kept. However, if a target was idle, we can choose
a queue arbitrarily. In this case the queue is chosen according to the
current VCPU, so the driver expects the number of request queues to be
equal to the number of VCPUs. This makes it easy and fast to select
the queue, and also lets the driver optimize the IRQ affinity for the
virtqueues (each virtqueue's affinity is set to the CPU that "owns"
the queue).
The speedup comes from improving cache locality and giving CPU affinity
to the virtqueues, which is why this scheme was selected. Assuming that
the thread that is sending requests to the device is I/O-bound, it is
likely to be sleeping at the time the ISR is executed, and thus executing
the ISR on the same processor that sent the requests is cheap.
However, the kernel will not execute the ISR on the "best" processor
unless you explicitly set the affinity. This is because in practice
you will have many such I/O-bound processes and thus many otherwise
idle processors. Then the kernel will execute the ISR on a random
processor, rather than the one that is sending requests to the device.
The alternative to per-CPU virtqueues is per-target virtqueues. To
achieve the same locality, we could dynamically choose the virtqueue's
affinity based on the CPU of the last task that sent a request. This
is less appealing because we do not set the affinity directly---we only
provide a hint to the irqbalanced running in userspace. Dynamically
changing the affinity only works if the userspace applies the hint
fast enough.
Cc: [email protected]
Signed-off-by: Paolo Bonzini <[email protected]>
Signed-off-by: Wanlong Gao <[email protected]>
Reviewed-by: Asias He <[email protected]>
---
drivers/scsi/virtio_scsi.c | 269 ++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 241 insertions(+), 28 deletions(-)
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 94a64ad..1d95295 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -22,12 +22,14 @@
#include <linux/virtio_ids.h>
#include <linux/virtio_config.h>
#include <linux/virtio_scsi.h>
+#include <linux/cpu.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_cmnd.h>
#define VIRTIO_SCSI_MEMPOOL_SZ 64
#define VIRTIO_SCSI_EVENT_LEN 8
+#define VIRTIO_SCSI_VQ_BASE 2
/* Command queue element */
struct virtio_scsi_cmd {
@@ -59,24 +61,60 @@ struct virtio_scsi_vq {
struct virtqueue *vq;
};
-/* Per-target queue state */
+/*
+ * Per-target queue state.
+ *
+ * This struct holds the data needed by the queue steering policy. When a
+ * target is sent multiple requests, we need to drive them to the same queue so
+ * that FIFO processing order is kept. However, if a target was idle, we can
+ * choose a queue arbitrarily. In this case the queue is chosen according to
+ * the current VCPU, so the driver expects the number of request queues to be
+ * equal to the number of VCPUs. This makes it easy and fast to select the
+ * queue, and also lets the driver optimize the IRQ affinity for the virtqueues
+ * (each virtqueue's affinity is set to the CPU that "owns" the queue).
+ *
+ * An interesting effect of this policy is that only writes to req_vq need to
+ * take the tgt_lock. Read can be done outside the lock because:
+ *
+ * - writes of req_vq only occur when atomic_inc_return(&tgt->reqs) returns 1.
+ * In that case, no other CPU is reading req_vq: even if they were in
+ * virtscsi_queuecommand_multi, they would be spinning on tgt_lock.
+ *
+ * - reads of req_vq only occur when the target is not idle (reqs != 0).
+ * A CPU that enters virtscsi_queuecommand_multi will not modify req_vq.
+ *
+ * Similarly, decrements of reqs are never concurrent with writes of req_vq.
+ * Thus they can happen outside the tgt_lock, provided of course we make reqs
+ * an atomic_t.
+ */
struct virtio_scsi_target_state {
- /* Never held at the same time as vq_lock. */
+ /* This spinlock never help at the same time as vq_lock. */
spinlock_t tgt_lock;
+
+ /* Count of outstanding requests. */
+ atomic_t reqs;
+
+ /* Currently active virtqueue for requests sent to this target. */
+ struct virtio_scsi_vq *req_vq;
};
/* Driver instance state */
struct virtio_scsi {
struct virtio_device *vdev;
- struct virtio_scsi_vq ctrl_vq;
- struct virtio_scsi_vq event_vq;
- struct virtio_scsi_vq req_vq;
-
/* Get some buffers ready for event vq */
struct virtio_scsi_event_node event_list[VIRTIO_SCSI_EVENT_LEN];
struct virtio_scsi_target_state *tgt;
+
+ u32 num_queues;
+
+ /* Does the affinity hint is set for virtqueues? */
+ bool affinity_hint_set;
+
+ struct virtio_scsi_vq ctrl_vq;
+ struct virtio_scsi_vq event_vq;
+ struct virtio_scsi_vq req_vqs[];
};
static struct kmem_cache *virtscsi_cmd_cache;
@@ -111,6 +149,7 @@ static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf)
struct virtio_scsi_cmd *cmd = buf;
struct scsi_cmnd *sc = cmd->sc;
struct virtio_scsi_cmd_resp *resp = &cmd->resp.cmd;
+ struct virtio_scsi_target_state *tgt = &vscsi->tgt[sc->device->id];
dev_dbg(&sc->device->sdev_gendev,
"cmd %p response %u status %#02x sense_len %u\n",
@@ -165,6 +204,8 @@ static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf)
mempool_free(cmd, virtscsi_cmd_pool);
sc->scsi_done(sc);
+
+ atomic_dec(&tgt->reqs);
}
static void virtscsi_vq_done(struct virtio_scsi *vscsi,
@@ -189,8 +230,42 @@ static void virtscsi_req_done(struct virtqueue *vq)
{
struct Scsi_Host *sh = virtio_scsi_host(vq->vdev);
struct virtio_scsi *vscsi = shost_priv(sh);
+ int index = vq->index - VIRTIO_SCSI_VQ_BASE;
+ struct virtio_scsi_vq *req_vq = &vscsi->req_vqs[index];
+
+ /*
+ * Read req_vq before decrementing the reqs field in
+ * virtscsi_complete_cmd.
+ *
+ * With barriers:
+ *
+ * CPU #0 virtscsi_queuecommand_multi (CPU #1)
+ * ------------------------------------------------------------
+ * lock vq_lock
+ * read req_vq
+ * read reqs (reqs = 1)
+ * write reqs (reqs = 0)
+ * increment reqs (reqs = 1)
+ * write req_vq
+ *
+ * Possible reordering without barriers:
+ *
+ * CPU #0 virtscsi_queuecommand_multi (CPU #1)
+ * ------------------------------------------------------------
+ * lock vq_lock
+ * read reqs (reqs = 1)
+ * write reqs (reqs = 0)
+ * increment reqs (reqs = 1)
+ * write req_vq
+ * read (wrong) req_vq
+ *
+ * We do not need a full smp_rmb, because req_vq is required to get
+ * to tgt->reqs: tgt is &vscsi->tgt[sc->device->id], where sc is stored
+ * in the virtqueue as the user token.
+ */
+ smp_read_barrier_depends();
- virtscsi_vq_done(vscsi, &vscsi->req_vq, virtscsi_complete_cmd);
+ virtscsi_vq_done(vscsi, req_vq, virtscsi_complete_cmd);
};
static void virtscsi_complete_free(struct virtio_scsi *vscsi, void *buf)
@@ -412,9 +487,10 @@ static int virtscsi_kick_cmd(struct virtio_scsi_vq *vq,
return err;
}
-static int virtscsi_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc)
+static int virtscsi_queuecommand(struct virtio_scsi *vscsi,
+ struct virtio_scsi_vq *req_vq,
+ struct scsi_cmnd *sc)
{
- struct virtio_scsi *vscsi = shost_priv(sh);
struct virtio_scsi_cmd *cmd;
int ret;
@@ -448,7 +524,7 @@ static int virtscsi_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc)
BUG_ON(sc->cmd_len > VIRTIO_SCSI_CDB_SIZE);
memcpy(cmd->req.cmd.cdb, sc->cmnd, sc->cmd_len);
- if (virtscsi_kick_cmd(&vscsi->req_vq, cmd,
+ if (virtscsi_kick_cmd(req_vq, cmd,
sizeof cmd->req.cmd, sizeof cmd->resp.cmd,
GFP_ATOMIC) == 0)
ret = 0;
@@ -459,6 +535,53 @@ out:
return ret;
}
+static int virtscsi_queuecommand_single(struct Scsi_Host *sh,
+ struct scsi_cmnd *sc)
+{
+ struct virtio_scsi *vscsi = shost_priv(sh);
+ struct virtio_scsi_target_state *tgt = &vscsi->tgt[sc->device->id];
+
+ atomic_inc(&tgt->reqs);
+ return virtscsi_queuecommand(vscsi, &vscsi->req_vqs[0], sc);
+}
+
+static struct virtio_scsi_vq *virtscsi_pick_vq(struct virtio_scsi *vscsi,
+ struct virtio_scsi_target_state *tgt)
+{
+ struct virtio_scsi_vq *vq;
+ unsigned long flags;
+ u32 queue_num;
+
+ spin_lock_irqsave(&tgt->tgt_lock, flags);
+
+ /*
+ * The memory barrier after atomic_inc_return matches
+ * the smp_read_barrier_depends() in virtscsi_req_done.
+ */
+ if (atomic_inc_return(&tgt->reqs) > 1)
+ vq = ACCESS_ONCE(tgt->req_vq);
+ else {
+ queue_num = smp_processor_id();
+ while (unlikely(queue_num >= vscsi->num_queues))
+ queue_num -= vscsi->num_queues;
+
+ tgt->req_vq = vq = &vscsi->req_vqs[queue_num];
+ }
+
+ spin_unlock_irqrestore(&tgt->tgt_lock, flags);
+ return vq;
+}
+
+static int virtscsi_queuecommand_multi(struct Scsi_Host *sh,
+ struct scsi_cmnd *sc)
+{
+ struct virtio_scsi *vscsi = shost_priv(sh);
+ struct virtio_scsi_target_state *tgt = &vscsi->tgt[sc->device->id];
+ struct virtio_scsi_vq *req_vq = virtscsi_pick_vq(vscsi, tgt);
+
+ return virtscsi_queuecommand(vscsi, req_vq, sc);
+}
+
static int virtscsi_tmf(struct virtio_scsi *vscsi, struct virtio_scsi_cmd *cmd)
{
DECLARE_COMPLETION_ONSTACK(comp);
@@ -527,12 +650,26 @@ static int virtscsi_abort(struct scsi_cmnd *sc)
return virtscsi_tmf(vscsi, cmd);
}
-static struct scsi_host_template virtscsi_host_template = {
+static struct scsi_host_template virtscsi_host_template_single = {
.module = THIS_MODULE,
.name = "Virtio SCSI HBA",
.proc_name = "virtio_scsi",
- .queuecommand = virtscsi_queuecommand,
.this_id = -1,
+ .queuecommand = virtscsi_queuecommand_single,
+ .eh_abort_handler = virtscsi_abort,
+ .eh_device_reset_handler = virtscsi_device_reset,
+
+ .can_queue = 1024,
+ .dma_boundary = UINT_MAX,
+ .use_clustering = ENABLE_CLUSTERING,
+};
+
+static struct scsi_host_template virtscsi_host_template_multi = {
+ .module = THIS_MODULE,
+ .name = "Virtio SCSI HBA",
+ .proc_name = "virtio_scsi",
+ .this_id = -1,
+ .queuecommand = virtscsi_queuecommand_multi,
.eh_abort_handler = virtscsi_abort,
.eh_device_reset_handler = virtscsi_device_reset,
@@ -558,6 +695,47 @@ static struct scsi_host_template virtscsi_host_template = {
&__val, sizeof(__val)); \
})
+static void __virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity)
+{
+ int i;
+ int cpu;
+
+ /* In multiqueue mode, when the number of cpu is equal
+ * to the number of request queues, we let the qeueues
+ * to be private to one cpu by setting the affinity hint
+ * to eliminate the contention.
+ */
+ if ((vscsi->num_queues == 1 ||
+ vscsi->num_queues != num_online_cpus()) && affinity) {
+ if (vscsi->affinity_hint_set)
+ affinity = false;
+ else
+ return;
+ }
+
+ if (affinity) {
+ i = 0;
+ for_each_online_cpu(cpu) {
+ virtqueue_set_affinity(vscsi->req_vqs[i].vq, cpu);
+ i++;
+ }
+
+ vscsi->affinity_hint_set = true;
+ } else {
+ for (i = 0; i < vscsi->num_queues - VIRTIO_SCSI_VQ_BASE; i++)
+ virtqueue_set_affinity(vscsi->req_vqs[i].vq, -1);
+
+ vscsi->affinity_hint_set = false;
+ }
+}
+
+static void virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity)
+{
+ get_online_cpus();
+ __virtscsi_set_affinity(vscsi, affinity);
+ put_online_cpus();
+}
+
static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq,
struct virtqueue *vq)
{
@@ -565,9 +743,11 @@ static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq,
virtscsi_vq->vq = vq;
}
-static void virtscsi_init_tgt(struct virtio_scsi_target_state *tgt)
+static void virtscsi_init_tgt(struct virtio_scsi *vscsi, int i)
{
+ struct virtio_scsi_target_state *tgt = &vscsi->tgt[i];
spin_lock_init(&tgt->tgt_lock);
+ atomic_set(&tgt->reqs, 0);
}
static void virtscsi_scan(struct virtio_device *vdev)
@@ -582,6 +762,8 @@ static void virtscsi_remove_vqs(struct virtio_device *vdev)
struct Scsi_Host *sh = virtio_scsi_host(vdev);
struct virtio_scsi *vscsi = shost_priv(sh);
+ virtscsi_set_affinity(vscsi, false);
+
/* Stop all the virtqueues. */
vdev->config->reset(vdev);
kfree(vscsi->tgt);
@@ -592,28 +774,43 @@ static int virtscsi_init(struct virtio_device *vdev,
struct virtio_scsi *vscsi, int num_targets)
{
int err;
- struct virtqueue *vqs[3];
u32 i;
+ u32 num_vqs;
+ vq_callback_t **callbacks;
+ const char **names;
+ struct virtqueue **vqs;
- vq_callback_t *callbacks[] = {
- virtscsi_ctrl_done,
- virtscsi_event_done,
- virtscsi_req_done
- };
- const char *names[] = {
- "control",
- "event",
- "request"
- };
+ num_vqs = vscsi->num_queues + VIRTIO_SCSI_VQ_BASE;
+ vqs = kmalloc(num_vqs * sizeof(struct virtqueue *), GFP_KERNEL);
+ callbacks = kmalloc(num_vqs * sizeof(vq_callback_t *), GFP_KERNEL);
+ names = kmalloc(num_vqs * sizeof(char *), GFP_KERNEL);
+
+ if (!callbacks || !vqs || !names) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ callbacks[0] = virtscsi_ctrl_done;
+ callbacks[1] = virtscsi_event_done;
+ names[0] = "control";
+ names[1] = "event";
+ for (i = VIRTIO_SCSI_VQ_BASE; i < num_vqs; i++) {
+ callbacks[i] = virtscsi_req_done;
+ names[i] = "request";
+ }
/* Discover virtqueues and write information to configuration. */
- err = vdev->config->find_vqs(vdev, 3, vqs, callbacks, names);
+ err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names);
if (err)
return err;
virtscsi_init_vq(&vscsi->ctrl_vq, vqs[0]);
virtscsi_init_vq(&vscsi->event_vq, vqs[1]);
- virtscsi_init_vq(&vscsi->req_vq, vqs[2]);
+ for (i = VIRTIO_SCSI_VQ_BASE; i < num_vqs; i++)
+ virtscsi_init_vq(&vscsi->req_vqs[i - VIRTIO_SCSI_VQ_BASE],
+ vqs[i]);
+
+ virtscsi_set_affinity(vscsi, true);
virtscsi_config_set(vdev, cdb_size, VIRTIO_SCSI_CDB_SIZE);
virtscsi_config_set(vdev, sense_size, VIRTIO_SCSI_SENSE_SIZE);
@@ -627,11 +824,14 @@ static int virtscsi_init(struct virtio_device *vdev,
goto out;
}
for (i = 0; i < num_targets; i++)
- virtscsi_init_tgt(&vscsi->tgt[i]);
+ virtscsi_init_tgt(vscsi, i);
err = 0;
out:
+ kfree(names);
+ kfree(callbacks);
+ kfree(vqs);
if (err)
virtscsi_remove_vqs(vdev);
return err;
@@ -644,10 +844,22 @@ static int virtscsi_probe(struct virtio_device *vdev)
int err;
u32 sg_elems, num_targets;
u32 cmd_per_lun;
+ u32 num_queues;
+ struct scsi_host_template *hostt;
+
+ /* We need to know how many queues before we allocate. */
+ num_queues = virtscsi_config_get(vdev, num_queues) ? : 1;
/* Allocate memory and link the structs together. */
num_targets = virtscsi_config_get(vdev, max_target) + 1;
- shost = scsi_host_alloc(&virtscsi_host_template, sizeof(*vscsi));
+
+ if (num_queues == 1)
+ hostt = &virtscsi_host_template_single;
+ else
+ hostt = &virtscsi_host_template_multi;
+
+ shost = scsi_host_alloc(hostt,
+ sizeof(*vscsi) + sizeof(vscsi->req_vqs[0]) * num_queues);
if (!shost)
return -ENOMEM;
@@ -655,6 +867,7 @@ static int virtscsi_probe(struct virtio_device *vdev)
shost->sg_tablesize = sg_elems;
vscsi = shost_priv(shost);
vscsi->vdev = vdev;
+ vscsi->num_queues = num_queues;
vdev->priv = shost;
err = virtscsi_init(vdev, vscsi, num_targets);
--
1.8.2.rc2
From: Paolo Bonzini <[email protected]>
This will be needed soon in order to retrieve the per-target
struct.
Cc: [email protected]
Signed-off-by: Paolo Bonzini <[email protected]>
Signed-off-by: Wanlong Gao <[email protected]>
Reviewed-by: Asias He <[email protected]>
---
drivers/scsi/virtio_scsi.c | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 3256c51..bcab9d7 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -106,7 +106,7 @@ static void virtscsi_compute_resid(struct scsi_cmnd *sc, u32 resid)
*
* Called with vq_lock held.
*/
-static void virtscsi_complete_cmd(void *buf)
+static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf)
{
struct virtio_scsi_cmd *cmd = buf;
struct scsi_cmnd *sc = cmd->sc;
@@ -167,7 +167,8 @@ static void virtscsi_complete_cmd(void *buf)
sc->scsi_done(sc);
}
-static void virtscsi_vq_done(struct virtqueue *vq, void (*fn)(void *buf))
+static void virtscsi_vq_done(struct virtio_scsi *vscsi, struct virtqueue *vq,
+ void (*fn)(struct virtio_scsi *vscsi, void *buf))
{
void *buf;
unsigned int len;
@@ -175,7 +176,7 @@ static void virtscsi_vq_done(struct virtqueue *vq, void (*fn)(void *buf))
do {
virtqueue_disable_cb(vq);
while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
- fn(buf);
+ fn(vscsi, buf);
} while (!virtqueue_enable_cb(vq));
}
@@ -186,11 +187,11 @@ static void virtscsi_req_done(struct virtqueue *vq)
unsigned long flags;
spin_lock_irqsave(&vscsi->req_vq.vq_lock, flags);
- virtscsi_vq_done(vq, virtscsi_complete_cmd);
+ virtscsi_vq_done(vscsi, vq, virtscsi_complete_cmd);
spin_unlock_irqrestore(&vscsi->req_vq.vq_lock, flags);
};
-static void virtscsi_complete_free(void *buf)
+static void virtscsi_complete_free(struct virtio_scsi *vscsi, void *buf)
{
struct virtio_scsi_cmd *cmd = buf;
@@ -207,7 +208,7 @@ static void virtscsi_ctrl_done(struct virtqueue *vq)
unsigned long flags;
spin_lock_irqsave(&vscsi->ctrl_vq.vq_lock, flags);
- virtscsi_vq_done(vq, virtscsi_complete_free);
+ virtscsi_vq_done(vscsi, vq, virtscsi_complete_free);
spin_unlock_irqrestore(&vscsi->ctrl_vq.vq_lock, flags);
};
@@ -331,7 +332,7 @@ static void virtscsi_handle_event(struct work_struct *work)
virtscsi_kick_event(vscsi, event_node);
}
-static void virtscsi_complete_event(void *buf)
+static void virtscsi_complete_event(struct virtio_scsi *vscsi, void *buf)
{
struct virtio_scsi_event_node *event_node = buf;
@@ -346,7 +347,7 @@ static void virtscsi_event_done(struct virtqueue *vq)
unsigned long flags;
spin_lock_irqsave(&vscsi->event_vq.vq_lock, flags);
- virtscsi_vq_done(vq, virtscsi_complete_event);
+ virtscsi_vq_done(vscsi, vq, virtscsi_complete_event);
spin_unlock_irqrestore(&vscsi->event_vq.vq_lock, flags);
};
--
1.8.2.rc2
Add hot cpu notifier to reset the request virtqueue affinity
when doing cpu hotplug.
Cc: [email protected]
Signed-off-by: Paolo Bonzini <[email protected]>
Signed-off-by: Wanlong Gao <[email protected]>
Reviewed-by: Asias He <[email protected]>
---
drivers/scsi/virtio_scsi.c | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 1d95295..83023f5 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -112,6 +112,9 @@ struct virtio_scsi {
/* Does the affinity hint is set for virtqueues? */
bool affinity_hint_set;
+ /* CPU hotplug notifier */
+ struct notifier_block nb;
+
struct virtio_scsi_vq ctrl_vq;
struct virtio_scsi_vq event_vq;
struct virtio_scsi_vq req_vqs[];
@@ -736,6 +739,23 @@ static void virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity)
put_online_cpus();
}
+static int virtscsi_cpu_callback(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ struct virtio_scsi *vscsi = container_of(nfb, struct virtio_scsi, nb);
+ switch(action) {
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
+ __virtscsi_set_affinity(vscsi, true);
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_OK;
+}
+
static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq,
struct virtqueue *vq)
{
@@ -874,6 +894,13 @@ static int virtscsi_probe(struct virtio_device *vdev)
if (err)
goto virtscsi_init_failed;
+ vscsi->nb.notifier_call = &virtscsi_cpu_callback;
+ err = register_hotcpu_notifier(&vscsi->nb);
+ if (err) {
+ pr_err("registering cpu notifier failed\n");
+ goto scsi_add_host_failed;
+ }
+
cmd_per_lun = virtscsi_config_get(vdev, cmd_per_lun) ?: 1;
shost->cmd_per_lun = min_t(u32, cmd_per_lun, shost->can_queue);
shost->max_sectors = virtscsi_config_get(vdev, max_sectors) ?: 0xFFFF;
@@ -911,6 +938,8 @@ static void virtscsi_remove(struct virtio_device *vdev)
scsi_remove_host(shost);
+ unregister_hotcpu_notifier(&vscsi->nb);
+
virtscsi_remove_vqs(vdev);
scsi_host_put(shost);
}
--
1.8.2.rc2
On Tue, 2013-03-19 at 17:57 +0800, Wanlong Gao wrote:
> From: Paolo Bonzini <[email protected]>
>
> virtio_scsi_target_state is now empty. We will find new uses for it in
> the next few patches, so this patch does not drop it completely.
> However, having dropped the sglist flexible array member, we can turn
> the tgt array-of-pointers into a simple array. This simplifies the
> allocation.
>
> Even simpler would be to place the virtio_scsi_target_state structs in a
> flexible array member at the end of struct virtio_scsi. But we do not
> do that, because we will place the virtqueues there in the next patches.
I'm really sorry, but I must have been asleep at the wheel when I let
code like this go in. No modern driver should have fixed arrays for
target information. The way this is supposed to work is that you have
entries in the host template for target_alloc and target_destroy. You
hook into these and attach your struct virtio_scsi_target_state to
scsi_target->hostdata, which you kmalloc in the target_alloc routine and
kfree in the target_destroy routine. Now you get at it from the sdev
with scsi_target(sdev)->hostdata. No messing around with fixed size
arrays and bulk memory allocation and no need to pass in the maximum
target size as a parameter because everything should now happen
dynamically.
Since you're redoing the code anyway, can you fix it to work this way?
Thanks,
James
Il 19/03/2013 12:32, James Bottomley ha scritto:
> On Tue, 2013-03-19 at 17:57 +0800, Wanlong Gao wrote:
>> From: Paolo Bonzini <[email protected]>
>>
>> virtio_scsi_target_state is now empty. We will find new uses for it in
>> the next few patches, so this patch does not drop it completely.
>> However, having dropped the sglist flexible array member, we can turn
>> the tgt array-of-pointers into a simple array. This simplifies the
>> allocation.
>>
>> Even simpler would be to place the virtio_scsi_target_state structs in a
>> flexible array member at the end of struct virtio_scsi. But we do not
>> do that, because we will place the virtqueues there in the next patches.
>
> I'm really sorry, but I must have been asleep at the wheel when I let
> code like this go in. No modern driver should have fixed arrays for
> target information. The way this is supposed to work is that you have
> entries in the host template for target_alloc and target_destroy. You
> hook into these and attach your struct virtio_scsi_target_state to
> scsi_target->hostdata,
So that would be sc->device->sdev_target->hostdata.
> which you kmalloc in the target_alloc routine and
> kfree in the target_destroy routine. Now you get at it from the sdev
> with scsi_target(sdev)->hostdata. No messing around with fixed size
> arrays and bulk memory allocation and no need to pass in the maximum
> target size as a parameter because everything should now happen
> dynamically.
The maximum target size is not a module parameter; it is given by the
host, so the module itself is not placing an arbitrary limitation. Still,
it is a good idea to do it like this.
Thanks for the review.
Paolo
> Since you're redoing the code anyway, can you fix it to work this way?
>
> Thanks,
>
> James
>
>
On Tue, 2013-03-19 at 12:45 +0100, Paolo Bonzini wrote:
> Il 19/03/2013 12:32, James Bottomley ha scritto:
> > On Tue, 2013-03-19 at 17:57 +0800, Wanlong Gao wrote:
> >> From: Paolo Bonzini <[email protected]>
> >>
> >> virtio_scsi_target_state is now empty. We will find new uses for it in
> >> the next few patches, so this patch does not drop it completely.
> >> However, having dropped the sglist flexible array member, we can turn
> >> the tgt array-of-pointers into a simple array. This simplifies the
> >> allocation.
> >>
> >> Even simpler would be to place the virtio_scsi_target_state structs in a
> >> flexible array member at the end of struct virtio_scsi. But we do not
> >> do that, because we will place the virtqueues there in the next patches.
> >
> > I'm really sorry, but I must have been asleep at the wheel when I let
> > code like this go in. No modern driver should have fixed arrays for
> > target information. The way this is supposed to work is that you have
> > entries in the host template for target_alloc and target_destroy. You
> > hook into these and attach your struct virtio_scsi_target_state to
> > scsi_target->hostdata,
>
> So that would be sc->device->sdev_target->hostdata.
No, unfortunate name, but it's used for something else (actually, I
think it *was* used by something else and is unused now). The construct
is
scsi_target(sc->device)->hostdata
James
This looks pretty good!
I rather like the (lack of) locking in I/O completion (around the req
count vs. target/queue binding). It is unfortunate that you need to
hold the per-target lock in virtscsi_pick_vq() though; have any idea
how much that lock hurts?
Just two minor comments:
(in struct virtio_scsi_target_data):
+ /* This spinlock never help at the same time as vq_lock. */
^^^^ held?
(in struct virtio_scsi):
+ /* Does the affinity hint is set for virtqueues? */
Could you rephrase that, please?
Tested on qemu and w/ Google Compute Engine's virtio-scsi device.
Reviewed-and-tested-by: Venkatesh Srinivas <[email protected]>
Thanks,
-- vs;
On 03/20/2013 09:46 AM, Venkatesh Srinivas wrote:
> This looks pretty good!
>
> I rather like the (lack of) locking in I/O completion (around the req
> count vs. target/queue binding). It is unfortunate that you need to hold the per-target lock in virtscsi_pick_vq() though; have any idea
> how much that lock hurts?
Paolo?
>
> Just two minor comments:
>
> (in struct virtio_scsi_target_data):
> + /* This spinlock never help at the same time as vq_lock. */
> ^^^^ held?
>
> (in struct virtio_scsi):
> + /* Does the affinity hint is set for virtqueues? */
> Could you rephrase that, please?
Thank you, fixed in V6, please review.
>
> Tested on qemu and w/ Google Compute Engine's virtio-scsi device.
Cool.
>
> Reviewed-and-tested-by: Venkatesh Srinivas <[email protected]>
Would you mind reviewing and testing V6? Thank you.
Regards,
Wanlong Gao
>
> Thanks,
> -- vs;
>
Il 20/03/2013 02:46, Venkatesh Srinivas ha scritto:
> This looks pretty good!
>
> I rather like the (lack of) locking in I/O completion (around the req
> count vs. target/queue binding). It is unfortunate that you need to hold
> the per-target lock in virtscsi_pick_vq() though; have any idea
> how much that lock hurts?
It doesn't hurt, the lock is mostly uncontended.
- if you have lots of I/O, it's held for a very small period of time; if
you have little I/O, it's uncontended anyway.
- the SCSI layer will serialize on the host lock anyway before calling
into the LLD. Locks are "pipelined" so that in the end the host lock
will be a bigger bottleneck than the others.
Most of the time it only costs 2 extra atomic operations, which should
be half a microsecond or less.
Paolo
> Just two minor comments:
>
> (in struct virtio_scsi_target_data):
> + /* This spinlock never help at the same time as vq_lock. */
> ^^^^ held?
>
> (in struct virtio_scsi):
> + /* Does the affinity hint is set for virtqueues? */
> Could you rephrase that, please?
>
> Tested on qemu and w/ Google Compute Engine's virtio-scsi device.
>
> Reviewed-and-tested-by: Venkatesh Srinivas <[email protected]>
>
> Thanks,
> -- vs;