2019-11-15 20:58:52

by Vivek Goyal

[permalink] [raw]
Subject: [PATCH 0/4] [RFC] virtiofs: Add a notification queue

Hi,

These are RFC patches for adding a notification queue to allow sending
notifications from host to guest.

It also adds support for blocking remote posix locks using the newly
introduced notification queue.

These patches apply on top of 5.4-rc8 plus following patch series I had
posted a while back.

https://lkml.org/lkml/2019/10/30/493

These changes require virtio spec changes as well. I have yet to do
that.

Thanks
Vivek

Vivek Goyal (4):
virtiofs: Provide a helper function for virtqueue initialization
virtiofs: Add an index to keep track of first request queue
virtiofs: Add a virtqueue for notifications
virtiofs: Support blocking posix locks (fcntl(F_SETLKW))

fs/fuse/virtio_fs.c | 328 ++++++++++++++++++++++++++++++---
include/uapi/linux/fuse.h | 7 +
include/uapi/linux/virtio_fs.h | 5 +
3 files changed, 310 insertions(+), 30 deletions(-)

--
2.20.1


2019-11-15 20:59:00

by Vivek Goyal

[permalink] [raw]
Subject: [PATCH 4/4] virtiofs: Support blocking posix locks (fcntl(F_SETLKW))

As of now we don't support blocking variant of posix locks and daemon returns
-EOPNOTSUPP. Reason being that it can lead to deadlocks. Virtqueue size is
limited and it is possible we fill virtqueue with all the requests of
fcntl(F_SETLKW) and wait for reply. And later a subsequent unlock request
can't make progress because virtqueue is full. And that means F_SETLKW can't
make progress and we are deadlocked.

Use notification queue to solve this problem. After submitting lock request
device will send a reply asking requester to wait. Once lock is available,
requester will get a notification saying locking is available. That way
we don't keep the request virtqueue busy while we are waiting for lock
and further unlock requests can make progress.

When we get a reply in response to lock request, we need a way to know if
we need to wait for notification or not. I have overloaded the
fuse_out_header->error field. If the value of ->error is 1, that's a signal
to caller to wait for lock notification.

Signed-off-by: Vivek Goyal <[email protected]>
---
fs/fuse/virtio_fs.c | 78 ++++++++++++++++++++++++++++++++++++++-
include/uapi/linux/fuse.h | 7 ++++
2 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 21d8d9d7d317..8aa9fc996556 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -35,6 +35,7 @@ struct virtio_fs_vq {
struct work_struct done_work;
struct list_head queued_reqs;
struct list_head end_reqs; /* End these requests */
+ struct list_head wait_reqs; /* requests waiting for notification */
struct virtio_fs_notify_node *notify_nodes;
struct list_head notify_reqs; /* List for queuing notify requests */
struct delayed_work dispatch_work;
@@ -85,7 +86,6 @@ struct virtio_fs_notify_node {

static int virtio_fs_enqueue_all_notify(struct virtio_fs_vq *fsvq);

-
static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
{
struct virtio_fs *fs = vq->vdev->priv;
@@ -513,13 +513,75 @@ static int virtio_fs_enqueue_all_notify(struct virtio_fs_vq *fsvq)
return 0;
}

+static int notify_complete_waiting_req(struct virtio_fs *vfs,
+ struct fuse_notify_lock_out *out_args)
+{
+ struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_REQUEST];
+ struct fuse_req *req, *next;
+ bool found = false;
+ struct fuse_conn *fc = fsvq->fud->fc;
+
+ /* Find waiting request with the unique number and end it */
+ spin_lock(&fsvq->lock);
+ list_for_each_entry_safe(req, next, &fsvq->wait_reqs, list) {
+ if (req->in.h.unique == out_args->id) {
+ list_del_init(&req->list);
+ clear_bit(FR_SENT, &req->flags);
+ /* Transfer error code from notify */
+ req->out.h.error = out_args->error;
+ found = true;
+ break;
+ }
+ }
+ spin_unlock(&fsvq->lock);
+
+ /*
+ * TODO: It is possible that some re-ordering happens and notify
+ * comes before request is complete. Deal with it.
+ */
+ if (found) {
+ fuse_request_end(fc, req);
+ spin_lock(&fsvq->lock);
+ dec_in_flight_req(fsvq);
+ spin_unlock(&fsvq->lock);
+ } else
+ pr_debug("virtio-fs: Did not find waiting request with"
+ " unique=0x%llx\n", out_args->id);
+
+ return 0;
+}
+
+static int virtio_fs_handle_notify(struct virtio_fs *vfs,
+ struct virtio_fs_notify *notify)
+{
+ int ret = 0;
+ struct fuse_out_header *oh = &notify->out_hdr;
+ struct fuse_notify_lock_out *lo;
+
+ /*
+ * For notifications, oh.unique is 0 and oh->error contains code
+ * for which notification has arrived.
+ */
+ switch(oh->error) {
+ case FUSE_NOTIFY_LOCK:
+ lo = (struct fuse_notify_lock_out *) &notify->outarg;
+ notify_complete_waiting_req(vfs, lo);
+ break;
+ default:
+ printk("virtio-fs: Unexpected notification %d\n", oh->error);
+ }
+ return ret;
+}
+
static void virtio_fs_notify_done_work(struct work_struct *work)
{
struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
done_work);
struct virtqueue *vq = fsvq->vq;
+ struct virtio_fs *vfs = vq->vdev->priv;
LIST_HEAD(reqs);
struct virtio_fs_notify_node *notify, *next;
+ struct fuse_out_header *oh;

spin_lock(&fsvq->lock);
do {
@@ -535,6 +597,10 @@ static void virtio_fs_notify_done_work(struct work_struct *work)

/* Process notify */
list_for_each_entry_safe(notify, next, &reqs, list) {
+ oh = &notify->notify.out_hdr;
+ WARN_ON(oh->unique);
+ /* Handle notification */
+ virtio_fs_handle_notify(vfs, &notify->notify);
spin_lock(&fsvq->lock);
dec_in_flight_req(fsvq);
list_del_init(&notify->list);
@@ -656,6 +722,15 @@ static void virtio_fs_requests_done_work(struct work_struct *work)
* TODO verify that server properly follows FUSE protocol
* (oh.uniq, oh.len)
*/
+ if (req->out.h.error == 1) {
+ /* Wait for notification to complete request */
+ list_del_init(&req->list);
+ spin_lock(&fsvq->lock);
+ list_add_tail(&req->list, &fsvq->wait_reqs);
+ spin_unlock(&fsvq->lock);
+ continue;
+ }
+
args = req->args;
copy_args_from_argbuf(args, req);

@@ -705,6 +780,7 @@ static int virtio_fs_init_vq(struct virtio_fs *fs, struct virtio_fs_vq *fsvq,
strncpy(fsvq->name, name, VQ_NAME_LEN);
spin_lock_init(&fsvq->lock);
INIT_LIST_HEAD(&fsvq->queued_reqs);
+ INIT_LIST_HEAD(&fsvq->wait_reqs);
INIT_LIST_HEAD(&fsvq->end_reqs);
INIT_LIST_HEAD(&fsvq->notify_reqs);
init_completion(&fsvq->in_flight_zero);
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 373cada89815..45f0c4efec8e 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -481,6 +481,7 @@ enum fuse_notify_code {
FUSE_NOTIFY_STORE = 4,
FUSE_NOTIFY_RETRIEVE = 5,
FUSE_NOTIFY_DELETE = 6,
+ FUSE_NOTIFY_LOCK = 7,
FUSE_NOTIFY_CODE_MAX,
};

@@ -868,6 +869,12 @@ struct fuse_notify_retrieve_in {
uint64_t dummy4;
};

+struct fuse_notify_lock_out {
+ uint64_t id;
+ int32_t error;
+ int32_t padding;
+};
+
/* Device ioctls: */
#define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t)

--
2.20.1

2019-11-15 21:00:41

by Vivek Goyal

[permalink] [raw]
Subject: [PATCH 2/4] virtiofs: Add an index to keep track of first request queue

We have many virtqueues and first queue which carries fuse normal requests
(except forget requests) has index pointed to by enum VQ_REQUEST. This works
fine as long as number of queues are not dynamic.

I am about to introduce one more virtqueue, called notification queue, which
will be present only if device on host supports it. That means index of
request queue will change depending on if notification queue is present
or not.

So, add a variable to keep track of that index and this will help when
notification queue is added in next patch.

Signed-off-by: Vivek Goyal <[email protected]>
---
fs/fuse/virtio_fs.c | 21 +++++++++++++--------
1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index a0fb0a93980c..1ab4b7b83707 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -49,6 +49,7 @@ struct virtio_fs {
struct virtio_fs_vq *vqs;
unsigned int nvqs; /* number of virtqueues */
unsigned int num_request_queues; /* number of request queues */
+ unsigned int first_reqq_idx; /* First request queue idx */
};

struct virtio_fs_forget_req {
@@ -597,7 +598,9 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
if (fs->num_request_queues == 0)
return -EINVAL;

- fs->nvqs = VQ_REQUEST + fs->num_request_queues;
+ /* One hiprio queue and rest are request queues */
+ fs->nvqs = 1 + fs->num_request_queues;
+ fs->first_reqq_idx = 1;
fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
if (!fs->vqs)
return -ENOMEM;
@@ -617,10 +620,11 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;

/* Initialize the requests virtqueues */
- for (i = VQ_REQUEST; i < fs->nvqs; i++) {
+ for (i = fs->first_reqq_idx; i < fs->nvqs; i++) {
char vq_name[VQ_NAME_LEN];

- snprintf(vq_name, VQ_NAME_LEN, "requests.%u", i - VQ_REQUEST);
+ snprintf(vq_name, VQ_NAME_LEN, "requests.%u",
+ i - fs->first_reqq_idx);
virtio_fs_init_vq(&fs->vqs[i], vq_name, VQ_REQUEST);
callbacks[i] = virtio_fs_vq_done;
names[i] = fs->vqs[i].name;
@@ -990,7 +994,7 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
- unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */
+ unsigned int queue_id;
struct virtio_fs *fs;
struct fuse_req *req;
struct virtio_fs_vq *fsvq;
@@ -1004,6 +1008,7 @@ __releases(fiq->lock)
spin_unlock(&fiq->lock);

fs = fiq->priv;
+ queue_id = fs->first_reqq_idx;

pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n",
__func__, req->in.h.opcode, req->in.h.unique,
@@ -1077,7 +1082,7 @@ static int virtio_fs_fill_super(struct super_block *sb)

err = -ENOMEM;
/* Allocate fuse_dev for hiprio and notification queues */
- for (i = 0; i < VQ_REQUEST; i++) {
+ for (i = 0; i < fs->first_reqq_idx; i++) {
struct virtio_fs_vq *fsvq = &fs->vqs[i];

fsvq->fud = fuse_dev_alloc();
@@ -1085,17 +1090,17 @@ static int virtio_fs_fill_super(struct super_block *sb)
goto err_free_fuse_devs;
}

- ctx.fudptr = (void **)&fs->vqs[VQ_REQUEST].fud;
+ ctx.fudptr = (void **)&fs->vqs[fs->first_reqq_idx].fud;
err = fuse_fill_super_common(sb, &ctx);
if (err < 0)
goto err_free_fuse_devs;

- fc = fs->vqs[VQ_REQUEST].fud->fc;
+ fc = fs->vqs[fs->first_reqq_idx].fud->fc;

for (i = 0; i < fs->nvqs; i++) {
struct virtio_fs_vq *fsvq = &fs->vqs[i];

- if (i == VQ_REQUEST)
+ if (i == fs->first_reqq_idx)
continue; /* already initialized */
fuse_dev_install(fsvq->fud, fc);
}
--
2.20.1

2019-11-15 21:00:49

by Vivek Goyal

[permalink] [raw]
Subject: [PATCH 1/4] virtiofs: Provide a helper function for virtqueue initialization

This reduces code duplication and makes it a little easier to read the code.

Signed-off-by: Vivek Goyal <[email protected]>
---
fs/fuse/virtio_fs.c | 50 +++++++++++++++++++++++++++------------------
1 file changed, 30 insertions(+), 20 deletions(-)

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index b5ba83ef1914..a0fb0a93980c 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -24,6 +24,8 @@ enum {
VQ_REQUEST
};

+#define VQ_NAME_LEN 24
+
/* Per-virtqueue state */
struct virtio_fs_vq {
spinlock_t lock;
@@ -36,7 +38,7 @@ struct virtio_fs_vq {
bool connected;
long in_flight;
struct completion in_flight_zero; /* No inflight requests */
- char name[24];
+ char name[VQ_NAME_LEN];
} ____cacheline_aligned_in_smp;

/* A virtio-fs device instance */
@@ -560,6 +562,26 @@ static void virtio_fs_vq_done(struct virtqueue *vq)
schedule_work(&fsvq->done_work);
}

+static void virtio_fs_init_vq(struct virtio_fs_vq *fsvq, char *name,
+ int vq_type)
+{
+ strncpy(fsvq->name, name, VQ_NAME_LEN);
+ spin_lock_init(&fsvq->lock);
+ INIT_LIST_HEAD(&fsvq->queued_reqs);
+ INIT_LIST_HEAD(&fsvq->end_reqs);
+ init_completion(&fsvq->in_flight_zero);
+
+ if (vq_type == VQ_REQUEST) {
+ INIT_WORK(&fsvq->done_work, virtio_fs_requests_done_work);
+ INIT_DELAYED_WORK(&fsvq->dispatch_work,
+ virtio_fs_request_dispatch_work);
+ } else {
+ INIT_WORK(&fsvq->done_work, virtio_fs_hiprio_done_work);
+ INIT_DELAYED_WORK(&fsvq->dispatch_work,
+ virtio_fs_hiprio_dispatch_work);
+ }
+}
+
/* Initialize virtqueues */
static int virtio_fs_setup_vqs(struct virtio_device *vdev,
struct virtio_fs *fs)
@@ -575,7 +597,7 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
if (fs->num_request_queues == 0)
return -EINVAL;

- fs->nvqs = 1 + fs->num_request_queues;
+ fs->nvqs = VQ_REQUEST + fs->num_request_queues;
fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
if (!fs->vqs)
return -ENOMEM;
@@ -589,29 +611,17 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
goto out;
}

+ /* Initialize the hiprio/forget request virtqueue */
callbacks[VQ_HIPRIO] = virtio_fs_vq_done;
- snprintf(fs->vqs[VQ_HIPRIO].name, sizeof(fs->vqs[VQ_HIPRIO].name),
- "hiprio");
+ virtio_fs_init_vq(&fs->vqs[VQ_HIPRIO], "hiprio", VQ_HIPRIO);
names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;
- INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work);
- INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs);
- INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].end_reqs);
- INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work,
- virtio_fs_hiprio_dispatch_work);
- init_completion(&fs->vqs[VQ_HIPRIO].in_flight_zero);
- spin_lock_init(&fs->vqs[VQ_HIPRIO].lock);

/* Initialize the requests virtqueues */
for (i = VQ_REQUEST; i < fs->nvqs; i++) {
- spin_lock_init(&fs->vqs[i].lock);
- INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work);
- INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work,
- virtio_fs_request_dispatch_work);
- INIT_LIST_HEAD(&fs->vqs[i].queued_reqs);
- INIT_LIST_HEAD(&fs->vqs[i].end_reqs);
- init_completion(&fs->vqs[i].in_flight_zero);
- snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name),
- "requests.%u", i - VQ_REQUEST);
+ char vq_name[VQ_NAME_LEN];
+
+ snprintf(vq_name, VQ_NAME_LEN, "requests.%u", i - VQ_REQUEST);
+ virtio_fs_init_vq(&fs->vqs[i], vq_name, VQ_REQUEST);
callbacks[i] = virtio_fs_vq_done;
names[i] = fs->vqs[i].name;
}
--
2.20.1

2019-11-15 21:01:21

by Vivek Goyal

[permalink] [raw]
Subject: [PATCH 3/4] virtiofs: Add a virtqueue for notifications

Add a new virtqueue for notifications. This will allow device to send
notifications to guest. This queue is created only if device supports
it. This is negotiated using feature bit VIRTIO_FS_F_NOTIFICATION.

Given the architecture of virtqueue, one needs to queue up pre-allocated
elements in notification queue and device can pop these elements and fill
the notification info and send it back. Size of notification buffer is
negotiable and is specified by device through config space. This will
allow us to add and support more notification types without having to
change the spec.

Signed-off-by: Vivek Goyal <[email protected]>
---
fs/fuse/virtio_fs.c | 199 +++++++++++++++++++++++++++++++--
include/uapi/linux/virtio_fs.h | 5 +
2 files changed, 193 insertions(+), 11 deletions(-)

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 1ab4b7b83707..21d8d9d7d317 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -21,10 +21,12 @@ static LIST_HEAD(virtio_fs_instances);

enum {
VQ_HIPRIO,
+ VQ_NOTIFY,
VQ_REQUEST
};

#define VQ_NAME_LEN 24
+#define VQ_NOTIFY_ELEMS 16 /* Number of notification elements */

/* Per-virtqueue state */
struct virtio_fs_vq {
@@ -33,6 +35,8 @@ struct virtio_fs_vq {
struct work_struct done_work;
struct list_head queued_reqs;
struct list_head end_reqs; /* End these requests */
+ struct virtio_fs_notify_node *notify_nodes;
+ struct list_head notify_reqs; /* List for queuing notify requests */
struct delayed_work dispatch_work;
struct fuse_dev *fud;
bool connected;
@@ -50,6 +54,8 @@ struct virtio_fs {
unsigned int nvqs; /* number of virtqueues */
unsigned int num_request_queues; /* number of request queues */
unsigned int first_reqq_idx; /* First request queue idx */
+ bool notify_enabled;
+ unsigned int notify_buf_size; /* Size of notification buffer */
};

struct virtio_fs_forget_req {
@@ -66,6 +72,20 @@ struct virtio_fs_forget {
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
struct fuse_req *req, bool in_flight);

+struct virtio_fs_notify {
+ struct fuse_out_header out_hdr;
+ /* Size of notify data specified by fs->notify_buf_size */
+ char outarg[];
+};
+
+struct virtio_fs_notify_node {
+ struct list_head list;
+ struct virtio_fs_notify notify;
+};
+
+static int virtio_fs_enqueue_all_notify(struct virtio_fs_vq *fsvq);
+
+
static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
{
struct virtio_fs *fs = vq->vdev->priv;
@@ -78,6 +98,11 @@ static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq)
return &vq_to_fsvq(vq)->fud->pq;
}

+static inline struct virtio_fs *fsvq_to_fs(struct virtio_fs_vq *fsvq)
+{
+ return (struct virtio_fs *)fsvq->vq->vdev->priv;
+}
+
/* Should be called with fsvq->lock held. */
static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq)
{
@@ -93,10 +118,17 @@ static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq)
complete(&fsvq->in_flight_zero);
}

+static void virtio_fs_free_notify_nodes(struct virtio_fs *fs)
+{
+ if (fs->notify_enabled && fs->vqs)
+ kfree(fs->vqs[VQ_NOTIFY].notify_nodes);
+}
+
static void release_virtio_fs_obj(struct kref *ref)
{
struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount);

+ virtio_fs_free_notify_nodes(vfs);
kfree(vfs->vqs);
kfree(vfs);
}
@@ -143,6 +175,13 @@ static void virtio_fs_drain_all_queues_locked(struct virtio_fs *fs)
int i;

for (i = 0; i < fs->nvqs; i++) {
+ /*
+ * Can't wait to drain notification queue as it always
+ * has pending requests so that server can use those
+ * to send notifications.
+ */
+ if (fs->notify_enabled && (i == VQ_NOTIFY))
+ continue;
fsvq = &fs->vqs[i];
virtio_fs_drain_queue(fsvq);
}
@@ -171,6 +210,8 @@ static void virtio_fs_start_all_queues(struct virtio_fs *fs)
spin_lock(&fsvq->lock);
fsvq->connected = true;
spin_unlock(&fsvq->lock);
+ if (fs->notify_enabled && (i == VQ_NOTIFY))
+ virtio_fs_enqueue_all_notify(fsvq);
}
}

@@ -420,6 +461,99 @@ static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
}
}

+/* Allocate memory for event requests in notify queue */
+static int virtio_fs_init_notify_vq(struct virtio_fs *fs,
+ struct virtio_fs_vq *fsvq)
+{
+ struct virtio_fs_notify_node *notify;
+ unsigned notify_node_sz = sizeof(struct virtio_fs_notify_node) +
+ fs->notify_buf_size;
+ int i;
+
+ fsvq->notify_nodes = kcalloc(VQ_NOTIFY_ELEMS, notify_node_sz,
+ GFP_KERNEL);
+ if (!fsvq->notify_nodes)
+ return -ENOMEM;
+
+ for (i = 0; i < VQ_NOTIFY_ELEMS; i++) {
+ notify = (void *)fsvq->notify_nodes + (i * notify_node_sz);
+ list_add_tail(&notify->list, &fsvq->notify_reqs);
+ }
+
+ return 0;
+}
+
+static int virtio_fs_enqueue_all_notify(struct virtio_fs_vq *fsvq)
+{
+ struct scatterlist sg[1];
+ int ret;
+ bool kick;
+ struct virtio_fs *fs = fsvq_to_fs(fsvq);
+ struct virtio_fs_notify_node *notify, *next;
+ unsigned notify_sz;
+
+ notify_sz = sizeof(struct fuse_out_header) + fs->notify_buf_size;
+ spin_lock(&fsvq->lock);
+ list_for_each_entry_safe(notify, next, &fsvq->notify_reqs, list) {
+ list_del_init(&notify->list);
+ sg_init_one(sg, &notify->notify, notify_sz);
+ ret = virtqueue_add_inbuf(fsvq->vq, sg, 1, notify, GFP_ATOMIC);
+ if (ret) {
+ list_add_tail(&notify->list, &fsvq->notify_reqs);
+ spin_unlock(&fsvq->lock);
+ return ret;
+ }
+ inc_in_flight_req(fsvq);
+ }
+
+ kick = virtqueue_kick_prepare(fsvq->vq);
+ spin_unlock(&fsvq->lock);
+ if (kick)
+ virtqueue_notify(fsvq->vq);
+ return 0;
+}
+
+static void virtio_fs_notify_done_work(struct work_struct *work)
+{
+ struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
+ done_work);
+ struct virtqueue *vq = fsvq->vq;
+ LIST_HEAD(reqs);
+ struct virtio_fs_notify_node *notify, *next;
+
+ spin_lock(&fsvq->lock);
+ do {
+ unsigned int len;
+
+ virtqueue_disable_cb(vq);
+
+ while ((notify = virtqueue_get_buf(vq, &len)) != NULL) {
+ list_add_tail(&notify->list, &reqs);
+ }
+ } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
+ spin_unlock(&fsvq->lock);
+
+ /* Process notify */
+ list_for_each_entry_safe(notify, next, &reqs, list) {
+ spin_lock(&fsvq->lock);
+ dec_in_flight_req(fsvq);
+ list_del_init(&notify->list);
+ list_add_tail(&notify->list, &fsvq->notify_reqs);
+ spin_unlock(&fsvq->lock);
+ }
+
+ /*
+ * If queue is connected, queue notifications again. If not,
+ * these will be queued again when virtqueue is restarted.
+ */
+ if (fsvq->connected)
+ virtio_fs_enqueue_all_notify(fsvq);
+}
+
+static void virtio_fs_notify_dispatch_work(struct work_struct *work)
+{
+}
+
/* Allocate and copy args into req->argbuf */
static int copy_args_to_argbuf(struct fuse_req *req)
{
@@ -563,24 +697,34 @@ static void virtio_fs_vq_done(struct virtqueue *vq)
schedule_work(&fsvq->done_work);
}

-static void virtio_fs_init_vq(struct virtio_fs_vq *fsvq, char *name,
- int vq_type)
+static int virtio_fs_init_vq(struct virtio_fs *fs, struct virtio_fs_vq *fsvq,
+ char *name, int vq_type)
{
+ int ret = 0;
+
strncpy(fsvq->name, name, VQ_NAME_LEN);
spin_lock_init(&fsvq->lock);
INIT_LIST_HEAD(&fsvq->queued_reqs);
INIT_LIST_HEAD(&fsvq->end_reqs);
+ INIT_LIST_HEAD(&fsvq->notify_reqs);
init_completion(&fsvq->in_flight_zero);

if (vq_type == VQ_REQUEST) {
INIT_WORK(&fsvq->done_work, virtio_fs_requests_done_work);
INIT_DELAYED_WORK(&fsvq->dispatch_work,
virtio_fs_request_dispatch_work);
+ } else if (vq_type == VQ_NOTIFY) {
+ INIT_WORK(&fsvq->done_work, virtio_fs_notify_done_work);
+ INIT_DELAYED_WORK(&fsvq->dispatch_work,
+ virtio_fs_notify_dispatch_work);
+ ret = virtio_fs_init_notify_vq(fs, fsvq);
} else {
INIT_WORK(&fsvq->done_work, virtio_fs_hiprio_done_work);
INIT_DELAYED_WORK(&fsvq->dispatch_work,
virtio_fs_hiprio_dispatch_work);
}
+
+ return ret;
}

/* Initialize virtqueues */
@@ -598,9 +742,27 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
if (fs->num_request_queues == 0)
return -EINVAL;

- /* One hiprio queue and rest are request queues */
- fs->nvqs = 1 + fs->num_request_queues;
- fs->first_reqq_idx = 1;
+ if (virtio_has_feature(vdev, VIRTIO_FS_F_NOTIFICATION)) {
+ pr_debug("virtio_fs: device supports notification.\n");
+ fs->notify_enabled = true;
+ virtio_cread(vdev, struct virtio_fs_config, notify_buf_size,
+ &fs->notify_buf_size);
+ if (fs->notify_buf_size == 0) {
+ printk("virtio-fs: Invalid value %d of notification"
+ " buffer size\n", fs->notify_buf_size);
+ return -EINVAL;
+ }
+ }
+
+ if (fs->notify_enabled) {
+ /* One additional queue for hiprio and one for notifications */
+ fs->nvqs = 2 + fs->num_request_queues;
+ fs->first_reqq_idx = 2;
+ } else {
+ fs->nvqs = 1 + fs->num_request_queues;
+ fs->first_reqq_idx = 1;
+ }
+
fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
if (!fs->vqs)
return -ENOMEM;
@@ -616,16 +778,30 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,

/* Initialize the hiprio/forget request virtqueue */
callbacks[VQ_HIPRIO] = virtio_fs_vq_done;
- virtio_fs_init_vq(&fs->vqs[VQ_HIPRIO], "hiprio", VQ_HIPRIO);
+ ret = virtio_fs_init_vq(fs, &fs->vqs[VQ_HIPRIO], "hiprio", VQ_HIPRIO);
+ if (ret < 0)
+ goto out;
names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;

+ /* Initialize notification queue */
+ if (fs->notify_enabled) {
+ callbacks[VQ_NOTIFY] = virtio_fs_vq_done;
+ ret = virtio_fs_init_vq(fs, &fs->vqs[VQ_NOTIFY], "notification",
+ VQ_NOTIFY);
+ if (ret < 0)
+ goto out;
+ names[VQ_NOTIFY] = fs->vqs[VQ_NOTIFY].name;
+ }
+
/* Initialize the requests virtqueues */
for (i = fs->first_reqq_idx; i < fs->nvqs; i++) {
char vq_name[VQ_NAME_LEN];

snprintf(vq_name, VQ_NAME_LEN, "requests.%u",
i - fs->first_reqq_idx);
- virtio_fs_init_vq(&fs->vqs[i], vq_name, VQ_REQUEST);
+ ret = virtio_fs_init_vq(fs, &fs->vqs[i], vq_name, VQ_REQUEST);
+ if (ret < 0)
+ goto out;
callbacks[i] = virtio_fs_vq_done;
names[i] = fs->vqs[i].name;
}
@@ -636,14 +812,14 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,

for (i = 0; i < fs->nvqs; i++)
fs->vqs[i].vq = vqs[i];
-
- virtio_fs_start_all_queues(fs);
out:
kfree(names);
kfree(callbacks);
kfree(vqs);
- if (ret)
+ if (ret) {
+ virtio_fs_free_notify_nodes(fs);
kfree(fs->vqs);
+ }
return ret;
}

@@ -679,6 +855,7 @@ static int virtio_fs_probe(struct virtio_device *vdev)
* requests need to be sent before we return.
*/
virtio_device_ready(vdev);
+ virtio_fs_start_all_queues(fs);

ret = virtio_fs_add_instance(fs);
if (ret < 0)
@@ -747,7 +924,7 @@ const static struct virtio_device_id id_table[] = {
{},
};

-const static unsigned int feature_table[] = {};
+const static unsigned int feature_table[] = {VIRTIO_FS_F_NOTIFICATION};

static struct virtio_driver virtio_fs_driver = {
.driver.name = KBUILD_MODNAME,
diff --git a/include/uapi/linux/virtio_fs.h b/include/uapi/linux/virtio_fs.h
index b02eb2ac3d99..f3f2ba3399a4 100644
--- a/include/uapi/linux/virtio_fs.h
+++ b/include/uapi/linux/virtio_fs.h
@@ -8,12 +8,17 @@
#include <linux/virtio_config.h>
#include <linux/virtio_types.h>

+/* Feature bits */
+#define VIRTIO_FS_F_NOTIFICATION 0 /* Notification queue supported */
+
struct virtio_fs_config {
/* Filesystem name (UTF-8, not NUL-terminated, padded with NULs) */
__u8 tag[36];

/* Number of request queues */
__u32 num_request_queues;
+ /* Size of notification buffer */
+ __u32 notify_buf_size;
} __attribute__((packed));

#endif /* _UAPI_LINUX_VIRTIO_FS_H */
--
2.20.1

2019-11-21 16:01:45

by Stefan Hajnoczi

[permalink] [raw]
Subject: Re: [PATCH 2/4] virtiofs: Add an index to keep track of first request queue

On Fri, Nov 15, 2019 at 03:57:03PM -0500, Vivek Goyal wrote:
> @@ -1004,6 +1008,7 @@ __releases(fiq->lock)
> spin_unlock(&fiq->lock);
>
> fs = fiq->priv;
> + queue_id = fs->first_reqq_idx;

The TODO should be moved here.


Attachments:
(No filename) (235.00 B)
signature.asc (499.00 B)
Download all attachments

2019-11-21 16:02:16

by Stefan Hajnoczi

[permalink] [raw]
Subject: Re: [PATCH 2/4] virtiofs: Add an index to keep track of first request queue

On Fri, Nov 15, 2019 at 03:57:03PM -0500, Vivek Goyal wrote:
> @@ -990,7 +994,7 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
> static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq)
> __releases(fiq->lock)
> {
> - unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */
> + unsigned int queue_id;
> struct virtio_fs *fs;
> struct fuse_req *req;
> struct virtio_fs_vq *fsvq;

Sorry, I removed too much context in my reply. This TODO...

> @@ -1004,6 +1008,7 @@ __releases(fiq->lock)
> spin_unlock(&fiq->lock);
>
> fs = fiq->priv;
> + queue_id = fs->first_reqq_idx;

...should be moved here.


Attachments:
(No filename) (662.00 B)
signature.asc (499.00 B)
Download all attachments

2019-11-21 17:05:31

by Stefan Hajnoczi

[permalink] [raw]
Subject: Re: [PATCH 4/4] virtiofs: Support blocking posix locks (fcntl(F_SETLKW))

On Fri, Nov 15, 2019 at 03:57:05PM -0500, Vivek Goyal wrote:
> As of now we don't support blocking variant of posix locks and daemon returns
> -EOPNOTSUPP. Reason being that it can lead to deadlocks. Virtqueue size is
> limited and it is possible we fill virtqueue with all the requests of
> fcntl(F_SETLKW) and wait for reply. And later a subsequent unlock request
> can't make progress because virtqueue is full. And that means F_SETLKW can't
> make progress and we are deadlocked.
>
> Use notification queue to solve this problem. After submitting lock request
> device will send a reply asking requester to wait. Once lock is available,
> requester will get a notification saying locking is available. That way
> we don't keep the request virtueue busy while we are waiting for lock
> and further unlock requests can make progress.
>
> When we get a reply in response to lock request, we need a way to know if
> we need to wait for notification or not. I have overloaded the
> fuse_out_header->error field. If value is ->error is 1, that's a signal
> to caller to wait for lock notification.
>
> Signed-off-by: Vivek Goyal <[email protected]>
> ---
> fs/fuse/virtio_fs.c | 78 ++++++++++++++++++++++++++++++++++++++-
> include/uapi/linux/fuse.h | 7 ++++
> 2 files changed, 84 insertions(+), 1 deletion(-)
>
> diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
> index 21d8d9d7d317..8aa9fc996556 100644
> --- a/fs/fuse/virtio_fs.c
> +++ b/fs/fuse/virtio_fs.c
> @@ -35,6 +35,7 @@ struct virtio_fs_vq {
> struct work_struct done_work;
> struct list_head queued_reqs;
> struct list_head end_reqs; /* End these requests */
> + struct list_head wait_reqs; /* requests waiting for notification */
> struct virtio_fs_notify_node *notify_nodes;
> struct list_head notify_reqs; /* List for queuing notify requests */
> struct delayed_work dispatch_work;
> @@ -85,7 +86,6 @@ struct virtio_fs_notify_node {
>
> static int virtio_fs_enqueue_all_notify(struct virtio_fs_vq *fsvq);
>
> -
> static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
> {
> struct virtio_fs *fs = vq->vdev->priv;
> @@ -513,13 +513,75 @@ static int virtio_fs_enqueue_all_notify(struct virtio_fs_vq *fsvq)
> return 0;
> }
>
> +static int notify_complete_waiting_req(struct virtio_fs *vfs,
> + struct fuse_notify_lock_out *out_args)
> +{
> + struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_REQUEST];
> + struct fuse_req *req, *next;
> + bool found = false;
> + struct fuse_conn *fc = fsvq->fud->fc;
> +
> + /* Find waiting request with the unique number and end it */
> + spin_lock(&fsvq->lock);
> + list_for_each_entry_safe(req, next, &fsvq->wait_reqs, list) {
> + if (req->in.h.unique == out_args->id) {
> + list_del_init(&req->list);
> + clear_bit(FR_SENT, &req->flags);
> + /* Transfer error code from notify */
> + req->out.h.error = out_args->error;
> + found = true;
> + break;
> + }
> + }
> + spin_unlock(&fsvq->lock);
> +
> + /*
> + * TODO: It is possible that some re-ordering happens in notify
> + * comes before request is complete. Deal with it.
> + */
> + if (found) {
> + fuse_request_end(fc, req);
> + spin_lock(&fsvq->lock);
> + dec_in_flight_req(fsvq);
> + spin_unlock(&fsvq->lock);
> + } else
> + pr_debug("virtio-fs: Did not find waiting request with"
> + " unique=0x%llx\n", out_args->id);
> +
> + return 0;
> +}
> +
> +static int virtio_fs_handle_notify(struct virtio_fs *vfs,
> + struct virtio_fs_notify *notify)
> +{
> + int ret = 0;
> + struct fuse_out_header *oh = &notify->out_hdr;
> + struct fuse_notify_lock_out *lo;
> +
> + /*
> + * For notifications, oh.unique is 0 and oh->error contains code
> + * for which notification as arrived.
> + */
> + switch(oh->error) {
> + case FUSE_NOTIFY_LOCK:
> + lo = (struct fuse_notify_lock_out *) &notify->outarg;
> + notify_complete_waiting_req(vfs, lo);
> + break;
> + default:
> + printk("virtio-fs: Unexpected notification %d\n", oh->error);
> + }
> + return ret;
> +}

Is this specific to virtio or can be it handled in common code?

> +
> static void virtio_fs_notify_done_work(struct work_struct *work)
> {
> struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
> done_work);
> struct virtqueue *vq = fsvq->vq;
> + struct virtio_fs *vfs = vq->vdev->priv;
> LIST_HEAD(reqs);
> struct virtio_fs_notify_node *notify, *next;
> + struct fuse_out_header *oh;
>
> spin_lock(&fsvq->lock);
> do {
> @@ -535,6 +597,10 @@ static void virtio_fs_notify_done_work(struct work_struct *work)
>
> /* Process notify */
> list_for_each_entry_safe(notify, next, &reqs, list) {
> + oh = &notify->notify.out_hdr;
> + WARN_ON(oh->unique);
> + /* Handle notification */
> + virtio_fs_handle_notify(vfs, &notify->notify);
> spin_lock(&fsvq->lock);
> dec_in_flight_req(fsvq);
> list_del_init(&notify->list);
> @@ -656,6 +722,15 @@ static void virtio_fs_requests_done_work(struct work_struct *work)
> * TODO verify that server properly follows FUSE protocol
> * (oh.uniq, oh.len)
> */
> + if (req->out.h.error == 1) {
> + /* Wait for notification to complete request */
> + list_del_init(&req->list);
> + spin_lock(&fsvq->lock);
> + list_add_tail(&req->list, &fsvq->wait_reqs);
> + spin_unlock(&fsvq->lock);
> + continue;
> + }
> +
> args = req->args;
> copy_args_from_argbuf(args, req);
>
> @@ -705,6 +780,7 @@ static int virtio_fs_init_vq(struct virtio_fs *fs, struct virtio_fs_vq *fsvq,
> strncpy(fsvq->name, name, VQ_NAME_LEN);
> spin_lock_init(&fsvq->lock);
> INIT_LIST_HEAD(&fsvq->queued_reqs);
> + INIT_LIST_HEAD(&fsvq->wait_reqs);
> INIT_LIST_HEAD(&fsvq->end_reqs);
> INIT_LIST_HEAD(&fsvq->notify_reqs);
> init_completion(&fsvq->in_flight_zero);
> diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> index 373cada89815..45f0c4efec8e 100644
> --- a/include/uapi/linux/fuse.h
> +++ b/include/uapi/linux/fuse.h
> @@ -481,6 +481,7 @@ enum fuse_notify_code {
> FUSE_NOTIFY_STORE = 4,
> FUSE_NOTIFY_RETRIEVE = 5,
> FUSE_NOTIFY_DELETE = 6,
> + FUSE_NOTIFY_LOCK = 7,
> FUSE_NOTIFY_CODE_MAX,
> };
>
> @@ -868,6 +869,12 @@ struct fuse_notify_retrieve_in {
> uint64_t dummy4;
> };
>
> +struct fuse_notify_lock_out {
> + uint64_t id;

Please call this field "unique" or "lock_unique" so it's clear this
identifier is the fuse_header_in->unique value of the lock request.

> + int32_t error;
> + int32_t padding;
> +};
> +
> /* Device ioctls: */
> #define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t)
>
> --
> 2.20.1
>


Attachments:
(No filename) (6.61 kB)
signature.asc (499.00 B)
Download all attachments

2019-11-22 13:03:32

by Vivek Goyal

[permalink] [raw]
Subject: Re: [PATCH 4/4] virtiofs: Support blocking posix locks (fcntl(F_SETLKW))

On Thu, Nov 21, 2019 at 05:00:20PM +0000, Stefan Hajnoczi wrote:

[..]
> > +static int virtio_fs_handle_notify(struct virtio_fs *vfs,
> > + struct virtio_fs_notify *notify)
> > +{
> > + int ret = 0;
> > + struct fuse_out_header *oh = &notify->out_hdr;
> > + struct fuse_notify_lock_out *lo;
> > +
> > + /*
> > + * For notifications, oh.unique is 0 and oh->error contains code
> > + * for which notification as arrived.
> > + */
> > + switch(oh->error) {
> > + case FUSE_NOTIFY_LOCK:
> > + lo = (struct fuse_notify_lock_out *) &notify->outarg;
> > + notify_complete_waiting_req(vfs, lo);
> > + break;
> > + default:
> > + printk("virtio-fs: Unexpected notification %d\n", oh->error);
> > + }
> > + return ret;
> > +}
>
> Is this specific to virtio or can be it handled in common code?

This is not specific to virtio_fs. In principle, regular fuse daemon could
implement something similar. Though they might not have to because client
can just block without introducing deadlock possibilities.

Anyway, I will look into moving this code into fuse common.

[..]
> > diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> > index 373cada89815..45f0c4efec8e 100644
> > --- a/include/uapi/linux/fuse.h
> > +++ b/include/uapi/linux/fuse.h
> > @@ -481,6 +481,7 @@ enum fuse_notify_code {
> > FUSE_NOTIFY_STORE = 4,
> > FUSE_NOTIFY_RETRIEVE = 5,
> > FUSE_NOTIFY_DELETE = 6,
> > + FUSE_NOTIFY_LOCK = 7,
> > FUSE_NOTIFY_CODE_MAX,
> > };
> >
> > @@ -868,6 +869,12 @@ struct fuse_notify_retrieve_in {
> > uint64_t dummy4;
> > };
> >
> > +struct fuse_notify_lock_out {
> > + uint64_t id;
>
> Please call this field "unique" or "lock_unique" so it's clear this
> identifier is the fuse_header_in->unique value of the lock request.

Ok, will do.

Vivek