Implement cuse mmap using shmem to provide the actual memory maps.
Pages must be read/written using fuse's NOTIFY_RETRIEVE and NOTIFY_STORE api.
Signed-off-by: Jader H. Silva <[email protected]>
---
fs/fuse/cuse.c | 459 +++++++++++++++++++++++++++++++++++++++++++++-
fs/fuse/dev.c | 163 +---------------
fs/fuse/fuse_i.h | 34 +++-
fs/fuse/inode.c | 166 ++++++++++++++++-
include/uapi/linux/fuse.h | 26 +++
5 files changed, 688 insertions(+), 160 deletions(-)
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index eae2c11..7749c13 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -48,6 +48,9 @@
#include <linux/stat.h>
#include <linux/module.h>
#include <linux/uio.h>
+#include <linux/mman.h>
+#include <linux/falloc.h>
+#include <linux/shmem_fs.h>
#include "fuse_i.h"
@@ -175,6 +178,441 @@ static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd,
return fuse_do_ioctl(file, cmd, arg, flags);
}
+struct fuse_dmmap_region {
+ u64 mapid;
+ u64 size;
+ struct file *filp;
+ struct vm_operations_struct vm_ops;
+ const struct vm_operations_struct *vm_original_ops;
+ struct list_head list;
+ atomic_t ref;
+};
+
+/*
+ * fuse_dmmap_vm represents the result of a single mmap() call, which
+ * can be shared by multiple client vmas created by forking.
+ */
+struct fuse_dmmap_vm {
+ u64 len;
+ u64 off;
+ atomic_t open_count;
+ struct fuse_dmmap_region *region;
+};
+
+static void fuse_dmmap_region_put(struct fuse_conn *fc,
+ struct fuse_dmmap_region *fdr)
+{
+ if (atomic_dec_and_lock(&fdr->ref, &fc->lock)) {
+
+ list_del(&fdr->list);
+
+ spin_unlock(&fc->lock);
+
+ fput(fdr->filp);
+ kfree(fdr);
+ }
+}
+
+static void fuse_dmmap_vm_open(struct vm_area_struct *vma)
+{
+ struct fuse_dmmap_vm *fdvm = vma->vm_private_data;
+ struct fuse_dmmap_region *fdr = fdvm->region;
+
+ /* vma copied */
+ atomic_inc(&fdvm->open_count);
+
+ if (fdr->vm_original_ops->open)
+ fdr->vm_original_ops->open(vma);
+}
+
+static void fuse_dmmap_vm_close(struct vm_area_struct *vma)
+{
+ struct fuse_dmmap_vm *fdvm = vma->vm_private_data;
+ struct fuse_dmmap_region *fdr = fdvm->region;
+ struct fuse_file *ff = vma->vm_file->private_data;
+ struct fuse_conn *fc = ff->fc;
+ struct fuse_req *req;
+ struct fuse_munmap_in *inarg;
+
+ if (fdr->vm_original_ops->close)
+ fdr->vm_original_ops->close(vma);
+
+ if (!atomic_dec_and_test(&fdvm->open_count))
+ return;
+
+ /*
+ * Notify server that the mmap region has been unmapped.
+ * Failing this might lead to resource leak in server, don't
+ * fail.
+ */
+ req = fuse_get_req_nofail_nopages(fc, vma->vm_file);
+ inarg = &req->misc.munmap_in;
+
+ inarg->fh = ff->fh;
+ inarg->mapid = fdvm->region->mapid;
+ inarg->size = fdvm->len;
+ inarg->offset = fdvm->off;
+
+ req->in.h.opcode = FUSE_MUNMAP;
+ req->in.h.nodeid = ff->nodeid;
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(*inarg);
+ req->in.args[0].value = inarg;
+
+ fuse_request_send(fc, req);
+ fuse_put_request(fc, req);
+ fuse_dmmap_region_put(fc, fdvm->region);
+ kfree(fdvm);
+}
+
+static int fuse_dmmap_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ int ret;
+ struct file *filp = vma->vm_file;
+ struct fuse_dmmap_vm *fdvm = vma->vm_private_data;
+ struct fuse_dmmap_region *fdr = fdvm->region;
+
+ vma->vm_file = fdr->filp;
+ ret = fdr->vm_original_ops->fault(vma, vmf);
+
+ vma->vm_file = filp;
+
+ return ret;
+}
+
+static const struct vm_operations_struct fuse_dmmap_vm_ops = {
+ .open = fuse_dmmap_vm_open,
+ .close = fuse_dmmap_vm_close,
+ .fault = fuse_dmmap_vm_fault,
+};
+
+static struct fuse_dmmap_region *fuse_dmmap_find_locked(struct fuse_conn *fc,
+ u64 mapid)
+{
+ struct fuse_dmmap_region *curr;
+ struct fuse_dmmap_region *fdr = NULL;
+
+ list_for_each_entry(curr, &fc->dmmap_list, list) {
+ if (curr->mapid == mapid) {
+ fdr = curr;
+ atomic_inc(&fdr->ref);
+ break;
+ }
+ }
+
+ return fdr;
+}
+
+static struct fuse_dmmap_region *fuse_dmmap_find(struct fuse_conn *fc,
+ u64 mapid)
+{
+ struct fuse_dmmap_region *fdr;
+
+ spin_lock(&fc->lock);
+ fdr = fuse_dmmap_find_locked(fc, mapid);
+ spin_unlock(&fc->lock);
+
+ return fdr;
+}
+
+static struct fuse_dmmap_region *fuse_dmmap_get(struct fuse_conn *fc,
+ struct file *file, u64 mapid,
+ u64 size, unsigned long flags)
+{
+ struct fuse_dmmap_region *fdr;
+ char *pathbuf, *filepath;
+ struct file *shmem_file;
+
+ fdr = fuse_dmmap_find(fc, mapid);
+ if (!fdr) {
+ struct fuse_dmmap_region *tmp;
+
+ fdr = kzalloc(sizeof(struct fuse_dmmap_region), GFP_KERNEL);
+ if (!fdr)
+ return ERR_PTR(-ENOMEM);
+
+ atomic_set(&fdr->ref, 1);
+
+ pathbuf = kzalloc(PATH_MAX+1, GFP_KERNEL);
+ if (!pathbuf) {
+ kfree(fdr);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ filepath = d_path(&file->f_path, pathbuf, PATH_MAX+1);
+ if (IS_ERR(filepath)) {
+ kfree(fdr);
+ kfree(pathbuf);
+ return (struct fuse_dmmap_region *) filepath;
+ }
+
+ fdr->mapid = mapid;
+ shmem_file = shmem_file_setup(filepath, size, flags);
+ kfree(pathbuf);
+
+ if (IS_ERR(shmem_file)) {
+ kfree(fdr);
+ return (struct fuse_dmmap_region *) shmem_file;
+ }
+
+ fdr->filp = shmem_file;
+
+ spin_lock(&fc->lock);
+ tmp = fuse_dmmap_find_locked(fc, mapid);
+ if (tmp) {
+ fput(fdr->filp);
+ kfree(fdr);
+ fdr = tmp;
+ } else {
+ INIT_LIST_HEAD(&fdr->list);
+ list_add(&fdr->list, &fc->dmmap_list);
+ }
+ spin_unlock(&fc->lock);
+ }
+
+ if (size > fdr->size) {
+
+ fdr->filp->f_op->fallocate(fdr->filp, 0, 0, size);
+ fdr->size = size;
+ }
+
+ return fdr;
+}
+
+static int cuse_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ int err;
+ struct fuse_file *ff = file->private_data;
+ struct fuse_conn *fc = ff->fc;
+ struct fuse_dmmap_vm *fdvm;
+ struct fuse_dmmap_region *fdr;
+ struct fuse_req *req = NULL;
+ struct fuse_mmap_in inarg;
+ struct fuse_mmap_out outarg;
+
+ if (fc->no_dmmap)
+ return -ENOSYS;
+
+ req = fuse_get_req(fc, 0);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ /* ask server whether this mmap is okay and what the size should be */
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.fh = ff->fh;
+ inarg.addr = vma->vm_start;
+ inarg.len = vma->vm_end - vma->vm_start;
+ inarg.prot = ((vma->vm_flags & VM_READ) ? PROT_READ : 0) |
+ ((vma->vm_flags & VM_WRITE) ? PROT_WRITE : 0) |
+ ((vma->vm_flags & VM_EXEC) ? PROT_EXEC : 0);
+ inarg.flags = ((vma->vm_flags & VM_SHARED) ? MAP_SHARED : 0 ) |
+ ((vma->vm_flags & VM_GROWSDOWN) ? MAP_GROWSDOWN : 0) |
+ ((vma->vm_flags & VM_DENYWRITE) ? MAP_DENYWRITE : 0) |
+ ((vma->vm_flags & VM_EXEC) ? MAP_EXECUTABLE : 0) |
+ ((vma->vm_flags & VM_LOCKED) ? MAP_LOCKED : 0);
+ inarg.offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
+
+ req->in.h.opcode = FUSE_MMAP;
+ req->in.h.nodeid = ff->nodeid;
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ req->out.numargs = 1;
+ req->out.args[0].size = sizeof(outarg);
+ req->out.args[0].value = &outarg;
+
+ fuse_request_send(fc, req);
+ err = req->out.h.error;
+ if (err) {
+ if (err == -ENOSYS)
+ fc->no_dmmap = 1;
+ goto free_req;
+ }
+
+ fdr = fuse_dmmap_get(fc, file, outarg.mapid, outarg.size,
+ vma->vm_flags);
+ err = PTR_ERR(fdr);
+ if (IS_ERR(fdr))
+ goto free_req;
+
+ err = -ENOMEM;
+
+ fdvm = kzalloc(sizeof(*fdvm), GFP_KERNEL);
+ if (!fdvm) {
+ fuse_dmmap_region_put(fc, fdr);
+ goto free_req;
+ }
+
+ atomic_set(&fdvm->open_count, 1);
+ fdvm->region = fdr;
+ fdvm->len = inarg.len;
+ fdvm->off = inarg.offset;
+
+ fdr->filp->f_op->mmap(fdr->filp, vma);
+
+ memcpy(&fdr->vm_ops, vma->vm_ops, sizeof(fdr->vm_ops));
+ fdr->vm_ops.open = fuse_dmmap_vm_ops.open;
+ fdr->vm_ops.close = fuse_dmmap_vm_ops.close;
+ fdr->vm_ops.fault = fuse_dmmap_vm_ops.fault;
+
+ fdr->vm_original_ops = vma->vm_ops;
+
+ vma->vm_ops = &fdr->vm_ops;
+
+ vma->vm_private_data = fdvm;
+ vma->vm_flags |= VM_DONTEXPAND; /* disallow expansion for now */
+ err = 0;
+
+free_req:
+ fuse_put_request(fc, req);
+ return err;
+}
+
+static int fuse_notify_store_to_dmmap(struct fuse_conn *fc,
+ struct fuse_copy_state *cs,
+ u64 nodeid, u32 size, u64 pos)
+{
+ struct fuse_dmmap_region *fdr;
+ struct file *filp;
+ pgoff_t index;
+ unsigned int off;
+ int err;
+
+ fdr = fuse_dmmap_find(fc, nodeid);
+ if (!fdr)
+ return -ENOENT;
+
+ index = pos >> PAGE_SHIFT;
+ off = pos & ~PAGE_MASK;
+ if (pos > fdr->size)
+ size = 0;
+ else if (size > fdr->size - pos)
+ size = fdr->size - pos;
+
+ filp = fdr->filp;
+
+ while (size) {
+ struct page *page;
+ unsigned int this_num;
+
+ page = shmem_read_mapping_page_gfp(filp->f_inode->i_mapping,
+ index, GFP_HIGHUSER);
+ if (IS_ERR(page)) {
+
+ err = -ENOMEM;
+ goto out_iput;
+ }
+
+ this_num = min_t(unsigned, size, PAGE_SIZE - off);
+ err = fuse_copy_page(cs, &page, off, this_num, 0);
+
+ unlock_page(page);
+ page_cache_release(page);
+
+ if (err)
+ goto out_iput;
+
+ size -= this_num;
+ off = 0;
+ index++;
+ }
+
+ err = 0;
+
+out_iput:
+ fuse_dmmap_region_put(fc, fdr);
+
+ return err;
+}
+
+static void fuse_retrieve_dmmap_end(struct fuse_conn *fc, struct fuse_req *req)
+{
+ release_pages(req->pages, req->num_pages, 0);
+}
+
+static int fuse_notify_retrieve_from_dmmap(struct fuse_conn *fc,
+ struct fuse_notify_retrieve_out *outarg)
+{
+ struct fuse_dmmap_region *fdr;
+ struct fuse_req *req;
+ struct page *page;
+ struct file *filp;
+ pgoff_t index;
+ unsigned int num;
+ unsigned int offset;
+ unsigned int npages;
+ unsigned int this_num;
+ size_t total_len = 0;
+ int err;
+
+ fdr = fuse_dmmap_find(fc, outarg->nodeid);
+ if (!fdr)
+ return -ENOENT;
+
+ npages = outarg->size >> PAGE_SHIFT;
+ if (outarg->size & ~PAGE_MASK)
+ npages++;
+
+ req = fuse_get_req(fc, npages);
+ err = PTR_ERR(req);
+ if (IS_ERR(req))
+ goto out_put_region;
+
+ offset = outarg->offset & ~PAGE_MASK;
+
+ req->in.h.opcode = FUSE_NOTIFY_REPLY;
+ req->in.h.nodeid = outarg->nodeid;
+ req->in.numargs = 2;
+ req->in.argpages = 1;
+ req->end = fuse_retrieve_dmmap_end;
+
+ index = outarg->offset >> PAGE_SHIFT;
+ num = outarg->size;
+ if (outarg->offset > fdr->size)
+ num = 0;
+ else if (outarg->offset + num > fdr->size)
+ num = fdr->size - outarg->offset;
+
+ filp = fdr->filp;
+
+ npages = 0;
+ while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) {
+
+ page = shmem_read_mapping_page_gfp(filp->f_inode->i_mapping,
+ index,
+ GFP_KERNEL);
+ if (IS_ERR(page)) {
+ err = -ENOMEM;
+ goto out_put_region;
+ }
+
+ this_num = min_t(unsigned, num, PAGE_SIZE - offset);
+ req->pages[req->num_pages] = page;
+ req->page_descs[req->num_pages].length = this_num;
+ req->num_pages++;
+
+ num -= this_num;
+ total_len += this_num;
+ index++;
+ npages++;
+ }
+ req->misc.retrieve_in.offset = outarg->offset;
+ req->misc.retrieve_in.size = total_len;
+ req->in.args[0].size = sizeof(req->misc.retrieve_in);
+ req->in.args[0].value = &req->misc.retrieve_in;
+ req->in.args[1].size = total_len;
+
+ err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
+ if (err)
+ fuse_retrieve_dmmap_end(fc, req);
+
+out_put_region:
+ fuse_dmmap_region_put(fc, fdr);
+
+ return err;
+}
+
+
static const struct file_operations cuse_frontend_fops = {
.owner = THIS_MODULE,
.read_iter = cuse_read_iter,
@@ -184,7 +622,8 @@ static const struct file_operations cuse_frontend_fops = {
.unlocked_ioctl = cuse_file_ioctl,
.compat_ioctl = cuse_file_compat_ioctl,
.poll = fuse_file_poll,
- .llseek = noop_llseek,
+ .llseek = noop_llseek,
+ .mmap = cuse_mmap,
};
@@ -468,10 +907,26 @@ err:
static void cuse_fc_release(struct fuse_conn *fc)
{
+ struct fuse_dmmap_region *fdr;
struct cuse_conn *cc = fc_to_cc(fc);
+
+ spin_lock(&fc->lock);
+ while (!list_empty(&fc->dmmap_list)) {
+
+ fdr = list_entry(fc->dmmap_list.next, typeof(*fdr), list);
+ fuse_dmmap_region_put(fc, fdr);
+ }
+ spin_unlock(&fc->lock);
+
kfree_rcu(cc, fc.rcu);
}
+static const struct fuse_conn_operations cuse_ops = {
+ .release = cuse_fc_release,
+ .notify_store = fuse_notify_store_to_dmmap,
+ .notify_retrieve = fuse_notify_retrieve_from_dmmap,
+};
+
/**
* cuse_channel_open - open method for /dev/cuse
* @inode: inode for /dev/cuse
@@ -507,7 +962,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
}
INIT_LIST_HEAD(&cc->list);
- cc->fc.release = cuse_fc_release;
+ cc->fc.ops = &cuse_ops;
cc->fc.initialized = 1;
rc = cuse_send_init(cc);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 80cc1b3..0faf92c 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -279,6 +279,7 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
__clear_bit(FR_BACKGROUND, &req->flags);
return req;
}
+EXPORT_SYMBOL_GPL(fuse_get_req_nofail_nopages);
void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
{
@@ -617,8 +618,8 @@ void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
}
EXPORT_SYMBOL_GPL(fuse_request_send_background);
-static int fuse_request_send_notify_reply(struct fuse_conn *fc,
- struct fuse_req *req, u64 unique)
+int fuse_request_send_notify_reply(struct fuse_conn *fc,
+ struct fuse_req *req, u64 unique)
{
int err = -ENODEV;
struct fuse_iqueue *fiq = &fc->iq;
@@ -674,6 +675,7 @@ static int lock_request(struct fuse_req *req)
}
return err;
}
+EXPORT_SYMBOL_GPL(fuse_request_send_notify_reply);
/*
* Unlock request. If it was aborted while locked, caller is responsible
@@ -967,8 +969,8 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
* Copy a page in the request to/from the userspace buffer. Must be
* done atomically
*/
-static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
- unsigned offset, unsigned count, int zeroing)
+int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
+ unsigned offset, unsigned count, int zeroing)
{
int err;
struct page *page = *pagep;
@@ -1003,6 +1005,7 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
flush_dcache_page(page);
return 0;
}
+EXPORT_SYMBOL_GPL(fuse_copy_page);
/* Copy pages in the request to/from userspace buffer */
static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
@@ -1597,15 +1600,7 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
struct fuse_copy_state *cs)
{
struct fuse_notify_store_out outarg;
- struct inode *inode;
- struct address_space *mapping;
- u64 nodeid;
int err;
- pgoff_t index;
- unsigned int offset;
- unsigned int num;
- loff_t file_size;
- loff_t end;
err = -EINVAL;
if (size < sizeof(outarg))
@@ -1619,145 +1614,18 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
if (size - sizeof(outarg) != outarg.size)
goto out_finish;
- nodeid = outarg.nodeid;
+ err = fc->ops->notify_store(fc, cs, outarg.nodeid, outarg.size,
+ outarg.offset);
- down_read(&fc->killsb);
-
- err = -ENOENT;
- if (!fc->sb)
- goto out_up_killsb;
-
- inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
- if (!inode)
- goto out_up_killsb;
-
- mapping = inode->i_mapping;
- index = outarg.offset >> PAGE_CACHE_SHIFT;
- offset = outarg.offset & ~PAGE_CACHE_MASK;
- file_size = i_size_read(inode);
- end = outarg.offset + outarg.size;
- if (end > file_size) {
- file_size = end;
- fuse_write_update_size(inode, file_size);
- }
-
- num = outarg.size;
- while (num) {
- struct page *page;
- unsigned int this_num;
-
- err = -ENOMEM;
- page = find_or_create_page(mapping, index,
- mapping_gfp_mask(mapping));
- if (!page)
- goto out_iput;
-
- this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
- err = fuse_copy_page(cs, &page, offset, this_num, 0);
- if (!err && offset == 0 &&
- (this_num == PAGE_CACHE_SIZE || file_size == end))
- SetPageUptodate(page);
- unlock_page(page);
- page_cache_release(page);
-
- if (err)
- goto out_iput;
-
- num -= this_num;
- offset = 0;
- index++;
- }
-
- err = 0;
-
-out_iput:
- iput(inode);
-out_up_killsb:
- up_read(&fc->killsb);
out_finish:
fuse_copy_finish(cs);
return err;
}
-static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
-{
- release_pages(req->pages, req->num_pages, false);
-}
-
-static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
- struct fuse_notify_retrieve_out *outarg)
-{
- int err;
- struct address_space *mapping = inode->i_mapping;
- struct fuse_req *req;
- pgoff_t index;
- loff_t file_size;
- unsigned int num;
- unsigned int offset;
- size_t total_len = 0;
- int num_pages;
-
- offset = outarg->offset & ~PAGE_CACHE_MASK;
- file_size = i_size_read(inode);
-
- num = outarg->size;
- if (outarg->offset > file_size)
- num = 0;
- else if (outarg->offset + num > file_size)
- num = file_size - outarg->offset;
-
- num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
- num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);
-
- req = fuse_get_req(fc, num_pages);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- req->in.h.opcode = FUSE_NOTIFY_REPLY;
- req->in.h.nodeid = outarg->nodeid;
- req->in.numargs = 2;
- req->in.argpages = 1;
- req->page_descs[0].offset = offset;
- req->end = fuse_retrieve_end;
-
- index = outarg->offset >> PAGE_CACHE_SHIFT;
-
- while (num && req->num_pages < num_pages) {
- struct page *page;
- unsigned int this_num;
-
- page = find_get_page(mapping, index);
- if (!page)
- break;
-
- this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
- req->pages[req->num_pages] = page;
- req->page_descs[req->num_pages].length = this_num;
- req->num_pages++;
-
- offset = 0;
- num -= this_num;
- total_len += this_num;
- index++;
- }
- req->misc.retrieve_in.offset = outarg->offset;
- req->misc.retrieve_in.size = total_len;
- req->in.args[0].size = sizeof(req->misc.retrieve_in);
- req->in.args[0].value = &req->misc.retrieve_in;
- req->in.args[1].size = total_len;
-
- err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
- if (err)
- fuse_retrieve_end(fc, req);
-
- return err;
-}
-
static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
struct fuse_copy_state *cs)
{
struct fuse_notify_retrieve_out outarg;
- struct inode *inode;
int err;
err = -EINVAL;
@@ -1770,18 +1638,7 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
fuse_copy_finish(cs);
- down_read(&fc->killsb);
- err = -ENOENT;
- if (fc->sb) {
- u64 nodeid = outarg.nodeid;
-
- inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
- if (inode) {
- err = fuse_retrieve(fc, inode, &outarg);
- iput(inode);
- }
- }
- up_read(&fc->killsb);
+ err = fc->ops->notify_retrieve(fc, &outarg);
return err;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 4051131..a56222b 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -337,6 +337,7 @@ struct fuse_req {
struct fuse_req *next;
} write;
struct fuse_notify_retrieve_in retrieve_in;
+ struct fuse_munmap_in munmap_in;
} misc;
/** page vector */
@@ -431,6 +432,21 @@ struct fuse_dev {
struct list_head entry;
};
+struct fuse_copy_state;
+
+struct fuse_conn_operations {
+ /** Called on final put */
+ void (*release)(struct fuse_conn *);
+
+ /** Called to store data into a mapping */
+ int (*notify_store)(struct fuse_conn *, struct fuse_copy_state *,
+ u64 nodeid, u32 size, u64 pos);
+
+ /** Called to retrieve data from a mapping */
+ int (*notify_retrieve)(struct fuse_conn *,
+ struct fuse_notify_retrieve_out *);
+};
+
/**
* A Fuse connection.
*
@@ -578,6 +594,9 @@ struct fuse_conn {
/** Is poll not implemented by fs? */
unsigned no_poll:1;
+ /** Is direct mmap not implemente by fs? */
+ unsigned no_dmmap:1;
+
/** Do multi-page cached writes */
unsigned big_writes:1;
@@ -635,9 +654,6 @@ struct fuse_conn {
/** Version counter for attribute changes */
u64 attr_version;
- /** Called on final put */
- void (*release)(struct fuse_conn *);
-
/** Super block for this connection. */
struct super_block *sb;
@@ -646,6 +662,12 @@ struct fuse_conn {
/** List of device instances belonging to this connection */
struct list_head devices;
+
+ /** List of direct mmaps (currently CUSE only) */
+ struct list_head dmmap_list;
+
+ /** Operations that fuse and cuse can implement differently */
+ const struct fuse_conn_operations *ops;
};
static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
@@ -944,4 +966,10 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
void fuse_set_initialized(struct fuse_conn *fc);
+int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
+ unsigned offset, unsigned count, int zeroing);
+
+int fuse_request_send_notify_reply(struct fuse_conn *fc,
+ struct fuse_req *req, u64 unique);
+
#endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index ac81f48..5284b84 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -609,6 +609,7 @@ void fuse_conn_init(struct fuse_conn *fc)
fc->connected = 1;
fc->attr_version = 1;
get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
+ INIT_LIST_HEAD(&fc->dmmap_list);
}
EXPORT_SYMBOL_GPL(fuse_conn_init);
@@ -617,7 +618,7 @@ void fuse_conn_put(struct fuse_conn *fc)
if (atomic_dec_and_test(&fc->count)) {
if (fc->destroy_req)
fuse_request_free(fc->destroy_req);
- fc->release(fc);
+ fc->ops->release(fc);
}
}
EXPORT_SYMBOL_GPL(fuse_conn_put);
@@ -1025,6 +1026,167 @@ void fuse_dev_free(struct fuse_dev *fud)
}
EXPORT_SYMBOL_GPL(fuse_dev_free);
+static int fuse_notify_store_to_inode(struct fuse_conn *fc,
+ struct fuse_copy_state *cs,
+ u64 nodeid, u32 size, u64 pos)
+{
+ struct inode *inode;
+ struct address_space *mapping;
+ pgoff_t index;
+ unsigned int off;
+ loff_t file_size;
+ loff_t end;
+ int err;
+
+ down_read(&fc->killsb);
+
+ err = -ENOENT;
+ if (!fc->sb)
+ goto out_up_killsb;
+
+ inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
+ if (!inode)
+ goto out_up_killsb;
+
+ mapping = inode->i_mapping;
+ index = pos >> PAGE_CACHE_SHIFT;
+ off = pos & ~PAGE_CACHE_MASK;
+ file_size = i_size_read(inode);
+ end = pos + size;
+ if (end > file_size) {
+ file_size = end;
+ fuse_write_update_size(inode, file_size);
+ }
+
+ while (size) {
+ struct page *page;
+ unsigned int this_num;
+
+ err = -ENOMEM;
+ page = find_or_create_page(mapping, index,
+ mapping_gfp_mask(mapping));
+ if (!page)
+ goto out_iput;
+
+ this_num = min_t(unsigned, size, PAGE_CACHE_SIZE - off);
+ err = fuse_copy_page(cs, &page, off, this_num, 0);
+ if (!err && off == 0 && (size != 0 || file_size == end))
+ SetPageUptodate(page);
+ unlock_page(page);
+ page_cache_release(page);
+
+ if (err)
+ goto out_iput;
+
+ size -= this_num;
+ off = 0;
+ index++;
+ }
+
+ err = 0;
+
+out_iput:
+ iput(inode);
+out_up_killsb:
+ up_read(&fc->killsb);
+
+ return err;
+}
+
+static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
+{
+ release_pages(req->pages, req->num_pages, 0);
+}
+
+static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
+ struct fuse_notify_retrieve_out *outarg)
+{
+ int err;
+ struct address_space *mapping = inode->i_mapping;
+ struct fuse_req *req;
+ pgoff_t index;
+ loff_t file_size;
+ unsigned int num;
+ unsigned int offset;
+ size_t total_len = 0;
+
+ req = fuse_get_req(fc, 0);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ offset = outarg->offset & ~PAGE_CACHE_MASK;
+
+ req->in.h.opcode = FUSE_NOTIFY_REPLY;
+ req->in.h.nodeid = outarg->nodeid;
+ req->in.numargs = 2;
+ req->in.argpages = 1;
+ req->end = fuse_retrieve_end;
+
+ index = outarg->offset >> PAGE_CACHE_SHIFT;
+ file_size = i_size_read(inode);
+ num = outarg->size;
+ if (outarg->offset > file_size)
+ num = 0;
+ else if (outarg->offset + num > file_size)
+ num = file_size - outarg->offset;
+
+ while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) {
+ struct page *page;
+ unsigned int this_num;
+
+ page = find_get_page(mapping, index);
+ if (!page)
+ break;
+
+ this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
+ req->pages[req->num_pages] = page;
+ req->num_pages++;
+
+ num -= this_num;
+ total_len += this_num;
+ index++;
+ }
+ req->misc.retrieve_in.offset = outarg->offset;
+ req->misc.retrieve_in.size = total_len;
+ req->in.args[0].size = sizeof(req->misc.retrieve_in);
+ req->in.args[0].value = &req->misc.retrieve_in;
+ req->in.args[1].size = total_len;
+
+ err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
+ if (err)
+ fuse_retrieve_end(fc, req);
+
+ return err;
+}
+
+static int fuse_notify_retrieve_from_inode(struct fuse_conn *fc,
+ struct fuse_notify_retrieve_out *outarg)
+{
+ struct inode *inode;
+ int err;
+
+ down_read(&fc->killsb);
+ err = -ENOENT;
+ if (fc->sb) {
+ u64 nodeid = outarg->nodeid;
+
+ inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
+ if (inode) {
+ err = fuse_retrieve(fc, inode, outarg);
+ iput(inode);
+ }
+ }
+ up_read(&fc->killsb);
+
+ return err;
+}
+
+static const struct fuse_conn_operations fuse_default_ops = {
+ .release = fuse_free_conn,
+ .notify_store = fuse_notify_store_to_inode,
+ .notify_retrieve = fuse_notify_retrieve_from_inode,
+};
+
static int fuse_fill_super(struct super_block *sb, void *data, int silent)
{
struct fuse_dev *fud;
@@ -1077,7 +1239,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
goto err_fput;
fuse_conn_init(fc);
- fc->release = fuse_free_conn;
+ fc->ops = &fuse_default_ops;
fud = fuse_dev_alloc(fc);
if (!fud)
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index c9aca04..3f4c54b 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -102,6 +102,7 @@
* - add ctime and ctimensec to fuse_setattr_in
* - add FUSE_RENAME2 request
* - add FUSE_NO_OPEN_SUPPORT flag
+ * - add FUSE_MMAP and FUSE_MUNMAP
*/
#ifndef _LINUX_FUSE_H
@@ -358,6 +359,8 @@ enum fuse_opcode {
FUSE_FALLOCATE = 43,
FUSE_READDIRPLUS = 44,
FUSE_RENAME2 = 45,
+ FUSE_MMAP = 46,
+ FUSE_MUNMAP = 47,
/* CUSE specific operations */
CUSE_INIT = 4096,
@@ -670,6 +673,29 @@ struct fuse_fallocate_in {
uint32_t padding;
};
+struct fuse_mmap_in {
+ __u64 fh;
+ __u64 addr;
+ __u64 len;
+ __u32 prot;
+ __u32 flags;
+ __u64 offset;
+};
+
+struct fuse_mmap_out {
+ __u64 mapid; /* Mmap ID, same namespace as Inode ID */
+ __u64 size; /* Size of memory region */
+ __u64 reserved;
+};
+
+struct fuse_munmap_in {
+ __u64 fh;
+ __u64 mapid;
+ __u64 size; /* Size of memory region */
+ __u64 offset;
+ __u64 reserved;
+};
+
struct fuse_in_header {
uint32_t len;
uint32_t opcode;
--
2.1.4
I tested this patch and gave some hints to Jader when it first appeared
on the libfuse mailing list some months ago.
Signed-off-by: Luca Risolia <[email protected]>
Jader H. Silva wrote:
> Implement cuse mmap using shmem to provide the actual memory maps.
> Pages must be read/written using fuse's NOTIFY_RETRIEVE and NOTIFY_STORE api.
>
> Signed-off-by: Jader H. Silva <[email protected]>
> ---
> fs/fuse/cuse.c | 459 +++++++++++++++++++++++++++++++++++++++++++++-
> fs/fuse/dev.c | 163 +---------------
> fs/fuse/fuse_i.h | 34 +++-
> fs/fuse/inode.c | 166 ++++++++++++++++-
> include/uapi/linux/fuse.h | 26 +++
> 5 files changed, 688 insertions(+), 160 deletions(-)
>
> diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
> index eae2c11..7749c13 100644
> --- a/fs/fuse/cuse.c
> +++ b/fs/fuse/cuse.c
> @@ -48,6 +48,9 @@
> #include <linux/stat.h>
> #include <linux/module.h>
> #include <linux/uio.h>
> +#include <linux/mman.h>
> +#include <linux/falloc.h>
> +#include <linux/shmem_fs.h>
>
> #include "fuse_i.h"
>
> @@ -175,6 +178,441 @@ static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd,
> return fuse_do_ioctl(file, cmd, arg, flags);
> }
>
> +struct fuse_dmmap_region {
> + u64 mapid;
> + u64 size;
> + struct file *filp;
> + struct vm_operations_struct vm_ops;
> + const struct vm_operations_struct *vm_original_ops;
> + struct list_head list;
> + atomic_t ref;
> +};
> +
> +/*
> + * fuse_dmmap_vm represents the result of a single mmap() call, which
> + * can be shared by multiple client vmas created by forking.
> + */
> +struct fuse_dmmap_vm {
> + u64 len;
> + u64 off;
> + atomic_t open_count;
> + struct fuse_dmmap_region *region;
> +};
> +
> +static void fuse_dmmap_region_put(struct fuse_conn *fc,
> + struct fuse_dmmap_region *fdr)
> +{
> + if (atomic_dec_and_lock(&fdr->ref, &fc->lock)) {
> +
> + list_del(&fdr->list);
> +
> + spin_unlock(&fc->lock);
> +
> + fput(fdr->filp);
> + kfree(fdr);
> + }
> +}
> +
> +static void fuse_dmmap_vm_open(struct vm_area_struct *vma)
> +{
> + struct fuse_dmmap_vm *fdvm = vma->vm_private_data;
> + struct fuse_dmmap_region *fdr = fdvm->region;
> +
> + /* vma copied */
> + atomic_inc(&fdvm->open_count);
> +
> + if (fdr->vm_original_ops->open)
> + fdr->vm_original_ops->open(vma);
> +}
> +
> +static void fuse_dmmap_vm_close(struct vm_area_struct *vma)
> +{
> + struct fuse_dmmap_vm *fdvm = vma->vm_private_data;
> + struct fuse_dmmap_region *fdr = fdvm->region;
> + struct fuse_file *ff = vma->vm_file->private_data;
> + struct fuse_conn *fc = ff->fc;
> + struct fuse_req *req;
> + struct fuse_munmap_in *inarg;
> +
> + if (fdr->vm_original_ops->close)
> + fdr->vm_original_ops->close(vma);
> +
> + if (!atomic_dec_and_test(&fdvm->open_count))
> + return;
> +
> + /*
> + * Notify server that the mmap region has been unmapped.
> + * Failing this might lead to resource leak in server, don't
> + * fail.
> + */
> + req = fuse_get_req_nofail_nopages(fc, vma->vm_file);
> + inarg = &req->misc.munmap_in;
> +
> + inarg->fh = ff->fh;
> + inarg->mapid = fdvm->region->mapid;
> + inarg->size = fdvm->len;
> + inarg->offset = fdvm->off;
> +
> + req->in.h.opcode = FUSE_MUNMAP;
> + req->in.h.nodeid = ff->nodeid;
> + req->in.numargs = 1;
> + req->in.args[0].size = sizeof(*inarg);
> + req->in.args[0].value = inarg;
> +
> + fuse_request_send(fc, req);
> + fuse_put_request(fc, req);
> + fuse_dmmap_region_put(fc, fdvm->region);
> + kfree(fdvm);
> +}
> +
> +static int fuse_dmmap_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> +{
> + int ret;
> + struct file *filp = vma->vm_file;
> + struct fuse_dmmap_vm *fdvm = vma->vm_private_data;
> + struct fuse_dmmap_region *fdr = fdvm->region;
> +
> + vma->vm_file = fdr->filp;
> + ret = fdr->vm_original_ops->fault(vma, vmf);
> +
> + vma->vm_file = filp;
> +
> + return ret;
> +}
> +
> +static const struct vm_operations_struct fuse_dmmap_vm_ops = {
> + .open = fuse_dmmap_vm_open,
> + .close = fuse_dmmap_vm_close,
> + .fault = fuse_dmmap_vm_fault,
> +};
> +
> +static struct fuse_dmmap_region *fuse_dmmap_find_locked(struct fuse_conn *fc,
> + u64 mapid)
> +{
> + struct fuse_dmmap_region *curr;
> + struct fuse_dmmap_region *fdr = NULL;
> +
> + list_for_each_entry(curr, &fc->dmmap_list, list) {
> + if (curr->mapid == mapid) {
> + fdr = curr;
> + atomic_inc(&fdr->ref);
> + break;
> + }
> + }
> +
> + return fdr;
> +}
> +
> +static struct fuse_dmmap_region *fuse_dmmap_find(struct fuse_conn *fc,
> + u64 mapid)
> +{
> + struct fuse_dmmap_region *fdr;
> +
> + spin_lock(&fc->lock);
> + fdr = fuse_dmmap_find_locked(fc, mapid);
> + spin_unlock(&fc->lock);
> +
> + return fdr;
> +}
> +
> +static struct fuse_dmmap_region *fuse_dmmap_get(struct fuse_conn *fc,
> + struct file *file, u64 mapid,
> + u64 size, unsigned long flags)
> +{
> + struct fuse_dmmap_region *fdr;
> + char *pathbuf, *filepath;
> + struct file *shmem_file;
> +
> + fdr = fuse_dmmap_find(fc, mapid);
> + if (!fdr) {
> + struct fuse_dmmap_region *tmp;
> +
> + fdr = kzalloc(sizeof(struct fuse_dmmap_region), GFP_KERNEL);
> + if (!fdr)
> + return ERR_PTR(-ENOMEM);
> +
> + atomic_set(&fdr->ref, 1);
> +
> + pathbuf = kzalloc(PATH_MAX+1, GFP_KERNEL);
> + if (!pathbuf) {
> + kfree(fdr);
> + return ERR_PTR(-ENOMEM);
> + }
> +
> + filepath = d_path(&file->f_path, pathbuf, PATH_MAX+1);
> + if (IS_ERR(filepath)) {
> + kfree(fdr);
> + kfree(pathbuf);
> + return (struct fuse_dmmap_region *) filepath;
> + }
> +
> + fdr->mapid = mapid;
> + shmem_file = shmem_file_setup(filepath, size, flags);
> + kfree(pathbuf);
> +
> + if (IS_ERR(shmem_file)) {
> + kfree(fdr);
> + return (struct fuse_dmmap_region *) shmem_file;
> + }
> +
> + fdr->filp = shmem_file;
> +
> + spin_lock(&fc->lock);
> + tmp = fuse_dmmap_find_locked(fc, mapid);
> + if (tmp) {
> + fput(fdr->filp);
> + kfree(fdr);
> + fdr = tmp;
> + } else {
> + INIT_LIST_HEAD(&fdr->list);
> + list_add(&fdr->list, &fc->dmmap_list);
> + }
> + spin_unlock(&fc->lock);
> + }
> +
> + if (size > fdr->size) {
> +
> + fdr->filp->f_op->fallocate(fdr->filp, 0, 0, size);
> + fdr->size = size;
> + }
> +
> + return fdr;
> +}
> +
> +static int cuse_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> + int err;
> + struct fuse_file *ff = file->private_data;
> + struct fuse_conn *fc = ff->fc;
> + struct fuse_dmmap_vm *fdvm;
> + struct fuse_dmmap_region *fdr;
> + struct fuse_req *req = NULL;
> + struct fuse_mmap_in inarg;
> + struct fuse_mmap_out outarg;
> +
> + if (fc->no_dmmap)
> + return -ENOSYS;
> +
> + req = fuse_get_req(fc, 0);
> + if (IS_ERR(req))
> + return PTR_ERR(req);
> +
> + /* ask server whether this mmap is okay and what the size should be */
> + memset(&inarg, 0, sizeof(inarg));
> + inarg.fh = ff->fh;
> + inarg.addr = vma->vm_start;
> + inarg.len = vma->vm_end - vma->vm_start;
> + inarg.prot = ((vma->vm_flags & VM_READ) ? PROT_READ : 0) |
> + ((vma->vm_flags & VM_WRITE) ? PROT_WRITE : 0) |
> + ((vma->vm_flags & VM_EXEC) ? PROT_EXEC : 0);
> + inarg.flags = ((vma->vm_flags & VM_SHARED) ? MAP_SHARED : 0 ) |
> + ((vma->vm_flags & VM_GROWSDOWN) ? MAP_GROWSDOWN : 0) |
> + ((vma->vm_flags & VM_DENYWRITE) ? MAP_DENYWRITE : 0) |
> + ((vma->vm_flags & VM_EXEC) ? MAP_EXECUTABLE : 0) |
> + ((vma->vm_flags & VM_LOCKED) ? MAP_LOCKED : 0);
> + inarg.offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
> +
> + req->in.h.opcode = FUSE_MMAP;
> + req->in.h.nodeid = ff->nodeid;
> + req->in.numargs = 1;
> + req->in.args[0].size = sizeof(inarg);
> + req->in.args[0].value = &inarg;
> + req->out.numargs = 1;
> + req->out.args[0].size = sizeof(outarg);
> + req->out.args[0].value = &outarg;
> +
> + fuse_request_send(fc, req);
> + err = req->out.h.error;
> + if (err) {
> + if (err == -ENOSYS)
> + fc->no_dmmap = 1;
> + goto free_req;
> + }
> +
> + fdr = fuse_dmmap_get(fc, file, outarg.mapid, outarg.size,
> + vma->vm_flags);
> + err = PTR_ERR(fdr);
> + if (IS_ERR(fdr))
> + goto free_req;
> +
> + err = -ENOMEM;
> +
> + fdvm = kzalloc(sizeof(*fdvm), GFP_KERNEL);
> + if (!fdvm) {
> + fuse_dmmap_region_put(fc, fdr);
> + goto free_req;
> + }
> +
> + atomic_set(&fdvm->open_count, 1);
> + fdvm->region = fdr;
> + fdvm->len = inarg.len;
> + fdvm->off = inarg.offset;
> +
> + fdr->filp->f_op->mmap(fdr->filp, vma);
> +
> + memcpy(&fdr->vm_ops, vma->vm_ops, sizeof(fdr->vm_ops));
> + fdr->vm_ops.open = fuse_dmmap_vm_ops.open;
> + fdr->vm_ops.close = fuse_dmmap_vm_ops.close;
> + fdr->vm_ops.fault = fuse_dmmap_vm_ops.fault;
> +
> + fdr->vm_original_ops = vma->vm_ops;
> +
> + vma->vm_ops = &fdr->vm_ops;
> +
> + vma->vm_private_data = fdvm;
> + vma->vm_flags |= VM_DONTEXPAND; /* disallow expansion for now */
> + err = 0;
> +
> +free_req:
> + fuse_put_request(fc, req);
> + return err;
> +}
> +
> +static int fuse_notify_store_to_dmmap(struct fuse_conn *fc,
> + struct fuse_copy_state *cs,
> + u64 nodeid, u32 size, u64 pos)
> +{
> + struct fuse_dmmap_region *fdr;
> + struct file *filp;
> + pgoff_t index;
> + unsigned int off;
> + int err;
> +
> + fdr = fuse_dmmap_find(fc, nodeid);
> + if (!fdr)
> + return -ENOENT;
> +
> + index = pos >> PAGE_SHIFT;
> + off = pos & ~PAGE_MASK;
> + if (pos > fdr->size)
> + size = 0;
> + else if (size > fdr->size - pos)
> + size = fdr->size - pos;
> +
> + filp = fdr->filp;
> +
> + while (size) {
> + struct page *page;
> + unsigned int this_num;
> +
> + page = shmem_read_mapping_page_gfp(filp->f_inode->i_mapping,
> + index, GFP_HIGHUSER);
> + if (IS_ERR(page)) {
> +
> + err = -ENOMEM;
> + goto out_iput;
> + }
> +
> + this_num = min_t(unsigned, size, PAGE_SIZE - off);
> + err = fuse_copy_page(cs, &page, off, this_num, 0);
> +
> + unlock_page(page);
> + page_cache_release(page);
> +
> + if (err)
> + goto out_iput;
> +
> + size -= this_num;
> + off = 0;
> + index++;
> + }
> +
> + err = 0;
> +
> +out_iput:
> + fuse_dmmap_region_put(fc, fdr);
> +
> + return err;
> +}
> +
> +static void fuse_retrieve_dmmap_end(struct fuse_conn *fc, struct fuse_req *req)
> +{
> + release_pages(req->pages, req->num_pages, 0);
> +}
> +
> +static int fuse_notify_retrieve_from_dmmap(struct fuse_conn *fc,
> + struct fuse_notify_retrieve_out *outarg)
> +{
> + struct fuse_dmmap_region *fdr;
> + struct fuse_req *req;
> + struct page *page;
> + struct file *filp;
> + pgoff_t index;
> + unsigned int num;
> + unsigned int offset;
> + unsigned int npages;
> + unsigned int this_num;
> + size_t total_len = 0;
> + int err;
> +
> + fdr = fuse_dmmap_find(fc, outarg->nodeid);
> + if (!fdr)
> + return -ENOENT;
> +
> + npages = outarg->size >> PAGE_SHIFT;
> + if (outarg->size & ~PAGE_MASK)
> + npages++;
> +
> + req = fuse_get_req(fc, npages);
> + err = PTR_ERR(req);
> + if (IS_ERR(req))
> + goto out_put_region;
> +
> + offset = outarg->offset & ~PAGE_MASK;
> +
> + req->in.h.opcode = FUSE_NOTIFY_REPLY;
> + req->in.h.nodeid = outarg->nodeid;
> + req->in.numargs = 2;
> + req->in.argpages = 1;
> + req->end = fuse_retrieve_dmmap_end;
> +
> + index = outarg->offset >> PAGE_SHIFT;
> + num = outarg->size;
> + if (outarg->offset > fdr->size)
> + num = 0;
> + else if (outarg->offset + num > fdr->size)
> + num = fdr->size - outarg->offset;
> +
> + filp = fdr->filp;
> +
> + npages = 0;
> + while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) {
> +
> + page = shmem_read_mapping_page_gfp(filp->f_inode->i_mapping,
> + index,
> + GFP_KERNEL);
> + if (IS_ERR(page)) {
> + err = -ENOMEM;
> + goto out_put_region;
> + }
> +
> + this_num = min_t(unsigned, num, PAGE_SIZE - offset);
> + req->pages[req->num_pages] = page;
> + req->page_descs[req->num_pages].length = this_num;
> + req->num_pages++;
> +
> + num -= this_num;
> + total_len += this_num;
> + index++;
> + npages++;
> + }
> + req->misc.retrieve_in.offset = outarg->offset;
> + req->misc.retrieve_in.size = total_len;
> + req->in.args[0].size = sizeof(req->misc.retrieve_in);
> + req->in.args[0].value = &req->misc.retrieve_in;
> + req->in.args[1].size = total_len;
> +
> + err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
> + if (err)
> + fuse_retrieve_dmmap_end(fc, req);
> +
> +out_put_region:
> + fuse_dmmap_region_put(fc, fdr);
> +
> + return err;
> +}
> +
> +
> static const struct file_operations cuse_frontend_fops = {
> .owner = THIS_MODULE,
> .read_iter = cuse_read_iter,
> @@ -184,7 +622,8 @@ static const struct file_operations cuse_frontend_fops = {
> .unlocked_ioctl = cuse_file_ioctl,
> .compat_ioctl = cuse_file_compat_ioctl,
> .poll = fuse_file_poll,
> - .llseek = noop_llseek,
> + .llseek = noop_llseek,
> + .mmap = cuse_mmap,
> };
>
>
> @@ -468,10 +907,26 @@ err:
>
> static void cuse_fc_release(struct fuse_conn *fc)
> {
> + struct fuse_dmmap_region *fdr;
> struct cuse_conn *cc = fc_to_cc(fc);
> +
> + spin_lock(&fc->lock);
> + while (!list_empty(&fc->dmmap_list)) {
> +
> + fdr = list_entry(fc->dmmap_list.next, typeof(*fdr), list);
> + fuse_dmmap_region_put(fc, fdr);
> + }
> + spin_unlock(&fc->lock);
> +
> kfree_rcu(cc, fc.rcu);
> }
>
> +static const struct fuse_conn_operations cuse_ops = {
> + .release = cuse_fc_release,
> + .notify_store = fuse_notify_store_to_dmmap,
> + .notify_retrieve = fuse_notify_retrieve_from_dmmap,
> +};
> +
> /**
> * cuse_channel_open - open method for /dev/cuse
> * @inode: inode for /dev/cuse
> @@ -507,7 +962,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
> }
>
> INIT_LIST_HEAD(&cc->list);
> - cc->fc.release = cuse_fc_release;
> + cc->fc.ops = &cuse_ops;
>
> cc->fc.initialized = 1;
> rc = cuse_send_init(cc);
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index 80cc1b3..0faf92c 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -279,6 +279,7 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
> __clear_bit(FR_BACKGROUND, &req->flags);
> return req;
> }
> +EXPORT_SYMBOL_GPL(fuse_get_req_nofail_nopages);
>
> void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
> {
> @@ -617,8 +618,8 @@ void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
> }
> EXPORT_SYMBOL_GPL(fuse_request_send_background);
>
> -static int fuse_request_send_notify_reply(struct fuse_conn *fc,
> - struct fuse_req *req, u64 unique)
> +int fuse_request_send_notify_reply(struct fuse_conn *fc,
> + struct fuse_req *req, u64 unique)
> {
> int err = -ENODEV;
> struct fuse_iqueue *fiq = &fc->iq;
> @@ -674,6 +675,7 @@ static int lock_request(struct fuse_req *req)
> }
> return err;
> }
> +EXPORT_SYMBOL_GPL(fuse_request_send_notify_reply);
>
> /*
> * Unlock request. If it was aborted while locked, caller is responsible
> @@ -967,8 +969,8 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
> * Copy a page in the request to/from the userspace buffer. Must be
> * done atomically
> */
> -static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
> - unsigned offset, unsigned count, int zeroing)
> +int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
> + unsigned offset, unsigned count, int zeroing)
> {
> int err;
> struct page *page = *pagep;
> @@ -1003,6 +1005,7 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
> flush_dcache_page(page);
> return 0;
> }
> +EXPORT_SYMBOL_GPL(fuse_copy_page);
>
> /* Copy pages in the request to/from userspace buffer */
> static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
> @@ -1597,15 +1600,7 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
> struct fuse_copy_state *cs)
> {
> struct fuse_notify_store_out outarg;
> - struct inode *inode;
> - struct address_space *mapping;
> - u64 nodeid;
> int err;
> - pgoff_t index;
> - unsigned int offset;
> - unsigned int num;
> - loff_t file_size;
> - loff_t end;
>
> err = -EINVAL;
> if (size < sizeof(outarg))
> @@ -1619,145 +1614,18 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
> if (size - sizeof(outarg) != outarg.size)
> goto out_finish;
>
> - nodeid = outarg.nodeid;
> + err = fc->ops->notify_store(fc, cs, outarg.nodeid, outarg.size,
> + outarg.offset);
>
> - down_read(&fc->killsb);
> -
> - err = -ENOENT;
> - if (!fc->sb)
> - goto out_up_killsb;
> -
> - inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
> - if (!inode)
> - goto out_up_killsb;
> -
> - mapping = inode->i_mapping;
> - index = outarg.offset >> PAGE_CACHE_SHIFT;
> - offset = outarg.offset & ~PAGE_CACHE_MASK;
> - file_size = i_size_read(inode);
> - end = outarg.offset + outarg.size;
> - if (end > file_size) {
> - file_size = end;
> - fuse_write_update_size(inode, file_size);
> - }
> -
> - num = outarg.size;
> - while (num) {
> - struct page *page;
> - unsigned int this_num;
> -
> - err = -ENOMEM;
> - page = find_or_create_page(mapping, index,
> - mapping_gfp_mask(mapping));
> - if (!page)
> - goto out_iput;
> -
> - this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
> - err = fuse_copy_page(cs, &page, offset, this_num, 0);
> - if (!err && offset == 0 &&
> - (this_num == PAGE_CACHE_SIZE || file_size == end))
> - SetPageUptodate(page);
> - unlock_page(page);
> - page_cache_release(page);
> -
> - if (err)
> - goto out_iput;
> -
> - num -= this_num;
> - offset = 0;
> - index++;
> - }
> -
> - err = 0;
> -
> -out_iput:
> - iput(inode);
> -out_up_killsb:
> - up_read(&fc->killsb);
> out_finish:
> fuse_copy_finish(cs);
> return err;
> }
>
> -static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
> -{
> - release_pages(req->pages, req->num_pages, false);
> -}
> -
> -static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
> - struct fuse_notify_retrieve_out *outarg)
> -{
> - int err;
> - struct address_space *mapping = inode->i_mapping;
> - struct fuse_req *req;
> - pgoff_t index;
> - loff_t file_size;
> - unsigned int num;
> - unsigned int offset;
> - size_t total_len = 0;
> - int num_pages;
> -
> - offset = outarg->offset & ~PAGE_CACHE_MASK;
> - file_size = i_size_read(inode);
> -
> - num = outarg->size;
> - if (outarg->offset > file_size)
> - num = 0;
> - else if (outarg->offset + num > file_size)
> - num = file_size - outarg->offset;
> -
> - num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
> - num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);
> -
> - req = fuse_get_req(fc, num_pages);
> - if (IS_ERR(req))
> - return PTR_ERR(req);
> -
> - req->in.h.opcode = FUSE_NOTIFY_REPLY;
> - req->in.h.nodeid = outarg->nodeid;
> - req->in.numargs = 2;
> - req->in.argpages = 1;
> - req->page_descs[0].offset = offset;
> - req->end = fuse_retrieve_end;
> -
> - index = outarg->offset >> PAGE_CACHE_SHIFT;
> -
> - while (num && req->num_pages < num_pages) {
> - struct page *page;
> - unsigned int this_num;
> -
> - page = find_get_page(mapping, index);
> - if (!page)
> - break;
> -
> - this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
> - req->pages[req->num_pages] = page;
> - req->page_descs[req->num_pages].length = this_num;
> - req->num_pages++;
> -
> - offset = 0;
> - num -= this_num;
> - total_len += this_num;
> - index++;
> - }
> - req->misc.retrieve_in.offset = outarg->offset;
> - req->misc.retrieve_in.size = total_len;
> - req->in.args[0].size = sizeof(req->misc.retrieve_in);
> - req->in.args[0].value = &req->misc.retrieve_in;
> - req->in.args[1].size = total_len;
> -
> - err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
> - if (err)
> - fuse_retrieve_end(fc, req);
> -
> - return err;
> -}
> -
> static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
> struct fuse_copy_state *cs)
> {
> struct fuse_notify_retrieve_out outarg;
> - struct inode *inode;
> int err;
>
> err = -EINVAL;
> @@ -1770,18 +1638,7 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
>
> fuse_copy_finish(cs);
>
> - down_read(&fc->killsb);
> - err = -ENOENT;
> - if (fc->sb) {
> - u64 nodeid = outarg.nodeid;
> -
> - inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
> - if (inode) {
> - err = fuse_retrieve(fc, inode, &outarg);
> - iput(inode);
> - }
> - }
> - up_read(&fc->killsb);
> + err = fc->ops->notify_retrieve(fc, &outarg);
>
> return err;
>
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index 4051131..a56222b 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -337,6 +337,7 @@ struct fuse_req {
> struct fuse_req *next;
> } write;
> struct fuse_notify_retrieve_in retrieve_in;
> + struct fuse_munmap_in munmap_in;
> } misc;
>
> /** page vector */
> @@ -431,6 +432,21 @@ struct fuse_dev {
> struct list_head entry;
> };
>
> +struct fuse_copy_state;
> +
> +struct fuse_conn_operations {
> + /** Called on final put */
> + void (*release)(struct fuse_conn *);
> +
> + /** Called to store data into a mapping */
> + int (*notify_store)(struct fuse_conn *, struct fuse_copy_state *,
> + u64 nodeid, u32 size, u64 pos);
> +
> + /** Called to retrieve data from a mapping */
> + int (*notify_retrieve)(struct fuse_conn *,
> + struct fuse_notify_retrieve_out *);
> +};
> +
> /**
> * A Fuse connection.
> *
> @@ -578,6 +594,9 @@ struct fuse_conn {
> /** Is poll not implemented by fs? */
> unsigned no_poll:1;
>
> + /** Is direct mmap not implemente by fs? */
> + unsigned no_dmmap:1;
> +
> /** Do multi-page cached writes */
> unsigned big_writes:1;
>
> @@ -635,9 +654,6 @@ struct fuse_conn {
> /** Version counter for attribute changes */
> u64 attr_version;
>
> - /** Called on final put */
> - void (*release)(struct fuse_conn *);
> -
> /** Super block for this connection. */
> struct super_block *sb;
>
> @@ -646,6 +662,12 @@ struct fuse_conn {
>
> /** List of device instances belonging to this connection */
> struct list_head devices;
> +
> + /** List of direct mmaps (currently CUSE only) */
> + struct list_head dmmap_list;
> +
> + /** Operations that fuse and cuse can implement differently */
> + const struct fuse_conn_operations *ops;
> };
>
> static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
> @@ -944,4 +966,10 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
>
> void fuse_set_initialized(struct fuse_conn *fc);
>
> +int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
> + unsigned offset, unsigned count, int zeroing);
> +
> +int fuse_request_send_notify_reply(struct fuse_conn *fc,
> + struct fuse_req *req, u64 unique);
> +
> #endif /* _FS_FUSE_I_H */
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index ac81f48..5284b84 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -609,6 +609,7 @@ void fuse_conn_init(struct fuse_conn *fc)
> fc->connected = 1;
> fc->attr_version = 1;
> get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
> + INIT_LIST_HEAD(&fc->dmmap_list);
> }
> EXPORT_SYMBOL_GPL(fuse_conn_init);
>
> @@ -617,7 +618,7 @@ void fuse_conn_put(struct fuse_conn *fc)
> if (atomic_dec_and_test(&fc->count)) {
> if (fc->destroy_req)
> fuse_request_free(fc->destroy_req);
> - fc->release(fc);
> + fc->ops->release(fc);
> }
> }
> EXPORT_SYMBOL_GPL(fuse_conn_put);
> @@ -1025,6 +1026,167 @@ void fuse_dev_free(struct fuse_dev *fud)
> }
> EXPORT_SYMBOL_GPL(fuse_dev_free);
>
> +static int fuse_notify_store_to_inode(struct fuse_conn *fc,
> + struct fuse_copy_state *cs,
> + u64 nodeid, u32 size, u64 pos)
> +{
> + struct inode *inode;
> + struct address_space *mapping;
> + pgoff_t index;
> + unsigned int off;
> + loff_t file_size;
> + loff_t end;
> + int err;
> +
> + down_read(&fc->killsb);
> +
> + err = -ENOENT;
> + if (!fc->sb)
> + goto out_up_killsb;
> +
> + inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
> + if (!inode)
> + goto out_up_killsb;
> +
> + mapping = inode->i_mapping;
> + index = pos >> PAGE_CACHE_SHIFT;
> + off = pos & ~PAGE_CACHE_MASK;
> + file_size = i_size_read(inode);
> + end = pos + size;
> + if (end > file_size) {
> + file_size = end;
> + fuse_write_update_size(inode, file_size);
> + }
> +
> + while (size) {
> + struct page *page;
> + unsigned int this_num;
> +
> + err = -ENOMEM;
> + page = find_or_create_page(mapping, index,
> + mapping_gfp_mask(mapping));
> + if (!page)
> + goto out_iput;
> +
> + this_num = min_t(unsigned, size, PAGE_CACHE_SIZE - off);
> + err = fuse_copy_page(cs, &page, off, this_num, 0);
> + if (!err && off == 0 && (size != 0 || file_size == end))
> + SetPageUptodate(page);
> + unlock_page(page);
> + page_cache_release(page);
> +
> + if (err)
> + goto out_iput;
> +
> + size -= this_num;
> + off = 0;
> + index++;
> + }
> +
> + err = 0;
> +
> +out_iput:
> + iput(inode);
> +out_up_killsb:
> + up_read(&fc->killsb);
> +
> + return err;
> +}
> +
> +static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
> +{
> + release_pages(req->pages, req->num_pages, 0);
> +}
> +
> +static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
> + struct fuse_notify_retrieve_out *outarg)
> +{
> + int err;
> + struct address_space *mapping = inode->i_mapping;
> + struct fuse_req *req;
> + pgoff_t index;
> + loff_t file_size;
> + unsigned int num;
> + unsigned int offset;
> + size_t total_len = 0;
> +
> + req = fuse_get_req(fc, 0);
> + if (IS_ERR(req))
> + return PTR_ERR(req);
> +
> + offset = outarg->offset & ~PAGE_CACHE_MASK;
> +
> + req->in.h.opcode = FUSE_NOTIFY_REPLY;
> + req->in.h.nodeid = outarg->nodeid;
> + req->in.numargs = 2;
> + req->in.argpages = 1;
> + req->end = fuse_retrieve_end;
> +
> + index = outarg->offset >> PAGE_CACHE_SHIFT;
> + file_size = i_size_read(inode);
> + num = outarg->size;
> + if (outarg->offset > file_size)
> + num = 0;
> + else if (outarg->offset + num > file_size)
> + num = file_size - outarg->offset;
> +
> + while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) {
> + struct page *page;
> + unsigned int this_num;
> +
> + page = find_get_page(mapping, index);
> + if (!page)
> + break;
> +
> + this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
> + req->pages[req->num_pages] = page;
> + req->num_pages++;
> +
> + num -= this_num;
> + total_len += this_num;
> + index++;
> + }
> + req->misc.retrieve_in.offset = outarg->offset;
> + req->misc.retrieve_in.size = total_len;
> + req->in.args[0].size = sizeof(req->misc.retrieve_in);
> + req->in.args[0].value = &req->misc.retrieve_in;
> + req->in.args[1].size = total_len;
> +
> + err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
> + if (err)
> + fuse_retrieve_end(fc, req);
> +
> + return err;
> +}
> +
> +static int fuse_notify_retrieve_from_inode(struct fuse_conn *fc,
> + struct fuse_notify_retrieve_out *outarg)
> +{
> + struct inode *inode;
> + int err;
> +
> + down_read(&fc->killsb);
> + err = -ENOENT;
> + if (fc->sb) {
> + u64 nodeid = outarg->nodeid;
> +
> + inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
> + if (inode) {
> + err = fuse_retrieve(fc, inode, outarg);
> + iput(inode);
> + }
> + }
> + up_read(&fc->killsb);
> +
> + return err;
> +}
> +
> +static const struct fuse_conn_operations fuse_default_ops = {
> + .release = fuse_free_conn,
> + .notify_store = fuse_notify_store_to_inode,
> + .notify_retrieve = fuse_notify_retrieve_from_inode,
> +};
> +
> static int fuse_fill_super(struct super_block *sb, void *data, int silent)
> {
> struct fuse_dev *fud;
> @@ -1077,7 +1239,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
> goto err_fput;
>
> fuse_conn_init(fc);
> - fc->release = fuse_free_conn;
> + fc->ops = &fuse_default_ops;
>
> fud = fuse_dev_alloc(fc);
> if (!fud)
> diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> index c9aca04..3f4c54b 100644
> --- a/include/uapi/linux/fuse.h
> +++ b/include/uapi/linux/fuse.h
> @@ -102,6 +102,7 @@
> * - add ctime and ctimensec to fuse_setattr_in
> * - add FUSE_RENAME2 request
> * - add FUSE_NO_OPEN_SUPPORT flag
> + * - add FUSE_MMAP and FUSE_MUNMAP
> */
>
> #ifndef _LINUX_FUSE_H
> @@ -358,6 +359,8 @@ enum fuse_opcode {
> FUSE_FALLOCATE = 43,
> FUSE_READDIRPLUS = 44,
> FUSE_RENAME2 = 45,
> + FUSE_MMAP = 46,
> + FUSE_MUNMAP = 47,
>
> /* CUSE specific operations */
> CUSE_INIT = 4096,
> @@ -670,6 +673,29 @@ struct fuse_fallocate_in {
> uint32_t padding;
> };
>
> +struct fuse_mmap_in {
> + __u64 fh;
> + __u64 addr;
> + __u64 len;
> + __u32 prot;
> + __u32 flags;
> + __u64 offset;
> +};
> +
> +struct fuse_mmap_out {
> + __u64 mapid; /* Mmap ID, same namespace as Inode ID */
> + __u64 size; /* Size of memory region */
> + __u64 reserved;
> +};
> +
> +struct fuse_munmap_in {
> + __u64 fh;
> + __u64 mapid;
> + __u64 size; /* Size of memory region */
> + __u64 offset;
> + __u64 reserved;
> +};
> +
> struct fuse_in_header {
> uint32_t len;
> uint32_t opcode;
>
Hi Jader,
[auto build test WARNING on v4.4-rc5]
[cannot apply to next-20151216]
url: https://github.com/0day-ci/linux/commits/Jader-H-Silva/fuse-implement-cuse-mmap/20151217-034148
config: i386-tinyconfig (attached as .config)
reproduce:
# save the attached .config to linux build tree
make ARCH=i386
All warnings (new ones prefixed by >>):
>> ./usr/include/linux/fuse.h:673: found __[us]{8,16,32,64} type without #include <linux/types.h>
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation