Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S966489AbbLPTjA (ORCPT ); Wed, 16 Dec 2015 14:39:00 -0500 Received: from mail-qg0-f48.google.com ([209.85.192.48]:35739 "EHLO mail-qg0-f48.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S933202AbbLPTi6 (ORCPT ); Wed, 16 Dec 2015 14:38:58 -0500 From: "Jader H. Silva" Subject: [PATCH] fuse: implement cuse mmap To: Miklos Szeredi , Andrew Morton , linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, Luca Risolia Message-ID: <5671BE21.4080800@gmail.com> Date: Wed, 16 Dec 2015 17:40:17 -0200 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.4.0 MIME-Version: 1.0 Content-Type: text/plain; charset=windows-1252 Content-Transfer-Encoding: 7bit Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 27032 Lines: 1083 Implement cuse mmap using shmem to provide the actual memory maps. Pages must be read/written using fuse's NOTIFY_RETRIEVE and NOTIFY_STORE api. Signed-off-by: Jader H. 
Silva --- fs/fuse/cuse.c | 459 +++++++++++++++++++++++++++++++++++++++++++++- fs/fuse/dev.c | 163 +--------------- fs/fuse/fuse_i.h | 34 +++- fs/fuse/inode.c | 166 ++++++++++++++++- include/uapi/linux/fuse.h | 26 +++ 5 files changed, 688 insertions(+), 160 deletions(-) diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index eae2c11..7749c13 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -48,6 +48,9 @@ #include #include #include +#include +#include +#include #include "fuse_i.h" @@ -175,6 +178,441 @@ static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd, return fuse_do_ioctl(file, cmd, arg, flags); } +struct fuse_dmmap_region { + u64 mapid; + u64 size; + struct file *filp; + struct vm_operations_struct vm_ops; + const struct vm_operations_struct *vm_original_ops; + struct list_head list; + atomic_t ref; +}; + +/* + * fuse_dmmap_vm represents the result of a single mmap() call, which + * can be shared by multiple client vmas created by forking. + */ +struct fuse_dmmap_vm { + u64 len; + u64 off; + atomic_t open_count; + struct fuse_dmmap_region *region; +}; + +static void fuse_dmmap_region_put(struct fuse_conn *fc, + struct fuse_dmmap_region *fdr) +{ + if (atomic_dec_and_lock(&fdr->ref, &fc->lock)) { + + list_del(&fdr->list); + + spin_unlock(&fc->lock); + + fput(fdr->filp); + kfree(fdr); + } +} + +static void fuse_dmmap_vm_open(struct vm_area_struct *vma) +{ + struct fuse_dmmap_vm *fdvm = vma->vm_private_data; + struct fuse_dmmap_region *fdr = fdvm->region; + + /* vma copied */ + atomic_inc(&fdvm->open_count); + + if (fdr->vm_original_ops->open) + fdr->vm_original_ops->open(vma); +} + +static void fuse_dmmap_vm_close(struct vm_area_struct *vma) +{ + struct fuse_dmmap_vm *fdvm = vma->vm_private_data; + struct fuse_dmmap_region *fdr = fdvm->region; + struct fuse_file *ff = vma->vm_file->private_data; + struct fuse_conn *fc = ff->fc; + struct fuse_req *req; + struct fuse_munmap_in *inarg; + + if (fdr->vm_original_ops->close) + 
fdr->vm_original_ops->close(vma); + + if (!atomic_dec_and_test(&fdvm->open_count)) + return; + + /* + * Notify server that the mmap region has been unmapped. + * Failing this might lead to resource leak in server, don't + * fail. + */ + req = fuse_get_req_nofail_nopages(fc, vma->vm_file); + inarg = &req->misc.munmap_in; + + inarg->fh = ff->fh; + inarg->mapid = fdvm->region->mapid; + inarg->size = fdvm->len; + inarg->offset = fdvm->off; + + req->in.h.opcode = FUSE_MUNMAP; + req->in.h.nodeid = ff->nodeid; + req->in.numargs = 1; + req->in.args[0].size = sizeof(*inarg); + req->in.args[0].value = inarg; + + fuse_request_send(fc, req); + fuse_put_request(fc, req); + fuse_dmmap_region_put(fc, fdvm->region); + kfree(fdvm); +} + +static int fuse_dmmap_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + int ret; + struct file *filp = vma->vm_file; + struct fuse_dmmap_vm *fdvm = vma->vm_private_data; + struct fuse_dmmap_region *fdr = fdvm->region; + + vma->vm_file = fdr->filp; + ret = fdr->vm_original_ops->fault(vma, vmf); + + vma->vm_file = filp; + + return ret; +} + +static const struct vm_operations_struct fuse_dmmap_vm_ops = { + .open = fuse_dmmap_vm_open, + .close = fuse_dmmap_vm_close, + .fault = fuse_dmmap_vm_fault, +}; + +static struct fuse_dmmap_region *fuse_dmmap_find_locked(struct fuse_conn *fc, + u64 mapid) +{ + struct fuse_dmmap_region *curr; + struct fuse_dmmap_region *fdr = NULL; + + list_for_each_entry(curr, &fc->dmmap_list, list) { + if (curr->mapid == mapid) { + fdr = curr; + atomic_inc(&fdr->ref); + break; + } + } + + return fdr; +} + +static struct fuse_dmmap_region *fuse_dmmap_find(struct fuse_conn *fc, + u64 mapid) +{ + struct fuse_dmmap_region *fdr; + + spin_lock(&fc->lock); + fdr = fuse_dmmap_find_locked(fc, mapid); + spin_unlock(&fc->lock); + + return fdr; +} + +static struct fuse_dmmap_region *fuse_dmmap_get(struct fuse_conn *fc, + struct file *file, u64 mapid, + u64 size, unsigned long flags) +{ + struct fuse_dmmap_region *fdr; + char 
*pathbuf, *filepath; + struct file *shmem_file; + + fdr = fuse_dmmap_find(fc, mapid); + if (!fdr) { + struct fuse_dmmap_region *tmp; + + fdr = kzalloc(sizeof(struct fuse_dmmap_region), GFP_KERNEL); + if (!fdr) + return ERR_PTR(-ENOMEM); + + atomic_set(&fdr->ref, 1); + + pathbuf = kzalloc(PATH_MAX+1, GFP_KERNEL); + if (!pathbuf) { + kfree(fdr); + return ERR_PTR(-ENOMEM); + } + + filepath = d_path(&file->f_path, pathbuf, PATH_MAX+1); + if (IS_ERR(filepath)) { + kfree(fdr); + kfree(pathbuf); + return (struct fuse_dmmap_region *) filepath; + } + + fdr->mapid = mapid; + shmem_file = shmem_file_setup(filepath, size, flags); + kfree(pathbuf); + + if (IS_ERR(shmem_file)) { + kfree(fdr); + return (struct fuse_dmmap_region *) shmem_file; + } + + fdr->filp = shmem_file; + + spin_lock(&fc->lock); + tmp = fuse_dmmap_find_locked(fc, mapid); + if (tmp) { + fput(fdr->filp); + kfree(fdr); + fdr = tmp; + } else { + INIT_LIST_HEAD(&fdr->list); + list_add(&fdr->list, &fc->dmmap_list); + } + spin_unlock(&fc->lock); + } + + if (size > fdr->size) { + + fdr->filp->f_op->fallocate(fdr->filp, 0, 0, size); + fdr->size = size; + } + + return fdr; +} + +static int cuse_mmap(struct file *file, struct vm_area_struct *vma) +{ + int err; + struct fuse_file *ff = file->private_data; + struct fuse_conn *fc = ff->fc; + struct fuse_dmmap_vm *fdvm; + struct fuse_dmmap_region *fdr; + struct fuse_req *req = NULL; + struct fuse_mmap_in inarg; + struct fuse_mmap_out outarg; + + if (fc->no_dmmap) + return -ENOSYS; + + req = fuse_get_req(fc, 0); + if (IS_ERR(req)) + return PTR_ERR(req); + + /* ask server whether this mmap is okay and what the size should be */ + memset(&inarg, 0, sizeof(inarg)); + inarg.fh = ff->fh; + inarg.addr = vma->vm_start; + inarg.len = vma->vm_end - vma->vm_start; + inarg.prot = ((vma->vm_flags & VM_READ) ? PROT_READ : 0) | + ((vma->vm_flags & VM_WRITE) ? PROT_WRITE : 0) | + ((vma->vm_flags & VM_EXEC) ? PROT_EXEC : 0); + inarg.flags = ((vma->vm_flags & VM_SHARED) ? 
MAP_SHARED : 0 ) | + ((vma->vm_flags & VM_GROWSDOWN) ? MAP_GROWSDOWN : 0) | + ((vma->vm_flags & VM_DENYWRITE) ? MAP_DENYWRITE : 0) | + ((vma->vm_flags & VM_EXEC) ? MAP_EXECUTABLE : 0) | + ((vma->vm_flags & VM_LOCKED) ? MAP_LOCKED : 0); + inarg.offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT; + + req->in.h.opcode = FUSE_MMAP; + req->in.h.nodeid = ff->nodeid; + req->in.numargs = 1; + req->in.args[0].size = sizeof(inarg); + req->in.args[0].value = &inarg; + req->out.numargs = 1; + req->out.args[0].size = sizeof(outarg); + req->out.args[0].value = &outarg; + + fuse_request_send(fc, req); + err = req->out.h.error; + if (err) { + if (err == -ENOSYS) + fc->no_dmmap = 1; + goto free_req; + } + + fdr = fuse_dmmap_get(fc, file, outarg.mapid, outarg.size, + vma->vm_flags); + err = PTR_ERR(fdr); + if (IS_ERR(fdr)) + goto free_req; + + err = -ENOMEM; + + fdvm = kzalloc(sizeof(*fdvm), GFP_KERNEL); + if (!fdvm) { + fuse_dmmap_region_put(fc, fdr); + goto free_req; + } + + atomic_set(&fdvm->open_count, 1); + fdvm->region = fdr; + fdvm->len = inarg.len; + fdvm->off = inarg.offset; + + fdr->filp->f_op->mmap(fdr->filp, vma); + + memcpy(&fdr->vm_ops, vma->vm_ops, sizeof(fdr->vm_ops)); + fdr->vm_ops.open = fuse_dmmap_vm_ops.open; + fdr->vm_ops.close = fuse_dmmap_vm_ops.close; + fdr->vm_ops.fault = fuse_dmmap_vm_ops.fault; + + fdr->vm_original_ops = vma->vm_ops; + + vma->vm_ops = &fdr->vm_ops; + + vma->vm_private_data = fdvm; + vma->vm_flags |= VM_DONTEXPAND; /* disallow expansion for now */ + err = 0; + +free_req: + fuse_put_request(fc, req); + return err; +} + +static int fuse_notify_store_to_dmmap(struct fuse_conn *fc, + struct fuse_copy_state *cs, + u64 nodeid, u32 size, u64 pos) +{ + struct fuse_dmmap_region *fdr; + struct file *filp; + pgoff_t index; + unsigned int off; + int err; + + fdr = fuse_dmmap_find(fc, nodeid); + if (!fdr) + return -ENOENT; + + index = pos >> PAGE_SHIFT; + off = pos & ~PAGE_MASK; + if (pos > fdr->size) + size = 0; + else if (size > fdr->size - pos) + size = 
fdr->size - pos; + + filp = fdr->filp; + + while (size) { + struct page *page; + unsigned int this_num; + + page = shmem_read_mapping_page_gfp(filp->f_inode->i_mapping, + index, GFP_HIGHUSER); + if (IS_ERR(page)) { + + err = -ENOMEM; + goto out_iput; + } + + this_num = min_t(unsigned, size, PAGE_SIZE - off); + err = fuse_copy_page(cs, &page, off, this_num, 0); + + unlock_page(page); + page_cache_release(page); + + if (err) + goto out_iput; + + size -= this_num; + off = 0; + index++; + } + + err = 0; + +out_iput: + fuse_dmmap_region_put(fc, fdr); + + return err; +} + +static void fuse_retrieve_dmmap_end(struct fuse_conn *fc, struct fuse_req *req) +{ + release_pages(req->pages, req->num_pages, 0); +} + +static int fuse_notify_retrieve_from_dmmap(struct fuse_conn *fc, + struct fuse_notify_retrieve_out *outarg) +{ + struct fuse_dmmap_region *fdr; + struct fuse_req *req; + struct page *page; + struct file *filp; + pgoff_t index; + unsigned int num; + unsigned int offset; + unsigned int npages; + unsigned int this_num; + size_t total_len = 0; + int err; + + fdr = fuse_dmmap_find(fc, outarg->nodeid); + if (!fdr) + return -ENOENT; + + npages = outarg->size >> PAGE_SHIFT; + if (outarg->size & ~PAGE_MASK) + npages++; + + req = fuse_get_req(fc, npages); + err = PTR_ERR(req); + if (IS_ERR(req)) + goto out_put_region; + + offset = outarg->offset & ~PAGE_MASK; + + req->in.h.opcode = FUSE_NOTIFY_REPLY; + req->in.h.nodeid = outarg->nodeid; + req->in.numargs = 2; + req->in.argpages = 1; + req->end = fuse_retrieve_dmmap_end; + + index = outarg->offset >> PAGE_SHIFT; + num = outarg->size; + if (outarg->offset > fdr->size) + num = 0; + else if (outarg->offset + num > fdr->size) + num = fdr->size - outarg->offset; + + filp = fdr->filp; + + npages = 0; + while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) { + + page = shmem_read_mapping_page_gfp(filp->f_inode->i_mapping, + index, + GFP_KERNEL); + if (IS_ERR(page)) { + err = -ENOMEM; + goto out_put_region; + } + + this_num = 
min_t(unsigned, num, PAGE_SIZE - offset); + req->pages[req->num_pages] = page; + req->page_descs[req->num_pages].length = this_num; + req->num_pages++; + + num -= this_num; + total_len += this_num; + index++; + npages++; + } + req->misc.retrieve_in.offset = outarg->offset; + req->misc.retrieve_in.size = total_len; + req->in.args[0].size = sizeof(req->misc.retrieve_in); + req->in.args[0].value = &req->misc.retrieve_in; + req->in.args[1].size = total_len; + + err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique); + if (err) + fuse_retrieve_dmmap_end(fc, req); + +out_put_region: + fuse_dmmap_region_put(fc, fdr); + + return err; +} + + static const struct file_operations cuse_frontend_fops = { .owner = THIS_MODULE, .read_iter = cuse_read_iter, @@ -184,7 +622,8 @@ static const struct file_operations cuse_frontend_fops = { .unlocked_ioctl = cuse_file_ioctl, .compat_ioctl = cuse_file_compat_ioctl, .poll = fuse_file_poll, - .llseek = noop_llseek, + .llseek = noop_llseek, + .mmap = cuse_mmap, }; @@ -468,10 +907,26 @@ err: static void cuse_fc_release(struct fuse_conn *fc) { + struct fuse_dmmap_region *fdr; struct cuse_conn *cc = fc_to_cc(fc); + + spin_lock(&fc->lock); + while (!list_empty(&fc->dmmap_list)) { + + fdr = list_entry(fc->dmmap_list.next, typeof(*fdr), list); + fuse_dmmap_region_put(fc, fdr); + } + spin_unlock(&fc->lock); + kfree_rcu(cc, fc.rcu); } +static const struct fuse_conn_operations cuse_ops = { + .release = cuse_fc_release, + .notify_store = fuse_notify_store_to_dmmap, + .notify_retrieve = fuse_notify_retrieve_from_dmmap, +}; + /** * cuse_channel_open - open method for /dev/cuse * @inode: inode for /dev/cuse @@ -507,7 +962,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file) } INIT_LIST_HEAD(&cc->list); - cc->fc.release = cuse_fc_release; + cc->fc.ops = &cuse_ops; cc->fc.initialized = 1; rc = cuse_send_init(cc); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 80cc1b3..0faf92c 100644 --- a/fs/fuse/dev.c +++ 
b/fs/fuse/dev.c @@ -279,6 +279,7 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc, __clear_bit(FR_BACKGROUND, &req->flags); return req; } +EXPORT_SYMBOL_GPL(fuse_get_req_nofail_nopages); void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) { @@ -617,8 +618,8 @@ void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req) } EXPORT_SYMBOL_GPL(fuse_request_send_background); -static int fuse_request_send_notify_reply(struct fuse_conn *fc, - struct fuse_req *req, u64 unique) +int fuse_request_send_notify_reply(struct fuse_conn *fc, + struct fuse_req *req, u64 unique) { int err = -ENODEV; struct fuse_iqueue *fiq = &fc->iq; @@ -674,6 +675,7 @@ static int lock_request(struct fuse_req *req) } return err; } +EXPORT_SYMBOL_GPL(fuse_request_send_notify_reply); /* * Unlock request. If it was aborted while locked, caller is responsible @@ -967,8 +969,8 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page, * Copy a page in the request to/from the userspace buffer. 
Must be * done atomically */ -static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep, - unsigned offset, unsigned count, int zeroing) +int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep, + unsigned offset, unsigned count, int zeroing) { int err; struct page *page = *pagep; @@ -1003,6 +1005,7 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep, flush_dcache_page(page); return 0; } +EXPORT_SYMBOL_GPL(fuse_copy_page); /* Copy pages in the request to/from userspace buffer */ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes, @@ -1597,15 +1600,7 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, struct fuse_copy_state *cs) { struct fuse_notify_store_out outarg; - struct inode *inode; - struct address_space *mapping; - u64 nodeid; int err; - pgoff_t index; - unsigned int offset; - unsigned int num; - loff_t file_size; - loff_t end; err = -EINVAL; if (size < sizeof(outarg)) @@ -1619,145 +1614,18 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, if (size - sizeof(outarg) != outarg.size) goto out_finish; - nodeid = outarg.nodeid; + err = fc->ops->notify_store(fc, cs, outarg.nodeid, outarg.size, + outarg.offset); - down_read(&fc->killsb); - - err = -ENOENT; - if (!fc->sb) - goto out_up_killsb; - - inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid); - if (!inode) - goto out_up_killsb; - - mapping = inode->i_mapping; - index = outarg.offset >> PAGE_CACHE_SHIFT; - offset = outarg.offset & ~PAGE_CACHE_MASK; - file_size = i_size_read(inode); - end = outarg.offset + outarg.size; - if (end > file_size) { - file_size = end; - fuse_write_update_size(inode, file_size); - } - - num = outarg.size; - while (num) { - struct page *page; - unsigned int this_num; - - err = -ENOMEM; - page = find_or_create_page(mapping, index, - mapping_gfp_mask(mapping)); - if (!page) - goto out_iput; - - this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset); - err = 
fuse_copy_page(cs, &page, offset, this_num, 0); - if (!err && offset == 0 && - (this_num == PAGE_CACHE_SIZE || file_size == end)) - SetPageUptodate(page); - unlock_page(page); - page_cache_release(page); - - if (err) - goto out_iput; - - num -= this_num; - offset = 0; - index++; - } - - err = 0; - -out_iput: - iput(inode); -out_up_killsb: - up_read(&fc->killsb); out_finish: fuse_copy_finish(cs); return err; } -static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req) -{ - release_pages(req->pages, req->num_pages, false); -} - -static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, - struct fuse_notify_retrieve_out *outarg) -{ - int err; - struct address_space *mapping = inode->i_mapping; - struct fuse_req *req; - pgoff_t index; - loff_t file_size; - unsigned int num; - unsigned int offset; - size_t total_len = 0; - int num_pages; - - offset = outarg->offset & ~PAGE_CACHE_MASK; - file_size = i_size_read(inode); - - num = outarg->size; - if (outarg->offset > file_size) - num = 0; - else if (outarg->offset + num > file_size) - num = file_size - outarg->offset; - - num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; - num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ); - - req = fuse_get_req(fc, num_pages); - if (IS_ERR(req)) - return PTR_ERR(req); - - req->in.h.opcode = FUSE_NOTIFY_REPLY; - req->in.h.nodeid = outarg->nodeid; - req->in.numargs = 2; - req->in.argpages = 1; - req->page_descs[0].offset = offset; - req->end = fuse_retrieve_end; - - index = outarg->offset >> PAGE_CACHE_SHIFT; - - while (num && req->num_pages < num_pages) { - struct page *page; - unsigned int this_num; - - page = find_get_page(mapping, index); - if (!page) - break; - - this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset); - req->pages[req->num_pages] = page; - req->page_descs[req->num_pages].length = this_num; - req->num_pages++; - - offset = 0; - num -= this_num; - total_len += this_num; - index++; - } - req->misc.retrieve_in.offset = outarg->offset; 
- req->misc.retrieve_in.size = total_len; - req->in.args[0].size = sizeof(req->misc.retrieve_in); - req->in.args[0].value = &req->misc.retrieve_in; - req->in.args[1].size = total_len; - - err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique); - if (err) - fuse_retrieve_end(fc, req); - - return err; -} - static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size, struct fuse_copy_state *cs) { struct fuse_notify_retrieve_out outarg; - struct inode *inode; int err; err = -EINVAL; @@ -1770,18 +1638,7 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size, fuse_copy_finish(cs); - down_read(&fc->killsb); - err = -ENOENT; - if (fc->sb) { - u64 nodeid = outarg.nodeid; - - inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid); - if (inode) { - err = fuse_retrieve(fc, inode, &outarg); - iput(inode); - } - } - up_read(&fc->killsb); + err = fc->ops->notify_retrieve(fc, &outarg); return err; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 4051131..a56222b 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -337,6 +337,7 @@ struct fuse_req { struct fuse_req *next; } write; struct fuse_notify_retrieve_in retrieve_in; + struct fuse_munmap_in munmap_in; } misc; /** page vector */ @@ -431,6 +432,21 @@ struct fuse_dev { struct list_head entry; }; +struct fuse_copy_state; + +struct fuse_conn_operations { + /** Called on final put */ + void (*release)(struct fuse_conn *); + + /** Called to store data into a mapping */ + int (*notify_store)(struct fuse_conn *, struct fuse_copy_state *, + u64 nodeid, u32 size, u64 pos); + + /** Called to retrieve data from a mapping */ + int (*notify_retrieve)(struct fuse_conn *, + struct fuse_notify_retrieve_out *); +}; + /** * A Fuse connection. * @@ -578,6 +594,9 @@ struct fuse_conn { /** Is poll not implemented by fs? */ unsigned no_poll:1; + /** Is direct mmap not implemented by fs? 
*/ + unsigned no_dmmap:1; + /** Do multi-page cached writes */ unsigned big_writes:1; @@ -635,9 +654,6 @@ struct fuse_conn { /** Version counter for attribute changes */ u64 attr_version; - /** Called on final put */ - void (*release)(struct fuse_conn *); - /** Super block for this connection. */ struct super_block *sb; @@ -646,6 +662,12 @@ struct fuse_conn { /** List of device instances belonging to this connection */ struct list_head devices; + + /** List of direct mmaps (currently CUSE only) */ + struct list_head dmmap_list; + + /** Operations that fuse and cuse can implement differently */ + const struct fuse_conn_operations *ops; }; static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) @@ -944,4 +966,10 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, void fuse_set_initialized(struct fuse_conn *fc); +int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep, + unsigned offset, unsigned count, int zeroing); + +int fuse_request_send_notify_reply(struct fuse_conn *fc, + struct fuse_req *req, u64 unique); + #endif /* _FS_FUSE_I_H */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index ac81f48..5284b84 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -609,6 +609,7 @@ void fuse_conn_init(struct fuse_conn *fc) fc->connected = 1; fc->attr_version = 1; get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); + INIT_LIST_HEAD(&fc->dmmap_list); } EXPORT_SYMBOL_GPL(fuse_conn_init); @@ -617,7 +618,7 @@ void fuse_conn_put(struct fuse_conn *fc) if (atomic_dec_and_test(&fc->count)) { if (fc->destroy_req) fuse_request_free(fc->destroy_req); - fc->release(fc); + fc->ops->release(fc); } } EXPORT_SYMBOL_GPL(fuse_conn_put); @@ -1025,6 +1026,167 @@ void fuse_dev_free(struct fuse_dev *fud) } EXPORT_SYMBOL_GPL(fuse_dev_free); +static int fuse_notify_store_to_inode(struct fuse_conn *fc, + struct fuse_copy_state *cs, + u64 nodeid, u32 size, u64 pos) +{ + struct inode *inode; + struct address_space *mapping; + pgoff_t index; + 
unsigned int off; + loff_t file_size; + loff_t end; + int err; + + down_read(&fc->killsb); + + err = -ENOENT; + if (!fc->sb) + goto out_up_killsb; + + inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid); + if (!inode) + goto out_up_killsb; + + mapping = inode->i_mapping; + index = pos >> PAGE_CACHE_SHIFT; + off = pos & ~PAGE_CACHE_MASK; + file_size = i_size_read(inode); + end = pos + size; + if (end > file_size) { + file_size = end; + fuse_write_update_size(inode, file_size); + } + + while (size) { + struct page *page; + unsigned int this_num; + + err = -ENOMEM; + page = find_or_create_page(mapping, index, + mapping_gfp_mask(mapping)); + if (!page) + goto out_iput; + + this_num = min_t(unsigned, size, PAGE_CACHE_SIZE - off); + err = fuse_copy_page(cs, &page, off, this_num, 0); + if (!err && off == 0 && (size != 0 || file_size == end)) + SetPageUptodate(page); + unlock_page(page); + page_cache_release(page); + + if (err) + goto out_iput; + + size -= this_num; + off = 0; + index++; + } + + err = 0; + +out_iput: + iput(inode); +out_up_killsb: + up_read(&fc->killsb); + + return err; +} + +static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req) +{ + release_pages(req->pages, req->num_pages, 0); +} + +static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, + struct fuse_notify_retrieve_out *outarg) +{ + int err; + struct address_space *mapping = inode->i_mapping; + struct fuse_req *req; + pgoff_t index; + loff_t file_size; + unsigned int num; + unsigned int offset; + size_t total_len = 0; + + req = fuse_get_req(fc, 0); + if (IS_ERR(req)) + return PTR_ERR(req); + + offset = outarg->offset & ~PAGE_CACHE_MASK; + + req->in.h.opcode = FUSE_NOTIFY_REPLY; + req->in.h.nodeid = outarg->nodeid; + req->in.numargs = 2; + req->in.argpages = 1; + req->end = fuse_retrieve_end; + + index = outarg->offset >> PAGE_CACHE_SHIFT; + file_size = i_size_read(inode); + num = outarg->size; + if (outarg->offset > file_size) + num = 0; + else if (outarg->offset + 
num > file_size) + num = file_size - outarg->offset; + + while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) { + struct page *page; + unsigned int this_num; + + page = find_get_page(mapping, index); + if (!page) + break; + + this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset); + req->pages[req->num_pages] = page; + req->num_pages++; + + num -= this_num; + total_len += this_num; + index++; + } + req->misc.retrieve_in.offset = outarg->offset; + req->misc.retrieve_in.size = total_len; + req->in.args[0].size = sizeof(req->misc.retrieve_in); + req->in.args[0].value = &req->misc.retrieve_in; + req->in.args[1].size = total_len; + + err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique); + if (err) + fuse_retrieve_end(fc, req); + + return err; +} + +static int fuse_notify_retrieve_from_inode(struct fuse_conn *fc, + struct fuse_notify_retrieve_out *outarg) +{ + struct inode *inode; + int err; + + down_read(&fc->killsb); + err = -ENOENT; + if (fc->sb) { + u64 nodeid = outarg->nodeid; + + inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid); + if (inode) { + err = fuse_retrieve(fc, inode, outarg); + iput(inode); + } + } + up_read(&fc->killsb); + + return err; +} + +static const struct fuse_conn_operations fuse_default_ops = { + .release = fuse_free_conn, + .notify_store = fuse_notify_store_to_inode, + .notify_retrieve = fuse_notify_retrieve_from_inode, +}; + static int fuse_fill_super(struct super_block *sb, void *data, int silent) { struct fuse_dev *fud; @@ -1077,7 +1239,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) goto err_fput; fuse_conn_init(fc); - fc->release = fuse_free_conn; + fc->ops = &fuse_default_ops; fud = fuse_dev_alloc(fc); if (!fud) diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index c9aca04..3f4c54b 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -102,6 +102,7 @@ * - add ctime and ctimensec to fuse_setattr_in * - add FUSE_RENAME2 request * - add 
FUSE_NO_OPEN_SUPPORT flag + * - add FUSE_MMAP and FUSE_MUNMAP */ #ifndef _LINUX_FUSE_H @@ -358,6 +359,8 @@ enum fuse_opcode { FUSE_FALLOCATE = 43, FUSE_READDIRPLUS = 44, FUSE_RENAME2 = 45, + FUSE_MMAP = 46, + FUSE_MUNMAP = 47, /* CUSE specific operations */ CUSE_INIT = 4096, @@ -670,6 +673,29 @@ struct fuse_fallocate_in { uint32_t padding; }; +struct fuse_mmap_in { + __u64 fh; + __u64 addr; + __u64 len; + __u32 prot; + __u32 flags; + __u64 offset; +}; + +struct fuse_mmap_out { + __u64 mapid; /* Mmap ID, same namespace as Inode ID */ + __u64 size; /* Size of memory region */ + __u64 reserved; +}; + +struct fuse_munmap_in { + __u64 fh; + __u64 mapid; + __u64 size; /* Size of memory region */ + __u64 offset; + __u64 reserved; +}; + struct fuse_in_header { uint32_t len; uint32_t opcode; -- 2.1.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/