2009-03-30 20:01:21

by Miklos Szeredi

[permalink] [raw]
Subject: [patch 0/3] fuse: implement missing functionality on "direct_io" files

Fuse allows filesystems to bypass the page cache using the "direct_io"
flag. This is not the same as O_DIRECT: it's dictated by the
filesystem, not the application.

Some functionality didn't work in this mode, however. This short
patchset improves that:

1. fuse: allow kernel to access "direct_io" files
2. fuse: allow private mappings of "direct_io" files
3. splice: implement default splice_read method

The third patch is not fuse specific, it implements ->splice_read()
for any filesystem which doesn't provide one.

Comments are welcome.

Thanks,
Miklos


2009-03-30 20:02:29

by Miklos Szeredi

[permalink] [raw]
Subject: [patch 1/3] fuse: allow kernel to access "direct_io" files

From: Miklos Szeredi <[email protected]>

Allow the kernel to read and write "direct_io" files. This is
necessary for nfs export and execute support.

The implementation is simple: if an access from the kernel is
detected, don't perform get_user_pages(), just use the kernel address
provided by the requester to copy from/to the userspace filesystem.

Signed-off-by: Miklos Szeredi <[email protected]>
---
fs/fuse/dir.c | 1 +
fs/fuse/file.c | 53 ++++++++++++++++++++++++++++++++++++++++-------------
2 files changed, 41 insertions(+), 13 deletions(-)

Index: linux-2.6/fs/fuse/file.c
===================================================================
--- linux-2.6.orig/fs/fuse/file.c 2009-03-25 10:30:52.000000000 +0100
+++ linux-2.6/fs/fuse/file.c 2009-03-25 10:35:22.000000000 +0100
@@ -386,7 +386,6 @@ void fuse_read_fill(struct fuse_req *req
req->in.numargs = 1;
req->in.args[0].size = sizeof(struct fuse_read_in);
req->in.args[0].value = inarg;
- req->out.argpages = 1;
req->out.argvar = 1;
req->out.numargs = 1;
req->out.args[0].size = count;
@@ -453,6 +452,7 @@ static int fuse_readpage(struct file *fi
attr_ver = fuse_get_attr_version(fc);

req->out.page_zeroing = 1;
+ req->out.argpages = 1;
req->num_pages = 1;
req->pages[0] = page;
num_read = fuse_send_read(req, file, inode, pos, count, NULL);
@@ -510,6 +510,8 @@ static void fuse_send_readpages(struct f
struct fuse_conn *fc = get_fuse_conn(inode);
loff_t pos = page_offset(req->pages[0]);
size_t count = req->num_pages << PAGE_CACHE_SHIFT;
+
+ req->out.argpages = 1;
req->out.page_zeroing = 1;
fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
req->misc.read.attr_ver = fuse_get_attr_version(fc);
@@ -621,7 +623,6 @@ static void fuse_write_fill(struct fuse_
inarg->flags = file ? file->f_flags : 0;
req->in.h.opcode = FUSE_WRITE;
req->in.h.nodeid = get_node_id(inode);
- req->in.argpages = 1;
req->in.numargs = 2;
if (fc->minor < 9)
req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
@@ -695,6 +696,7 @@ static int fuse_buffered_write(struct fi
if (IS_ERR(req))
return PTR_ERR(req);

+ req->in.argpages = 1;
req->num_pages = 1;
req->pages[0] = page;
req->page_offset = offset;
@@ -771,6 +773,7 @@ static ssize_t fuse_fill_write_pages(str
size_t count = 0;
int err;

+ req->in.argpages = 1;
req->page_offset = offset;

do {
@@ -935,21 +938,28 @@ static void fuse_release_user_pages(stru
}

static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
- unsigned nbytes, int write)
+ unsigned *nbytesp, int write)
{
+ unsigned nbytes = *nbytesp;
unsigned long user_addr = (unsigned long) buf;
unsigned offset = user_addr & ~PAGE_MASK;
int npages;

- /* This doesn't work with nfsd */
- if (!current->mm)
- return -EPERM;
+ /* Special case for kernel I/O: can copy directly into the buffer */
+ if (segment_eq(get_fs(), KERNEL_DS)) {
+ if (write)
+ req->in.args[1].value = (void *) user_addr;
+ else
+ req->out.args[0].value = (void *) user_addr;
+
+ return 0;
+ }

nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ);
down_read(&current->mm->mmap_sem);
- npages = get_user_pages(current, current->mm, user_addr, npages, write,
+ npages = get_user_pages(current, current->mm, user_addr, npages, !write,
0, req->pages, NULL);
up_read(&current->mm->mmap_sem);
if (npages < 0)
@@ -957,6 +967,15 @@ static int fuse_get_user_pages(struct fu

req->num_pages = npages;
req->page_offset = offset;
+
+ if (write)
+ req->in.argpages = 1;
+ else
+ req->out.argpages = 1;
+
+ nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset;
+ *nbytesp = min(*nbytesp, nbytes);
+
return 0;
}

@@ -979,15 +998,13 @@ static ssize_t fuse_direct_io(struct fil

while (count) {
size_t nres;
- size_t nbytes_limit = min(count, nmax);
- size_t nbytes;
- int err = fuse_get_user_pages(req, buf, nbytes_limit, !write);
+ size_t nbytes = min(count, nmax);
+ int err = fuse_get_user_pages(req, buf, &nbytes, write);
if (err) {
res = err;
break;
}
- nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset;
- nbytes = min(nbytes_limit, nbytes);
+
if (write)
nres = fuse_send_write(req, file, inode, pos, nbytes,
current->files);
@@ -1163,6 +1180,7 @@ static int fuse_writepage_locked(struct
fuse_write_fill(req, NULL, ff, inode, page_offset(page), 0, 1);

copy_highpage(tmp_page, page);
+ req->in.argpages = 1;
req->num_pages = 1;
req->pages[0] = tmp_page;
req->page_offset = 0;
Index: linux-2.6/fs/fuse/dir.c
===================================================================
--- linux-2.6.orig/fs/fuse/dir.c 2009-03-25 10:31:06.000000000 +0100
+++ linux-2.6/fs/fuse/dir.c 2009-03-25 10:31:10.000000000 +0100
@@ -1032,6 +1032,7 @@ static int fuse_readdir(struct file *fil
fuse_put_request(fc, req);
return -ENOMEM;
}
+ req->out.argpages = 1;
req->num_pages = 1;
req->pages[0] = page;
fuse_read_fill(req, file, inode, file->f_pos, PAGE_SIZE, FUSE_READDIR);

2009-03-30 20:03:22

by Miklos Szeredi

[permalink] [raw]
Subject: [patch 2/3] fuse: allow private mappings of "direct_io" files

From: Miklos Szeredi <[email protected]>

Allow MAP_PRIVATE mmaps of "direct_io" files. This is necessary for
execute support.

MAP_SHARED mappings require some sort of coherency between the
underlying file and the mapping. With "direct_io" it is difficult to
provide this, so for the moment just disallow shared (read-write and
read-only) mappings altogether.

Signed-off-by: Miklos Szeredi <[email protected]>
---
fs/fuse/file.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)

Index: linux-2.6/fs/fuse/file.c
===================================================================
--- linux-2.6.orig/fs/fuse/file.c 2009-03-30 18:47:55.000000000 +0200
+++ linux-2.6/fs/fuse/file.c 2009-03-30 19:07:11.000000000 +0200
@@ -1291,6 +1291,15 @@ static int fuse_file_mmap(struct file *f
return 0;
}

+static int fuse_direct_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ /* Can't provide the coherency needed for MAP_SHARED */
+ if (vma->vm_flags & VM_MAYSHARE)
+ return -ENODEV;
+
+ return generic_file_mmap(file, vma);
+}
+
static int convert_fuse_file_lock(const struct fuse_file_lock *ffl,
struct file_lock *fl)
{
@@ -1925,6 +1934,7 @@ static const struct file_operations fuse
.llseek = fuse_file_llseek,
.read = fuse_direct_read,
.write = fuse_direct_write,
+ .mmap = fuse_direct_mmap,
.open = fuse_open,
.flush = fuse_flush,
.release = fuse_release,
@@ -1934,7 +1944,7 @@ static const struct file_operations fuse
.unlocked_ioctl = fuse_file_ioctl,
.compat_ioctl = fuse_file_compat_ioctl,
.poll = fuse_file_poll,
- /* no mmap and splice_read */
+ /* no splice_read */
};

static const struct address_space_operations fuse_file_aops = {

2009-03-30 20:04:42

by Miklos Szeredi

[permalink] [raw]
Subject: [patch 3/3] splice: implement default splice_read method

From: Miklos Szeredi <[email protected]>

If f_op->splice_read() is not implemented fall back to a plain read.
Use vfs_readv() to read into previously allocated pages.

This will allow splice and functions using splice, such as the loop
device, to work on all filesystems. This includes "direct_io" files
in fuse which bypass the page cache.

Signed-off-by: Miklos Szeredi <[email protected]>
---
fs/pipe.c | 14 +++++
fs/splice.c | 120 ++++++++++++++++++++++++++++++++++++++++++++--
include/linux/fs.h | 2
include/linux/pipe_fs_i.h | 2
4 files changed, 133 insertions(+), 5 deletions(-)

Index: linux-2.6/fs/pipe.c
===================================================================
--- linux-2.6.orig/fs/pipe.c 2009-03-30 21:11:58.000000000 +0200
+++ linux-2.6/fs/pipe.c 2009-03-30 21:22:06.000000000 +0200
@@ -268,6 +268,20 @@ int generic_pipe_buf_confirm(struct pipe
return 0;
}

+/**
+ * generic_pipe_buf_release - put a reference to a &struct pipe_buffer
+ * @pipe: the pipe that the buffer belongs to
+ * @buf: the buffer to put a reference to
+ *
+ * Description:
+ * This function releases a reference to @buf.
+ */
+void generic_pipe_buf_release(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
+{
+ page_cache_release(buf->page);
+}
+
static const struct pipe_buf_operations anon_pipe_buf_ops = {
.can_merge = 1,
.map = generic_pipe_buf_map,
Index: linux-2.6/fs/splice.c
===================================================================
--- linux-2.6.orig/fs/splice.c 2009-03-30 21:11:58.000000000 +0200
+++ linux-2.6/fs/splice.c 2009-03-30 21:22:06.000000000 +0200
@@ -509,9 +509,116 @@ ssize_t generic_file_splice_read(struct

return ret;
}
-
EXPORT_SYMBOL(generic_file_splice_read);

+static const struct pipe_buf_operations default_pipe_buf_ops = {
+ .can_merge = 0,
+ .map = generic_pipe_buf_map,
+ .unmap = generic_pipe_buf_unmap,
+ .confirm = generic_pipe_buf_confirm,
+ .release = generic_pipe_buf_release,
+ .steal = generic_pipe_buf_steal,
+ .get = generic_pipe_buf_get,
+};
+
+static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
+ unsigned long vlen, loff_t offset)
+{
+ mm_segment_t old_fs;
+ loff_t pos = offset;
+ ssize_t res;
+
+ old_fs = get_fs();
+ set_fs(get_ds());
+ /* The cast to a user pointer is valid due to the set_fs() */
+ res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos);
+ set_fs(old_fs);
+
+ return res;
+}
+
+ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
+{
+ unsigned int nr_pages;
+ unsigned int nr_freed;
+ size_t offset;
+ struct page *pages[PIPE_BUFFERS];
+ struct partial_page partial[PIPE_BUFFERS];
+ struct iovec vec[PIPE_BUFFERS];
+ pgoff_t index;
+ ssize_t res;
+ size_t this_len;
+ int error;
+ int i;
+ struct splice_pipe_desc spd = {
+ .pages = pages,
+ .partial = partial,
+ .flags = flags,
+ .ops = &default_pipe_buf_ops,
+ .spd_release = spd_release_page,
+ };
+
+ index = *ppos >> PAGE_CACHE_SHIFT;
+ offset = *ppos & ~PAGE_CACHE_MASK;
+ nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+
+ for (i = 0; i < nr_pages && i < PIPE_BUFFERS && len; i++) {
+ struct page *page;
+
+ page = alloc_page(GFP_HIGHUSER);
+ error = -ENOMEM;
+ if (!page)
+ goto err;
+
+ this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset);
+ vec[i].iov_base = (void __user *) kmap(page);
+ vec[i].iov_len = this_len;
+ pages[i] = page;
+ spd.nr_pages++;
+ len -= this_len;
+ offset = 0;
+ }
+
+ res = kernel_readv(in, vec, spd.nr_pages, *ppos);
+ if (res < 0)
+ goto err;
+
+ error = 0;
+ if (!res)
+ goto err;
+
+ nr_freed = 0;
+ for (i = 0; i < spd.nr_pages; i++) {
+ kunmap(pages[i]);
+ this_len = min_t(size_t, vec[i].iov_len, res);
+ partial[i].offset = 0;
+ partial[i].len = this_len;
+ if (!this_len) {
+ __free_page(pages[i]);
+ pages[i] = NULL;
+ nr_freed++;
+ }
+ res -= this_len;
+ }
+ spd.nr_pages -= nr_freed;
+
+ res = splice_to_pipe(pipe, &spd);
+ if (res > 0)
+ *ppos += res;
+
+ return res;
+
+err:
+ for (i = 0; i < spd.nr_pages; i++) {
+ kunmap(pages[i]);
+ __free_page(pages[i]);
+ }
+ return error;
+}
+EXPORT_SYMBOL(default_file_splice_read);
+
/*
* Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
* using sendpage(). Return the number of bytes sent.
@@ -916,11 +1023,10 @@ static long do_splice_to(struct file *in
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
+ ssize_t (*splice_read)(struct file *, loff_t *,
+ struct pipe_inode_info *, size_t, unsigned int);
int ret;

- if (unlikely(!in->f_op || !in->f_op->splice_read))
- return -EINVAL;
-
if (unlikely(!(in->f_mode & FMODE_READ)))
return -EBADF;

@@ -928,7 +1034,11 @@ static long do_splice_to(struct file *in
if (unlikely(ret < 0))
return ret;

- return in->f_op->splice_read(in, ppos, pipe, len, flags);
+ splice_read = in->f_op->splice_read;
+ if (!splice_read)
+ splice_read = default_file_splice_read;
+
+ return splice_read(in, ppos, pipe, len, flags);
}

/**
Index: linux-2.6/include/linux/pipe_fs_i.h
===================================================================
--- linux-2.6.orig/include/linux/pipe_fs_i.h 2009-03-30 21:11:58.000000000 +0200
+++ linux-2.6/include/linux/pipe_fs_i.h 2009-03-30 21:22:06.000000000 +0200
@@ -147,5 +147,7 @@ void generic_pipe_buf_unmap(struct pipe_
void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
+void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
+

#endif
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h 2009-03-30 21:13:11.000000000 +0200
+++ linux-2.6/include/linux/fs.h 2009-03-30 21:22:06.000000000 +0200
@@ -2126,6 +2126,8 @@ extern int generic_segment_checks(const
/* fs/splice.c */
extern ssize_t generic_file_splice_read(struct file *, loff_t *,
struct pipe_inode_info *, size_t, unsigned int);
+extern ssize_t default_file_splice_read(struct file *, loff_t *,
+ struct pipe_inode_info *, size_t, unsigned int);
extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
struct file *, loff_t *, size_t, unsigned int);
extern ssize_t generic_file_splice_write_nolock(struct pipe_inode_info *,

2009-03-30 21:00:30

by Brad Boyer

[permalink] [raw]
Subject: Re: [patch 3/3] splice: implement default splice_read method

On Mon, Mar 30, 2009 at 10:04:28PM +0200, Miklos Szeredi wrote:
> From: Miklos Szeredi <[email protected]>
>
> If f_op->splice_read() is not implemented fall back to a plain read.
> Use vfs_readv() to read into previously allocated pages.
>
> This will allow splice and functions using splice, such as the loop
> device, to work on all filesystems. This includes "direct_io" files
> in fuse which bypass the page cache.

Based on your description, I would have expected this patch to make
the loop driver work seamlessly. Unless I'm misreading something, I
think the loop driver will still error out if the fs driver in question
doesn't explicitly set splice_read.

From drivers/block/loop.c:
724 /* new backing store needs to support loop (eg splice_read) */
725 if (!inode->i_fop->splice_read)
726 goto out_putf;
727

Did you accidentally leave out some changes? Please let me know if I
am missing something obvious.

Brad Boyer
[email protected]

2009-03-31 10:08:13

by Miklos Szeredi

[permalink] [raw]
Subject: Re: [patch 3/3] splice: implement default splice_read method

On Mon, 30 Mar 2009, Brad Boyer wrote:
> Based on your description, I would have expected this patch to make
> the loop driver work seamlessly. Unless I'm misreading something, I
> think the loop driver will still error out if the fs driver in question
> doesn't explicitly set splice_read.

You're right. Here's an updated patch.

Thanks,
Miklos

---
From: Miklos Szeredi <[email protected]>

If f_op->splice_read() is not implemented fall back to a plain read.
Use vfs_readv() to read into previously allocated pages.

This will allow splice and functions using splice, such as the loop
device, to work on all filesystems. This includes "direct_io" files
in fuse which bypass the page cache.

Signed-off-by: Miklos Szeredi <[email protected]>
---
drivers/block/loop.c | 11 ----
fs/coda/file.c | 9 ++-
fs/fuse/file.c | 1
fs/pipe.c | 14 +++++
fs/read_write.c | 7 --
fs/splice.c | 120 ++++++++++++++++++++++++++++++++++++++++++++--
include/linux/fs.h | 2
include/linux/pipe_fs_i.h | 1
8 files changed, 140 insertions(+), 25 deletions(-)

Index: linux-2.6/fs/pipe.c
===================================================================
--- linux-2.6.orig/fs/pipe.c 2009-03-31 11:43:55.000000000 +0200
+++ linux-2.6/fs/pipe.c 2009-03-31 11:44:12.000000000 +0200
@@ -268,6 +268,20 @@ int generic_pipe_buf_confirm(struct pipe
return 0;
}

+/**
+ * generic_pipe_buf_release - put a reference to a &struct pipe_buffer
+ * @pipe: the pipe that the buffer belongs to
+ * @buf: the buffer to put a reference to
+ *
+ * Description:
+ * This function releases a reference to @buf.
+ */
+void generic_pipe_buf_release(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
+{
+ page_cache_release(buf->page);
+}
+
static const struct pipe_buf_operations anon_pipe_buf_ops = {
.can_merge = 1,
.map = generic_pipe_buf_map,
Index: linux-2.6/fs/splice.c
===================================================================
--- linux-2.6.orig/fs/splice.c 2009-03-31 11:43:55.000000000 +0200
+++ linux-2.6/fs/splice.c 2009-03-31 11:44:12.000000000 +0200
@@ -509,9 +509,116 @@ ssize_t generic_file_splice_read(struct

return ret;
}
-
EXPORT_SYMBOL(generic_file_splice_read);

+static const struct pipe_buf_operations default_pipe_buf_ops = {
+ .can_merge = 0,
+ .map = generic_pipe_buf_map,
+ .unmap = generic_pipe_buf_unmap,
+ .confirm = generic_pipe_buf_confirm,
+ .release = generic_pipe_buf_release,
+ .steal = generic_pipe_buf_steal,
+ .get = generic_pipe_buf_get,
+};
+
+static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
+ unsigned long vlen, loff_t offset)
+{
+ mm_segment_t old_fs;
+ loff_t pos = offset;
+ ssize_t res;
+
+ old_fs = get_fs();
+ set_fs(get_ds());
+ /* The cast to a user pointer is valid due to the set_fs() */
+ res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos);
+ set_fs(old_fs);
+
+ return res;
+}
+
+ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
+{
+ unsigned int nr_pages;
+ unsigned int nr_freed;
+ size_t offset;
+ struct page *pages[PIPE_BUFFERS];
+ struct partial_page partial[PIPE_BUFFERS];
+ struct iovec vec[PIPE_BUFFERS];
+ pgoff_t index;
+ ssize_t res;
+ size_t this_len;
+ int error;
+ int i;
+ struct splice_pipe_desc spd = {
+ .pages = pages,
+ .partial = partial,
+ .flags = flags,
+ .ops = &default_pipe_buf_ops,
+ .spd_release = spd_release_page,
+ };
+
+ index = *ppos >> PAGE_CACHE_SHIFT;
+ offset = *ppos & ~PAGE_CACHE_MASK;
+ nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+
+ for (i = 0; i < nr_pages && i < PIPE_BUFFERS && len; i++) {
+ struct page *page;
+
+ page = alloc_page(GFP_HIGHUSER);
+ error = -ENOMEM;
+ if (!page)
+ goto err;
+
+ this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset);
+ vec[i].iov_base = (void __user *) kmap(page);
+ vec[i].iov_len = this_len;
+ pages[i] = page;
+ spd.nr_pages++;
+ len -= this_len;
+ offset = 0;
+ }
+
+ res = kernel_readv(in, vec, spd.nr_pages, *ppos);
+ if (res < 0)
+ goto err;
+
+ error = 0;
+ if (!res)
+ goto err;
+
+ nr_freed = 0;
+ for (i = 0; i < spd.nr_pages; i++) {
+ kunmap(pages[i]);
+ this_len = min_t(size_t, vec[i].iov_len, res);
+ partial[i].offset = 0;
+ partial[i].len = this_len;
+ if (!this_len) {
+ __free_page(pages[i]);
+ pages[i] = NULL;
+ nr_freed++;
+ }
+ res -= this_len;
+ }
+ spd.nr_pages -= nr_freed;
+
+ res = splice_to_pipe(pipe, &spd);
+ if (res > 0)
+ *ppos += res;
+
+ return res;
+
+err:
+ for (i = 0; i < spd.nr_pages; i++) {
+ kunmap(pages[i]);
+ __free_page(pages[i]);
+ }
+ return error;
+}
+EXPORT_SYMBOL(default_file_splice_read);
+
/*
* Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
* using sendpage(). Return the number of bytes sent.
@@ -916,11 +1023,10 @@ static long do_splice_to(struct file *in
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
+ ssize_t (*splice_read)(struct file *, loff_t *,
+ struct pipe_inode_info *, size_t, unsigned int);
int ret;

- if (unlikely(!in->f_op || !in->f_op->splice_read))
- return -EINVAL;
-
if (unlikely(!(in->f_mode & FMODE_READ)))
return -EBADF;

@@ -928,7 +1034,11 @@ static long do_splice_to(struct file *in
if (unlikely(ret < 0))
return ret;

- return in->f_op->splice_read(in, ppos, pipe, len, flags);
+ splice_read = in->f_op->splice_read;
+ if (!splice_read)
+ splice_read = default_file_splice_read;
+
+ return splice_read(in, ppos, pipe, len, flags);
}

/**
Index: linux-2.6/include/linux/pipe_fs_i.h
===================================================================
--- linux-2.6.orig/include/linux/pipe_fs_i.h 2009-03-31 11:43:55.000000000 +0200
+++ linux-2.6/include/linux/pipe_fs_i.h 2009-03-31 12:04:19.000000000 +0200
@@ -147,5 +147,6 @@ void generic_pipe_buf_unmap(struct pipe_
void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
+void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);

#endif
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h 2009-03-31 11:43:55.000000000 +0200
+++ linux-2.6/include/linux/fs.h 2009-03-31 11:44:12.000000000 +0200
@@ -2126,6 +2126,8 @@ extern int generic_segment_checks(const
/* fs/splice.c */
extern ssize_t generic_file_splice_read(struct file *, loff_t *,
struct pipe_inode_info *, size_t, unsigned int);
+extern ssize_t default_file_splice_read(struct file *, loff_t *,
+ struct pipe_inode_info *, size_t, unsigned int);
extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
struct file *, loff_t *, size_t, unsigned int);
extern ssize_t generic_file_splice_write_nolock(struct pipe_inode_info *,
Index: linux-2.6/drivers/block/loop.c
===================================================================
--- linux-2.6.orig/drivers/block/loop.c 2009-03-31 11:43:55.000000000 +0200
+++ linux-2.6/drivers/block/loop.c 2009-03-31 11:44:12.000000000 +0200
@@ -721,10 +721,6 @@ static int loop_change_fd(struct loop_de
if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
goto out_putf;

- /* new backing store needs to support loop (eg splice_read) */
- if (!inode->i_fop->splice_read)
- goto out_putf;
-
/* size of the new backing store needs to be the same */
if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
goto out_putf;
@@ -800,12 +796,7 @@ static int loop_set_fd(struct loop_devic
error = -EINVAL;
if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) {
const struct address_space_operations *aops = mapping->a_ops;
- /*
- * If we can't read - sorry. If we only can't write - well,
- * it's going to be read-only.
- */
- if (!file->f_op->splice_read)
- goto out_putf;
+
if (aops->write_begin)
lo_flags |= LO_FLAGS_USE_AOPS;
if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write)
Index: linux-2.6/fs/coda/file.c
===================================================================
--- linux-2.6.orig/fs/coda/file.c 2009-03-31 11:43:55.000000000 +0200
+++ linux-2.6/fs/coda/file.c 2009-03-31 11:44:12.000000000 +0200
@@ -47,6 +47,8 @@ coda_file_splice_read(struct file *coda_
struct pipe_inode_info *pipe, size_t count,
unsigned int flags)
{
+ ssize_t (*splice_read)(struct file *, loff_t *,
+ struct pipe_inode_info *, size_t, unsigned int);
struct coda_file_info *cfi;
struct file *host_file;

@@ -54,10 +56,11 @@ coda_file_splice_read(struct file *coda_
BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
host_file = cfi->cfi_container;

- if (!host_file->f_op || !host_file->f_op->splice_read)
- return -EINVAL;
+ splice_read = host_file->f_op->splice_read;
+ if (!splice_read)
+ splice_read = default_file_splice_read;

- return host_file->f_op->splice_read(host_file, ppos, pipe, count,flags);
+ return splice_read(host_file, ppos, pipe, count, flags);
}

static ssize_t
Index: linux-2.6/fs/fuse/file.c
===================================================================
--- linux-2.6.orig/fs/fuse/file.c 2009-03-31 11:44:12.000000000 +0200
+++ linux-2.6/fs/fuse/file.c 2009-03-31 11:44:12.000000000 +0200
@@ -1944,7 +1944,6 @@ static const struct file_operations fuse
.unlocked_ioctl = fuse_file_ioctl,
.compat_ioctl = fuse_file_compat_ioctl,
.poll = fuse_file_poll,
- /* no splice_read */
};

static const struct address_space_operations fuse_file_aops = {
Index: linux-2.6/fs/read_write.c
===================================================================
--- linux-2.6.orig/fs/read_write.c 2009-03-31 11:44:10.000000000 +0200
+++ linux-2.6/fs/read_write.c 2009-03-31 11:45:48.000000000 +0200
@@ -749,12 +749,6 @@ static ssize_t do_sendfile(int out_fd, i
goto out;
if (!(in_file->f_mode & FMODE_READ))
goto fput_in;
- retval = -EINVAL;
- in_inode = in_file->f_path.dentry->d_inode;
- if (!in_inode)
- goto fput_in;
- if (!in_file->f_op || !in_file->f_op->splice_read)
- goto fput_in;
retval = -ESPIPE;
if (!ppos)
ppos = &in_file->f_pos;
@@ -778,6 +772,7 @@ static ssize_t do_sendfile(int out_fd, i
retval = -EINVAL;
if (!out_file->f_op || !out_file->f_op->sendpage)
goto fput_out;
+ in_inode = in_file->f_path.dentry->d_inode;
out_inode = out_file->f_path.dentry->d_inode;
retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
if (retval < 0)