Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1757505AbZDNRty (ORCPT ); Tue, 14 Apr 2009 13:49:54 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1756533AbZDNRtT (ORCPT ); Tue, 14 Apr 2009 13:49:19 -0400 Received: from fxip-0047f.externet.hu ([88.209.222.127]:56521 "EHLO pomaz-ex.szeredi.hu" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752788AbZDNRtP (ORCPT ); Tue, 14 Apr 2009 13:49:15 -0400 Message-Id: <20090414174857.832534682@szeredi.hu> References: <20090414174835.487031939@szeredi.hu> User-Agent: quilt/0.45-1 Date: Tue, 14 Apr 2009 19:48:39 +0200 From: Miklos Szeredi To: jens.axboe@oracle.com, mfasheh@suse.com Cc: akpm@linux-foundation.org, viro@ZenIV.linux.org.uk, torvalds@linux-foundation.org, linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [patch 4/6] ocfs2: fix i_mutex locking in ocfs2_splice_to_file() Content-Disposition: inline; filename=ocfs2_splice_locking_fix.patch Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 5434 Lines: 180 Rearrange locking of i_mutex on destination and call to ocfs2_rw_lock() so locks are only held while buffers are copied with the pipe_to_file() actor, and not while waiting for more data on the pipe. Signed-off-by: Miklos Szeredi --- fs/ocfs2/file.c | 96 ++++++++++++++++++++++++++++++++++++++----------- fs/splice.c | 5 +- include/linux/splice.h | 2 + 3 files changed, 80 insertions(+), 23 deletions(-) Index: linux-2.6/fs/ocfs2/file.c =================================================================== --- linux-2.6.orig/fs/ocfs2/file.c 2009-04-14 16:35:15.000000000 +0200 +++ linux-2.6/fs/ocfs2/file.c 2009-04-14 18:29:58.000000000 +0200 @@ -1912,6 +1912,22 @@ out_sems: return written ? written : ret; } +static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, + struct file *out, + struct splice_desc *sd) +{ + int ret; + + ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos, + sd->total_len, 0, NULL); + if (ret < 0) { + mlog_errno(ret); + return ret; + } + + return splice_from_pipe_feed(pipe, sd, pipe_to_file); +} + static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, @@ -1919,38 +1935,76 @@ static ssize_t ocfs2_file_splice_write(s unsigned int flags) { int ret; - struct inode *inode = out->f_path.dentry->d_inode; + struct address_space *mapping = out->f_mapping; + struct inode *inode = mapping->host; + struct splice_desc sd = { + .total_len = len, + .flags = flags, + .pos = *ppos, + .u.file = out, + }; mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe, (unsigned int)len, out->f_path.dentry->d_name.len, out->f_path.dentry->d_name.name); - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); - - ret = ocfs2_rw_lock(inode, 1); - if (ret < 0) { - mlog_errno(ret); - goto out; - } + if (pipe->inode) + mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT); - ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, ppos, len, 0, - NULL); - if (ret < 0) { - mlog_errno(ret); - goto out_unlock; - } + splice_from_pipe_begin(&sd); + do { + ret = splice_from_pipe_next(pipe, &sd); + if (ret <= 0) + break; + + mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); + ret = ocfs2_rw_lock(inode, 1); + if (ret < 0) + mlog_errno(ret); + else { + ret = ocfs2_splice_to_file(pipe, out, &sd); + ocfs2_rw_unlock(inode, 1); + } + mutex_unlock(&inode->i_mutex); + } while (ret > 0); + splice_from_pipe_end(pipe, &sd); if (pipe->inode) - mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); - ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags); - if (pipe->inode) mutex_unlock(&pipe->inode->i_mutex); -out_unlock: - ocfs2_rw_unlock(inode, 1); -out: - mutex_unlock(&inode->i_mutex); + if (sd.num_spliced) + ret = sd.num_spliced; + + if (ret > 0) { + unsigned long nr_pages; + + *ppos += ret; + nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + + /* + * If file or inode is SYNC and we actually wrote some data, + * sync it. + */ + if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { + int err; + + mutex_lock(&inode->i_mutex); + err = ocfs2_rw_lock(inode, 1); + if (err < 0) { + mlog_errno(err); + } else { + err = generic_osync_inode(inode, mapping, + OSYNC_METADATA|OSYNC_DATA); + ocfs2_rw_unlock(inode, 1); + } + mutex_unlock(&inode->i_mutex); + + if (err) + ret = err; + } + balance_dirty_pages_ratelimited_nr(mapping, nr_pages); + } mlog_exit(ret); return ret; Index: linux-2.6/fs/splice.c =================================================================== --- linux-2.6.orig/fs/splice.c 2009-04-14 18:25:13.000000000 +0200 +++ linux-2.6/fs/splice.c 2009-04-14 18:29:58.000000000 +0200 @@ -555,8 +555,8 @@ static int pipe_to_sendpage(struct pipe_ * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create * a new page in the output file page cache and fill/dirty that. */ -static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, - struct splice_desc *sd) +int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, + struct splice_desc *sd) { struct file *file = sd->u.file; struct address_space *mapping = file->f_mapping; @@ -600,6 +600,7 @@ static int pipe_to_file(struct pipe_inod out: return ret; } +EXPORT_SYMBOL(pipe_to_file); static void wakeup_pipe_writers(struct pipe_inode_info *pipe) { Index: linux-2.6/include/linux/splice.h =================================================================== --- linux-2.6.orig/include/linux/splice.h 2009-04-14 17:37:47.000000000 +0200 +++ linux-2.6/include/linux/splice.h 2009-04-14 18:29:58.000000000 +0200 @@ -75,6 +75,8 @@ extern int splice_from_pipe_next(struct extern void splice_from_pipe_begin(struct splice_desc *); extern void splice_from_pipe_end(struct pipe_inode_info *, struct splice_desc *); +extern int pipe_to_file(struct pipe_inode_info *, struct pipe_buffer *, + struct splice_desc *); extern ssize_t splice_to_pipe(struct pipe_inode_info *, struct splice_pipe_desc *); -- -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/