Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1758555AbXKGRlm (ORCPT ); Wed, 7 Nov 2007 12:41:42 -0500 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1754657AbXKGRle (ORCPT ); Wed, 7 Nov 2007 12:41:34 -0500 Received: from rgminet01.oracle.com ([148.87.113.118]:62351 "EHLO rgminet01.oracle.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753079AbXKGRlc (ORCPT ); Wed, 7 Nov 2007 12:41:32 -0500 Date: Wed, 7 Nov 2007 09:41:20 -0800 From: Mark Fasheh To: Linus Torvalds Cc: Andrew Morton , linux-kernel@vger.kernel.org, ocfs2-devel@oss.oracle.com Subject: [git patches] ocfs2 fixes Message-ID: <20071107174120.GD28607@ca-server1.us.oracle.com> Reply-To: Mark Fasheh MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Organization: Oracle Corporation User-Agent: Mutt/1.5.16 (2007-06-11) X-Brightmail-Tracker: AAAAAQAAAAI= X-Brightmail-Tracker: AAAAAQAAAAI= X-Whitelist: TRUE X-Whitelist: TRUE Sender: linux-kernel-owner@vger.kernel.org X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 8864 Lines: 275 Hi Linus, Here are some Ocfs2 patches - bug fixes, performance fixes and small cleanups. --Mark Please pull from 'upstream-linus' branch of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2.git upstream-linus to receive the following updates: fs/ocfs2/alloc.c | 2 +- fs/ocfs2/aops.c | 22 ++++++++++++++++++++++ fs/ocfs2/cluster/heartbeat.c | 2 +- fs/ocfs2/dcache.c | 2 +- fs/ocfs2/dir.c | 6 +++--- fs/ocfs2/dlmglue.c | 25 ++++++++++--------------- fs/ocfs2/file.c | 26 +++++++++++++++++++++++++- fs/ocfs2/namei.c | 13 ++++++++++--- 8 files changed, 73 insertions(+), 25 deletions(-) Adrian Bunk (1): [2.6 patch] make ocfs2_find_entry_el() static Jan Kara (1): Fix possibly too long write in o2hb_setup_one_bio() Mark Fasheh (4): ocfs2: Create locks at initially requested level ocfs2: Re-order iput in ocfs2_drop_dentry_lock ocfs2: Commit journal on sync writes ocfs2: fix write() performance regression Roel Kluin (1): Fix priority mistakes in fs/ocfs2/{alloc.c, dlmglue.c} Srinivas Eeda (1): ocfs2: fix rename vs unlink race diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 4ba7f0b..ce62c15 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -3946,7 +3946,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode, struct ocfs2_merge_ctxt ctxt; struct ocfs2_extent_list *rightmost_el; - if (!rec->e_flags & OCFS2_EXT_UNWRITTEN) { + if (!(rec->e_flags & OCFS2_EXT_UNWRITTEN)) { ret = -EIO; mlog_errno(ret); goto out; diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index c69c1b3..556e34c 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -729,6 +729,27 @@ static void ocfs2_clear_page_regions(struct page *page, } /* + * Nonsparse file systems fully allocate before we get to the write + * code. This prevents ocfs2_write() from tagging the write as an + * allocating one, which means ocfs2_map_page_blocks() might try to + * read-in the blocks at the tail of our file. Avoid reading them by + * testing i_size against each block offset. + */ +static int ocfs2_should_read_blk(struct inode *inode, struct page *page, + unsigned int block_start) +{ + u64 offset = page_offset(page) + block_start; + + if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) + return 1; + + if (i_size_read(inode) > offset) + return 1; + + return 0; +} + +/* * Some of this taken from block_prepare_write(). We already have our * mapping by now though, and the entire write will be allocating or * it won't, so not much need to use BH_New. @@ -781,6 +802,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, set_buffer_uptodate(bh); } else if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_new(bh) && + ocfs2_should_read_blk(inode, page, block_start) && (block_start < from || block_end > to)) { ll_rw_block(READ, 1, &bh); *wait_bh++=bh; diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 9cc7c04..f02ccb3 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -267,7 +267,7 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg, current_page = cs / spp; page = reg->hr_slot_data[current_page]; - vec_len = min(PAGE_CACHE_SIZE, + vec_len = min(PAGE_CACHE_SIZE - vec_start, (max_slots-cs) * (PAGE_CACHE_SIZE/spp) ); mlog(ML_HB_BIO, "page %d, vec_len = %u, vec_start = %u\n", diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index 3094ddb..1957a5e 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -318,9 +318,9 @@ out_attach: static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb, struct ocfs2_dentry_lock *dl) { + iput(dl->dl_inode); ocfs2_simple_drop_lockres(osb, &dl->dl_lockres); ocfs2_lock_res_free(&dl->dl_lockres); - iput(dl->dl_inode); kfree(dl); } diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 6a2f143..63b28fd 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -208,9 +208,9 @@ out: return NULL; } -struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen, - struct inode *dir, - struct ocfs2_dir_entry **res_dir) +static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen, + struct inode *dir, + struct ocfs2_dir_entry **res_dir) { struct super_block *sb; struct buffer_head *bh_use[NAMEI_RA_SIZE]; diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 41c76ff..4e97dcc 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -670,7 +670,7 @@ static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *loc { mlog_entry_void(); - BUG_ON((!lockres->l_flags & OCFS2_LOCK_BUSY)); + BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY))); BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); if (lockres->l_requested > LKM_NLMODE && @@ -980,18 +980,6 @@ again: goto unlock; } - if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { - /* lock has not been created yet. */ - spin_unlock_irqrestore(&lockres->l_lock, flags); - - ret = ocfs2_lock_create(osb, lockres, LKM_NLMODE, 0); - if (ret < 0) { - mlog_errno(ret); - goto out; - } - goto again; - } - if (lockres->l_flags & OCFS2_LOCK_BLOCKED && !ocfs2_may_continue_on_blocked_lock(lockres, level)) { /* is the lock is currently blocked on behalf of @@ -1006,7 +994,14 @@ again: mlog(ML_ERROR, "lockres %s has action %u pending\n", lockres->l_name, lockres->l_action); - lockres->l_action = OCFS2_AST_CONVERT; + if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { + lockres->l_action = OCFS2_AST_ATTACH; + lkm_flags &= ~LKM_CONVERT; + } else { + lockres->l_action = OCFS2_AST_CONVERT; + lkm_flags |= LKM_CONVERT; + } + lockres->l_requested = level; lockres_or_flags(lockres, OCFS2_LOCK_BUSY); spin_unlock_irqrestore(&lockres->l_lock, flags); @@ -1021,7 +1016,7 @@ again: status = dlmlock(osb->dlm, level, &lockres->l_lksb, - lkm_flags|LKM_CONVERT, + lkm_flags, lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1, ocfs2_locking_ast, diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index f92fe91..bbac7cd 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1891,9 +1891,11 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, ssize_t written = 0; size_t ocount; /* original count */ size_t count; /* after file limit checks */ - loff_t *ppos = &iocb->ki_pos; + loff_t old_size, *ppos = &iocb->ki_pos; + u32 old_clusters; struct file *file = iocb->ki_filp; struct inode *inode = file->f_path.dentry->d_inode; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); mlog_entry("(0x%p, %u, '%.*s')\n", file, (unsigned int)nr_segs, @@ -1949,6 +1951,13 @@ relock: goto relock; } + /* + * To later detect whether a journal commit for sync writes is + * necessary, we sample i_size, and cluster count here. + */ + old_size = i_size_read(inode); + old_clusters = OCFS2_I(inode)->ip_clusters; + /* communicate with ocfs2_dio_end_io */ ocfs2_iocb_set_rw_locked(iocb, rw_level); @@ -1978,6 +1987,21 @@ out_dio: /* buffered aio wouldn't have proper lock coverage today */ BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); + if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) { + /* + * The generic write paths have handled getting data + * to disk, but since we don't make use of the dirty + * inode list, a manual journal commit is necessary + * here. + */ + if (old_size != i_size_read(inode) || + old_clusters != OCFS2_I(inode)->ip_clusters) { + ret = journal_force_commit(osb->journal->j_journal); + if (ret < 0) + written = ret; + } + } + /* * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io * function pointer which is called when o_direct io completes so that diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 7292590..989ac27 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -1105,9 +1105,16 @@ static int ocfs2_rename(struct inode *old_dir, goto bail; } - if (!new_de && new_inode) - mlog(ML_ERROR, "inode %lu does not exist in it's parent " - "directory!", new_inode->i_ino); + if (!new_de && new_inode) { + /* + * Target was unlinked by another node while we were + * waiting to get to ocfs2_rename(). There isn't + * anything we can do here to help the situation, so + * bubble up the appropriate error. + */ + status = -ENOENT; + goto bail; + } /* In case we need to overwrite an existing file, we blow it * away first */ - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/