From: Eric Whitney Subject: [PATCH] ext4: revert i_data_sum locking cleanups for dioread_nolock Date: Fri, 12 Feb 2016 13:25:06 -0500 Message-ID: <20160212182506.GB1592@localhost.localdomain> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Cc: tytso@mit.edu, jack@suse.cz To: linux-ext4@vger.kernel.org Return-path: Received: from mail-qk0-f172.google.com ([209.85.220.172]:33254 "EHLO mail-qk0-f172.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752519AbcBLSY7 (ORCPT ); Fri, 12 Feb 2016 13:24:59 -0500 Received: by mail-qk0-f172.google.com with SMTP id s5so34367058qkd.0 for ; Fri, 12 Feb 2016 10:24:58 -0800 (PST) Content-Disposition: inline Sender: linux-ext4-owner@vger.kernel.org List-ID: Commit 2bcba4781fa3 ("ext4: get rid of EXT4_GET_BLOCKS_NO_LOCK flag") can cause a kernel panic when xfstest generic/300 is run on a file system mounted with dioread_nolock. The panic is typically triggered from check_irqs_on() (fs/buffer.c: 1272), and happens because ext4_end_io_dio() is being called in an interrupt context rather than from a workqueue for AIO. This suggests that buffer_defer_completion may not be set properly when creating an unwritten extent for async direct I/O. Revert the locking changes until this problem can be resolved. Patch applied to 4.5-rc3 and tested with a full xfstest-bld auto group run. Signed-off-by: Eric Whitney --- fs/ext4/ext4.h | 6 ++++-- fs/ext4/inode.c | 43 +++++++++++++++++++++++-------------------- include/trace/events/ext4.h | 1 + 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0662b28..36fcf2a 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -563,10 +563,12 @@ enum { #define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040 /* Request will not result in inode size update (user for fallocate) */ #define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080 + /* Do not take i_data_sem locking in ext4_map_blocks */ +#define EXT4_GET_BLOCKS_NO_LOCK 0x0100 /* Convert written extents to unwritten */ -#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0100 +#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0200 /* Write zeros to newly created written extents */ -#define EXT4_GET_BLOCKS_ZERO 0x0200 +#define EXT4_GET_BLOCKS_ZERO 0x0300 #define EXT4_GET_BLOCKS_CREATE_ZERO (EXT4_GET_BLOCKS_CREATE |\ EXT4_GET_BLOCKS_ZERO) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 83bc8bf..f72dc04 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -418,7 +418,8 @@ static void ext4_map_blocks_es_recheck(handle_t *handle, * out taking i_data_sem. So at the time the unwritten extent * could be converted. */ - down_read(&EXT4_I(inode)->i_data_sem); + if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) + down_read(&EXT4_I(inode)->i_data_sem); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { retval = ext4_ext_map_blocks(handle, inode, map, flags & EXT4_GET_BLOCKS_KEEP_SIZE); @@ -426,7 +427,8 @@ static void ext4_map_blocks_es_recheck(handle_t *handle, retval = ext4_ind_map_blocks(handle, inode, map, flags & EXT4_GET_BLOCKS_KEEP_SIZE); } - up_read((&EXT4_I(inode)->i_data_sem)); + if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) + up_read((&EXT4_I(inode)->i_data_sem)); /* * We don't check m_len because extent will be collpased in status @@ -522,7 +524,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, * Try to see if we can get the block without requesting a new * file system block. */ - down_read(&EXT4_I(inode)->i_data_sem); + if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) + down_read(&EXT4_I(inode)->i_data_sem); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { retval = ext4_ext_map_blocks(handle, inode, map, flags & EXT4_GET_BLOCKS_KEEP_SIZE); @@ -553,7 +556,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, if (ret < 0) retval = ret; } - up_read((&EXT4_I(inode)->i_data_sem)); + if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) + up_read((&EXT4_I(inode)->i_data_sem)); found: if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { @@ -703,7 +707,7 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock, map.m_lblk = iblock; map.m_len = bh->b_size >> inode->i_blkbits; - if (flags && !handle) { + if (flags && !(flags & EXT4_GET_BLOCKS_NO_LOCK) && !handle) { /* Direct IO write... */ if (map.m_len > DIO_MAX_BLOCKS) map.m_len = DIO_MAX_BLOCKS; @@ -898,6 +902,9 @@ int do_journal_get_write_access(handle_t *handle, return ret; } +static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create); + #ifdef CONFIG_EXT4_FS_ENCRYPTION static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, get_block_t *get_block) @@ -3070,21 +3077,13 @@ int ext4_get_block_write(struct inode *inode, sector_t iblock, EXT4_GET_BLOCKS_IO_CREATE_EXT); } -static int ext4_get_block_overwrite(struct inode *inode, sector_t iblock, +static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { - int ret; - - ext4_debug("ext4_get_block_overwrite: inode %lu, create flag %d\n", + ext4_debug("ext4_get_block_write_nolock: inode %lu, create flag %d\n", inode->i_ino, create); - ret = _ext4_get_block(inode, iblock, bh_result, 0); - /* - * Blocks should have been preallocated! ext4_file_write_iter() checks - * that. - */ - WARN_ON_ONCE(!buffer_mapped(bh_result)); - - return ret; + return _ext4_get_block(inode, iblock, bh_result, + EXT4_GET_BLOCKS_NO_LOCK); } #ifdef CONFIG_FS_DAX @@ -3230,8 +3229,10 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, /* If we do a overwrite dio, i_mutex locking can be released */ overwrite = *((int *)iocb->private); - if (overwrite) + if (overwrite) { + down_read(&EXT4_I(inode)->i_data_sem); inode_unlock(inode); + } /* * We could direct write to holes and fallocate. @@ -3274,7 +3275,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, } if (overwrite) { - get_block_func = ext4_get_block_overwrite; + get_block_func = ext4_get_block_write_nolock; } else { get_block_func = ext4_get_block_write; dio_flags = DIO_LOCKING; @@ -3330,8 +3331,10 @@ retake_lock: if (iov_iter_rw(iter) == WRITE) inode_dio_end(inode); /* take i_mutex locking again if we do a ovewrite dio */ - if (overwrite) + if (overwrite) { + up_read(&EXT4_I(inode)->i_data_sem); inode_lock(inode); + } return ret; } diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 4e4b2fa..6599e75 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -43,6 +43,7 @@ struct extent_status; { EXT4_GET_BLOCKS_METADATA_NOFAIL, "METADATA_NOFAIL" }, \ { EXT4_GET_BLOCKS_NO_NORMALIZE, "NO_NORMALIZE" }, \ { EXT4_GET_BLOCKS_KEEP_SIZE, "KEEP_SIZE" }, \ + { EXT4_GET_BLOCKS_NO_LOCK, "NO_LOCK" }, \ { EXT4_GET_BLOCKS_ZERO, "ZERO" }) #define show_mflags(flags) __print_flags(flags, "", \ -- 2.1.4