Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id C0231C433F5 for ; Wed, 17 Nov 2021 13:32:02 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 9AAB261B2B for ; Wed, 17 Nov 2021 13:32:02 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S237611AbhKQNfA (ORCPT ); Wed, 17 Nov 2021 08:35:00 -0500 Received: from mail.kernel.org ([198.145.29.99]:47316 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S232814AbhKQNe6 (ORCPT ); Wed, 17 Nov 2021 08:34:58 -0500 Received: by mail.kernel.org (Postfix) with ESMTPSA id 2043D61B1B; Wed, 17 Nov 2021 13:31:58 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1637155920; bh=FAKUH17O41y0Ipu1xudnI1GYFTVLohaAW24egYbmMPo=; h=Date:Subject:To:Cc:References:From:In-Reply-To:From; b=GGxU1AVkcOF4zPH7qCV4px7+ZueeyWHReH3Zwlwp/rwaNdJ0SxqArFjapfju4Qdzt FGHdL3DdnOOD06nuPYA2wzBTVCfgkiNz1DSXsGeMY/mxCtgQpvrn4+4B9m3a2DpRQH AVwQ/YSO8Z1z+PDdP8WIm5sWG2d0AVr7IPvb6guP9VB4ZqS70Kafu7nI4zhFdvaxaz ka0ciHeyW7KoBCQtAC5k3rcA2d+Zwo5O8Cx52/+A9EXGsa4V8sLjDpvdGrHm6CBRt2 0QBD/lvbCncTPvuuslecYVq8J7urXP4G+g+gwSpbmLfZndqBrcrah658dlJVvC13mu blGZ1r+AvKz5A== Message-ID: Date: Wed, 17 Nov 2021 21:31:50 +0800 MIME-Version: 1.0 User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Thunderbird/91.3.0 Subject: Re: [f2fs-dev] [PATCH 1/6] f2fs: rework write preallocations Content-Language: en-US To: Jaegeuk Kim , linux-kernel@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net Cc: Eric Biggers References: <20211116214510.2934905-1-jaegeuk@kernel.org> From: Chao Yu In-Reply-To: <20211116214510.2934905-1-jaegeuk@kernel.org> Content-Type: text/plain; charset=UTF-8; format=flowed 
Content-Transfer-Encoding: 7bit Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On 2021/11/17 5:45, Jaegeuk Kim wrote: > From: Eric Biggers > > f2fs_write_begin() assumes that all blocks were preallocated by > default unless FI_NO_PREALLOC is explicitly set. This invites data > corruption, as there are cases in which not all blocks are preallocated. > Commit 47501f87c61a ("f2fs: preallocate DIO blocks when forcing > buffered_io") fixed one case, but there are others remaining. > > Fix up this logic by replacing this flag with FI_PREALLOCATED_ALL, which > only gets set if all blocks for the current write were preallocated. > > Also clean up f2fs_preallocate_blocks(), move it to file.c, and make it > handle some of the logic that was previously in write_iter() directly. > > Jaegeuk: > DIO writes to holes are turned into buffered IO in f2fs_direct_IO, so we should > not preallocate blocks. > > Signed-off-by: Eric Biggers > Signed-off-by: Jaegeuk Kim > --- > fs/f2fs/data.c | 55 ++------------------- > fs/f2fs/f2fs.h | 3 +- > fs/f2fs/file.c | 130 +++++++++++++++++++++++++++++++------------------ > 3 files changed, 87 insertions(+), 101 deletions(-) > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c > index 9f754aaef558..3b27fb7daa8b 100644 > --- a/fs/f2fs/data.c > +++ b/fs/f2fs/data.c > @@ -1384,53 +1384,6 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) > return 0; > } > > -int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) > -{ > - struct inode *inode = file_inode(iocb->ki_filp); > - struct f2fs_map_blocks map; > - int flag; > - int err = 0; > - bool direct_io = iocb->ki_flags & IOCB_DIRECT; > - > - map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos); > - map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from)); > - if (map.m_len > map.m_lblk) > - map.m_len -= map.m_lblk; > - else > - map.m_len = 0; > - > - map.m_next_pgofs = NULL; > - map.m_next_extent = NULL; > - map.m_seg_type = NO_CHECK_TYPE; > - 
map.m_may_create = true; > - > - if (direct_io) { > - map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint); > - flag = f2fs_force_buffered_io(inode, iocb, from) ? > - F2FS_GET_BLOCK_PRE_AIO : > - F2FS_GET_BLOCK_PRE_DIO; > - goto map_blocks; > - } > - if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) { > - err = f2fs_convert_inline_inode(inode); > - if (err) > - return err; > - } > - if (f2fs_has_inline_data(inode)) > - return err; > - > - flag = F2FS_GET_BLOCK_PRE_AIO; > - > -map_blocks: > - err = f2fs_map_blocks(inode, &map, 1, flag); > - if (map.m_len > 0 && err == -ENOSPC) { > - if (!direct_io) > - set_inode_flag(inode, FI_NO_PREALLOC); > - err = 0; > - } > - return err; > -} > - > void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) > { > if (flag == F2FS_GET_BLOCK_PRE_AIO) { > @@ -3339,12 +3292,10 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, > int flag; > > /* > - * we already allocated all the blocks, so we don't need to get > - * the block addresses when there is no need to fill the page. > + * If a whole page is being written and we already preallocated all the > + * blocks, then there is no need to get a block address now. 
> */ > - if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE && > - !is_inode_flag_set(inode, FI_NO_PREALLOC) && > - !f2fs_verity_in_progress(inode)) > + if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL)) > return 0; > > /* f2fs_lock_op avoids race between write CP and convert_inline_page */ > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > index ce9fc9f13000..be871a79c634 100644 > --- a/fs/f2fs/f2fs.h > +++ b/fs/f2fs/f2fs.h > @@ -715,7 +715,7 @@ enum { > FI_INLINE_DOTS, /* indicate inline dot dentries */ > FI_DO_DEFRAG, /* indicate defragment is running */ > FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ > - FI_NO_PREALLOC, /* indicate skipped preallocated blocks */ > + FI_PREALLOCATED_ALL, /* all blocks for write were preallocated */ > FI_HOT_DATA, /* indicate file is hot */ > FI_EXTRA_ATTR, /* indicate file has extra attribute */ > FI_PROJ_INHERIT, /* indicate file inherits projectid */ > @@ -3614,7 +3614,6 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr); > int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count); > int f2fs_reserve_new_block(struct dnode_of_data *dn); > int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index); > -int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from); > int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index); > struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, > int op_flags, bool for_write); > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c > index 92ec2699bc85..4bf77a5bf998 100644 > --- a/fs/f2fs/file.c > +++ b/fs/f2fs/file.c > @@ -4235,10 +4235,76 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) > return ret; > } > > +/* > + * Preallocate blocks for a write request, if it is possible and helpful to do > + * so. 
Returns a positive number if blocks may have been preallocated, 0 if no > + * blocks were preallocated, or a negative errno value if something went > + * seriously wrong. Also sets FI_PREALLOCATED_ALL on the inode if *all* the > + * requested blocks (not just some of them) have been allocated. > + */ > +static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter) > +{ > + struct inode *inode = file_inode(iocb->ki_filp); > + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); > + const loff_t pos = iocb->ki_pos; > + const size_t count = iov_iter_count(iter); > + struct f2fs_map_blocks map = {}; > + bool dio = (iocb->ki_flags & IOCB_DIRECT) && > + !f2fs_force_buffered_io(inode, iocb, iter); > + int flag; > + int ret; > + > + /* If it will be an out-of-place direct write, don't bother. */ > + if (dio && f2fs_lfs_mode(sbi)) > + return 0; > + > + /* No-wait I/O can't allocate blocks. */ > + if (iocb->ki_flags & IOCB_NOWAIT) > + return 0; > + > + /* If it will be a short write, don't bother. */ > + if (fault_in_iov_iter_readable(iter, count)) > + return 0; > + > + if (f2fs_has_inline_data(inode)) { > + /* If the data will fit inline, don't bother. */ > + if (pos + count <= MAX_INLINE_DATA(inode)) > + return 0; > + ret = f2fs_convert_inline_inode(inode); > + if (ret) > + return ret; > + } > + /* do not preallocate block which is partially written */ Otherwise, it looks good to me. Reviewed-by: Chao Yu Thanks, > + map.m_lblk = F2FS_BLK_ALIGN(pos); > + map.m_len = F2FS_BYTES_TO_BLK(pos + count); > + if (map.m_len > map.m_lblk) > + map.m_len -= map.m_lblk; > + else > + map.m_len = 0; > + map.m_may_create = true; > + if (dio) { > + map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint); > + flag = F2FS_GET_BLOCK_PRE_DIO; > + } else { > + map.m_seg_type = NO_CHECK_TYPE; > + flag = F2FS_GET_BLOCK_PRE_AIO; > + } > + > + ret = f2fs_map_blocks(inode, &map, 1, flag); > + /* -ENOSPC is only a fatal error if no blocks could be allocated. 
*/ > + if (ret < 0 && !(ret == -ENOSPC && map.m_len > 0)) > + return ret; > + if (ret == 0) > + set_inode_flag(inode, FI_PREALLOCATED_ALL); > + return map.m_len; > +} > + > static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) > { > struct file *file = iocb->ki_filp; > struct inode *inode = file_inode(file); > + loff_t target_size; > + int preallocated; > ssize_t ret; > > if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) { > @@ -4262,84 +4328,54 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) > > if (unlikely(IS_IMMUTABLE(inode))) { > ret = -EPERM; > - goto unlock; > + goto out_unlock; > } > > if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { > ret = -EPERM; > - goto unlock; > + goto out_unlock; > } > > ret = generic_write_checks(iocb, from); > if (ret > 0) { > - bool preallocated = false; > - size_t target_size = 0; > - int err; > - > - if (fault_in_iov_iter_readable(from, iov_iter_count(from))) > - set_inode_flag(inode, FI_NO_PREALLOC); > - > - if ((iocb->ki_flags & IOCB_NOWAIT)) { > + if (iocb->ki_flags & IOCB_NOWAIT) { > if (!f2fs_overwrite_io(inode, iocb->ki_pos, > iov_iter_count(from)) || > f2fs_has_inline_data(inode) || > f2fs_force_buffered_io(inode, iocb, from)) { > - clear_inode_flag(inode, FI_NO_PREALLOC); > - inode_unlock(inode); > ret = -EAGAIN; > - goto out; > + goto out_unlock; > } > - goto write; > } > - > - if (is_inode_flag_set(inode, FI_NO_PREALLOC)) > - goto write; > - > if (iocb->ki_flags & IOCB_DIRECT) { > - /* > - * Convert inline data for Direct I/O before entering > - * f2fs_direct_IO(). > - */ > - err = f2fs_convert_inline_inode(inode); > - if (err) > - goto out_err; > - /* > - * If force_buffere_io() is true, we have to allocate > - * blocks all the time, since f2fs_direct_IO will fall > - * back to buffered IO. 
> - */ > - if (!f2fs_force_buffered_io(inode, iocb, from) && > - f2fs_lfs_mode(F2FS_I_SB(inode))) > - goto write; > + ret = f2fs_convert_inline_inode(inode); > + if (ret) > + goto out_unlock; > } > - preallocated = true; > + /* Possibly preallocate the blocks for the write. */ > target_size = iocb->ki_pos + iov_iter_count(from); > - > - err = f2fs_preallocate_blocks(iocb, from); > - if (err) { > -out_err: > - clear_inode_flag(inode, FI_NO_PREALLOC); > - inode_unlock(inode); > - ret = err; > - goto out; > + preallocated = f2fs_preallocate_blocks(iocb, from); > + if (preallocated < 0) { > + ret = preallocated; > + goto out_unlock; > } > -write: > + > ret = __generic_file_write_iter(iocb, from); > - clear_inode_flag(inode, FI_NO_PREALLOC); > > - /* if we couldn't write data, we should deallocate blocks. */ > - if (preallocated && i_size_read(inode) < target_size) { > + /* Don't leave any preallocated blocks around past i_size. */ > + if (preallocated > 0 && i_size_read(inode) < target_size) { > down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); > filemap_invalidate_lock(inode->i_mapping); > f2fs_truncate(inode); > filemap_invalidate_unlock(inode->i_mapping); > up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); > } > + clear_inode_flag(inode, FI_PREALLOCATED_ALL); > > if (ret > 0) > f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret); > } > -unlock: > +out_unlock: > inode_unlock(inode); > out: > trace_f2fs_file_write_iter(inode, iocb->ki_pos, >