From: Grant Coady Subject: Re: [04/24] ext4: Make sure all the block allocation paths reserve blocks Date: Tue, 25 May 2010 17:21:43 +1000 Message-ID: <5humv51rdjr8vl3c7qlroekj2k004gl665@4ax.com> References: <20100524223544.GA13721@kroah.com> <20100524223014.777274118@clark.site> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Transfer-Encoding: 7bit Cc: linux-kernel@vger.kernel.org, stable@kernel.org, stable-review@kernel.org, torvalds@linux-foundation.org, akpm@linux-foundation.org, alan@lxorguk.ukuu.org.uk, Ext4 Developers List , "Theodore Tso" , "Jayson R. King" , "Aneesh Kumar K.V" To: Greg KH Return-path: Received: from mail-px0-f174.google.com ([209.85.212.174]:59131 "EHLO mail-px0-f174.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751208Ab0EYHVy (ORCPT ); Tue, 25 May 2010 03:21:54 -0400 In-Reply-To: <20100524223014.777274118@clark.site> Sender: linux-ext4-owner@vger.kernel.org List-ID: On Mon, 24 May 2010 15:28:00 -0700, you wrote: >2.6.27-stable review patch. If anyone has any objections, please let us know. > >------------------ > > >From: Aneesh Kumar K.V > >commit a30d542a0035b886ffaafd0057ced0a2b28c3a4f upstream. > >With delayed allocation we need to make sure block are reserved before >we attempt to allocate them. Otherwise we get block allocation failure >(ENOSPC) during writepages which cannot be handled. This would mean >silent data loss (We do a printk stating data will be lost). This patch >updates the DIO and fallocate code path to do block reservation before >block allocation. This is needed to make sure parallel DIO and fallocate >request doesn't take block out of delayed reserve space. > >When free blocks count go below a threshold we switch to a slow patch s/patch/path/ ?? Or, are these patch comments locked in stone for -stable? Grant. >which looks at other CPU's accumulated percpu counter values. > >Signed-off-by: Aneesh Kumar K.V >Signed-off-by: "Theodore Ts'o" >Signed-off-by: Jayson R. King >Signed-off-by: Theodore Ts'o >Signed-off-by: Greg Kroah-Hartman > >--- > fs/ext4/balloc.c | 58 +++++++++++++++++++++++++++++++++++++++--------------- > fs/ext4/ext4.h | 13 ++++++++++++ > fs/ext4/inode.c | 5 ---- > fs/ext4/mballoc.c | 23 ++++++++++++--------- > 4 files changed, 69 insertions(+), 30 deletions(-) > >--- a/fs/ext4/balloc.c >+++ b/fs/ext4/balloc.c >@@ -1754,6 +1754,32 @@ out: > return ret; > } > >+int ext4_claim_free_blocks(struct ext4_sb_info *sbi, >+ ext4_fsblk_t nblocks) >+{ >+ s64 free_blocks; >+ ext4_fsblk_t root_blocks = 0; >+ struct percpu_counter *fbc = &sbi->s_freeblocks_counter; >+ >+ free_blocks = percpu_counter_read(fbc); >+ >+ if (!capable(CAP_SYS_RESOURCE) && >+ sbi->s_resuid != current->fsuid && >+ (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) >+ root_blocks = ext4_r_blocks_count(sbi->s_es); >+ >+ if (free_blocks - (nblocks + root_blocks) < EXT4_FREEBLOCKS_WATERMARK) >+ free_blocks = percpu_counter_sum(&sbi->s_freeblocks_counter); >+ >+ if (free_blocks < (root_blocks + nblocks)) >+ /* we don't have free space */ >+ return -ENOSPC; >+ >+ /* reduce fs free blocks counter */ >+ percpu_counter_sub(fbc, nblocks); >+ return 0; >+} >+ > /** > * ext4_has_free_blocks() > * @sbi: in-core super block structure. >@@ -1775,18 +1801,17 @@ ext4_fsblk_t ext4_has_free_blocks(struct > sbi->s_resuid != current->fsuid && > (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) > root_blocks = ext4_r_blocks_count(sbi->s_es); >-#ifdef CONFIG_SMP >- if (free_blocks - root_blocks < FBC_BATCH) >- free_blocks = >- percpu_counter_sum(&sbi->s_freeblocks_counter); >-#endif >+ >+ if (free_blocks - (nblocks + root_blocks) < EXT4_FREEBLOCKS_WATERMARK) >+ free_blocks = percpu_counter_sum_positive(&sbi->s_freeblocks_counter); >+ > if (free_blocks <= root_blocks) > /* we don't have free space */ > return 0; > if (free_blocks - root_blocks < nblocks) > return free_blocks - root_blocks; > return nblocks; >- } >+} > > > /** >@@ -1865,14 +1890,11 @@ ext4_fsblk_t ext4_old_new_blocks(handle_ > /* > * With delalloc we already reserved the blocks > */ >- *count = ext4_has_free_blocks(sbi, *count); >- } >- if (*count == 0) { >- *errp = -ENOSPC; >- return 0; /*return with ENOSPC error */ >+ if (ext4_claim_free_blocks(sbi, *count)) { >+ *errp = -ENOSPC; >+ return 0; /*return with ENOSPC error */ >+ } > } >- num = *count; >- > /* > * Check quota for allocation of this block. > */ >@@ -2067,9 +2089,13 @@ allocated: > le16_add_cpu(&gdp->bg_free_blocks_count, -num); > gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); > spin_unlock(sb_bgl_lock(sbi, group_no)); >- if (!EXT4_I(inode)->i_delalloc_reserved_flag) >- percpu_counter_sub(&sbi->s_freeblocks_counter, num); >- >+ if (!EXT4_I(inode)->i_delalloc_reserved_flag && (*count != num)) { >+ /* >+ * we allocated less blocks than we >+ * claimed. Add the difference back. >+ */ >+ percpu_counter_add(&sbi->s_freeblocks_counter, *count - num); >+ } > if (sbi->s_log_groups_per_flex) { > ext4_group_t flex_group = ext4_flex_group(sbi, group_no); > spin_lock(sb_bgl_lock(sbi, flex_group)); >--- a/fs/ext4/ext4.h >+++ b/fs/ext4/ext4.h >@@ -1015,6 +1015,8 @@ extern ext4_fsblk_t ext4_new_blocks(hand > unsigned long *count, int *errp); > extern ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode, > ext4_fsblk_t goal, unsigned long *count, int *errp); >+extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, >+ ext4_fsblk_t nblocks); > extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, > ext4_fsblk_t nblocks); > extern void ext4_free_blocks (handle_t *handle, struct inode *inode, >@@ -1245,6 +1247,17 @@ do { \ > __ext4_std_error((sb), __func__, (errno)); \ > } while (0) > >+#ifdef CONFIG_SMP >+/* Each CPU can accumulate FBC_BATCH blocks in their local >+ * counters. So we need to make sure we have free blocks more >+ * than FBC_BATCH * nr_cpu_ids. Also add a window of 4 times. >+ */ >+#define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids)) >+#else >+#define EXT4_FREEBLOCKS_WATERMARK 0 >+#endif >+ >+ > /* > * Inodes and files operations > */ >--- a/fs/ext4/inode.c >+++ b/fs/ext4/inode.c >@@ -1564,13 +1564,10 @@ static int ext4_da_reserve_space(struct > md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; > total = md_needed + nrblocks; > >- if (ext4_has_free_blocks(sbi, total) < total) { >+ if (ext4_claim_free_blocks(sbi, total)) { > spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); > return -ENOSPC; > } >- /* reduce fs free blocks counter */ >- percpu_counter_sub(&sbi->s_freeblocks_counter, total); >- > EXT4_I(inode)->i_reserved_data_blocks += nrblocks; > EXT4_I(inode)->i_reserved_meta_blocks = mdblocks; > >--- a/fs/ext4/mballoc.c >+++ b/fs/ext4/mballoc.c >@@ -3194,9 +3194,15 @@ ext4_mb_mark_diskspace_used(struct ext4_ > * at write_begin() time for delayed allocation > * do not double accounting > */ >- if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) >- percpu_counter_sub(&sbi->s_freeblocks_counter, >- ac->ac_b_ex.fe_len); >+ if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED) && >+ ac->ac_o_ex.fe_len != ac->ac_b_ex.fe_len) { >+ /* >+ * we allocated less blocks than we calimed >+ * Add the difference back >+ */ >+ percpu_counter_add(&sbi->s_freeblocks_counter, >+ ac->ac_o_ex.fe_len - ac->ac_b_ex.fe_len); >+ } > > if (sbi->s_log_groups_per_flex) { > ext4_group_t flex_group = ext4_flex_group(sbi, >@@ -4649,14 +4655,11 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t > /* > * With delalloc we already reserved the blocks > */ >- ar->len = ext4_has_free_blocks(sbi, ar->len); >- } >- >- if (ar->len == 0) { >- *errp = -ENOSPC; >- return 0; >+ if (ext4_claim_free_blocks(sbi, ar->len)) { >+ *errp = -ENOSPC; >+ return 0; >+ } > } >- > while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { > ar->flags |= EXT4_MB_HINT_NOPREALLOC; > ar->len--; -- http://bugs.id.au/