2010-02-06 17:32:17

by marcus.husar

[permalink] [raw]
Subject: [PATCH 1/4][2.6.32-stable] ext4: Prepare for backporting first quota patch

Hi all,

this patch series is intended to fix problems with ext4 quota
support in the 2.6.32 stable tree. If the changes are not too
extensive, it would be great if you could send them over to
Greg Kroah-Hartman. All patches apply cleanly, compile, and have been
tested with quota support enabled.

Aneesh Kumar has posted two patches to improve quota support that have
been applied to 2.6.33-rc. Patches 2 and 4 are backports of these
patches on top of 2.6.32.7. Patches 1 and 3 are necessary to be able to
backport Aneesh's patches, because there were too many changes to some
functions between 2.6.32.7 and 2.6.33-rc. Patches 1 and 3 therefore
adapt some functions from 2.6.33-rc3
(74d2e4f8d79ae0c4b6ec027958d5b18058662eea, parent of patch 2, and
1db913823c0f8360fccbd24ca67eb073966a5ffd, parent of patch 4) to
2.6.32.7.

Adapt ext4_da_reserve_space, ext4_calc_metadata_amount, and
ext4_indirect_calc_metadata_amount from 2.6.33-rc3 to apply on top of
2.6.32.7. Also make a small adjustment to ext4_da_get_block_prep.

Signed-off-by: Marcus Husar <[email protected]>
---
ext4.h | 2 +
inode.c | 88 ++++++++++++++++++++++++++++++++----------------------
2 files changed, 55 insertions(+), 35 deletions(-)

diff -uprN a/fs/ext4/ext4.h b/fs/ext4/ext4.h
--- a/fs/ext4/ext4.h 2010-02-05 09:11:41.900599000 +0100
+++ b/fs/ext4/ext4.h 2010-02-05 10:05:39.088239463 +0100
@@ -693,6 +693,8 @@ struct ext4_inode_info {
unsigned int i_reserved_meta_blocks;
unsigned int i_allocated_meta_blocks;
unsigned short i_delalloc_reserved_flag;
+ sector_t i_da_metadata_calc_last_lblock;
+ int i_da_metadata_calc_len;

/* on-disk additional length */
__u16 i_extra_isize;
diff -uprN a/fs/ext4/inode.c b/fs/ext4/inode.c
--- a/fs/ext4/inode.c 2010-02-05 09:11:41.941245000 +0100
+++ b/fs/ext4/inode.c 2010-02-05 09:55:16.068240477 +0100
@@ -1051,38 +1051,44 @@ qsize_t *ext4_get_reserved_space(struct
return &EXT4_I(inode)->i_reserved_quota;
}
#endif
+
/*
* Calculate the number of metadata blocks need to reserve
- * to allocate @blocks for non extent file based file
+ * to allocate a new block at @lblocks for non extent file based file
*/
-static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
+static int ext4_indirect_calc_metadata_amount(struct inode *inode,
+ sector_t lblock)
{
- int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
- int ind_blks, dind_blks, tind_blks;
-
- /* number of new indirect blocks needed */
- ind_blks = (blocks + icap - 1) / icap;
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1;
+ int blk_bits;

- dind_blks = (ind_blks + icap - 1) / icap;
+ if (lblock < EXT4_NDIR_BLOCKS)
+ return 0;

- tind_blks = 1;
+ lblock -= EXT4_NDIR_BLOCKS;

- return ind_blks + dind_blks + tind_blks;
+ if (ei->i_da_metadata_calc_len &&
+ (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
+ ei->i_da_metadata_calc_len++;
+ return 0;
+ }
+ ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
+ ei->i_da_metadata_calc_len = 1;
+ blk_bits = roundup_pow_of_two(lblock + 1);
+ return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
}

/*
* Calculate the number of metadata blocks need to reserve
- * to allocate given number of blocks
+ * to allocate a block located at @lblock
*/
-static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
+static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
{
- if (!blocks)
- return 0;
-
if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
- return ext4_ext_calc_metadata_amount(inode, blocks);
+ return ext4_ext_calc_metadata_amount(inode, lblock);

- return ext4_indirect_calc_metadata_amount(inode, blocks);
+ return ext4_indirect_calc_metadata_amount(inode, lblock);
}

static void ext4_da_update_reserve_space(struct inode *inode, int used)
@@ -1834,11 +1840,15 @@ static int ext4_journalled_write_end(str
return ret ? ret : copied;
}

-static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
+/*
+ * Reserve a single block located at lblock
+ */
+static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
{
int retries = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- unsigned long md_needed, mdblocks, total = 0;
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ unsigned long md_needed, md_reserved;

/*
* recalculate the amount of metadata blocks to reserve
@@ -1846,35 +1856,43 @@ static int ext4_da_reserve_space(struct
* worse case is one extent per block
*/
repeat:
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
- total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
- mdblocks = ext4_calc_metadata_amount(inode, total);
- BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks);
-
- md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
- total = md_needed + nrblocks;
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ spin_lock(&ei->i_block_reservation_lock);
+ md_reserved = ei->i_reserved_meta_blocks;
+ md_needed = ext4_calc_metadata_amount(inode, lblock);
+ spin_unlock(&ei->i_block_reservation_lock);

/*
* Make quota reservation here to prevent quota overflow
* later. Real quota accounting is done at pages writeout
* time.
*/
- if (vfs_dq_reserve_block(inode, total))
+ if (vfs_dq_reserve_block(inode, md_needed + 1)) {
+ /*
+ * We tend to badly over-estimate the amount of
+ * metadata blocks which are needed, so if we have
+ * reserved any metadata blocks, try to force out the
+ * inode and see if we have any better luck.
+ */
+ if (md_reserved && retries++ <= 3)
+ goto retry;
return -EDQUOT;
+ }

- if (ext4_claim_free_blocks(sbi, total)) {
- vfs_dq_release_reservation_block(inode, total);
+ if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
+ vfs_dq_release_reservation_block(inode, md_needed + 1);
if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
+ retry:
+ if (md_reserved)
+ write_inode_now(inode, (retries == 3));
yield();
goto repeat;
}
return -ENOSPC;
}
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
- EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
- EXT4_I(inode)->i_reserved_meta_blocks += md_needed;
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ spin_lock(&ei->i_block_reservation_lock);
+ ei->i_reserved_data_blocks++;
+ ei->i_reserved_meta_blocks += md_needed;
+ spin_unlock(&ei->i_block_reservation_lock);

return 0; /* success */
}
@@ -2530,7 +2548,7 @@ static int ext4_da_get_block_prep(struct
* XXX: __block_prepare_write() unmaps passed block,
* is it OK?
*/
- ret = ext4_da_reserve_space(inode, 1);
+ ret = ext4_da_reserve_space(inode, iblock);
if (ret)
/* not enough space to reserve */
return ret;





2010-02-06 19:00:24

by marcus.husar

[permalink] [raw]
Subject: Re: [PATCH 1/4][2.6.32-stable] ext4: Prepare for backporting first quota patch

Hi again,

before I sent my patches out I did a lot of testing, and everything
seemed to work fine. But an aideupdate run started by a cron job revealed
that the patches actually introduce two new bugs. At least, I have never
hit these bugs before: there are no identical traces in the log files
from the last year.

Perhaps they are also present in 2.6.33-rc. I'll do more testing
and try again.

Sincerely yours,
Marcus

First trace:
Feb 6 19:02:11 pauling kernel: [17124.290892] ------------[ cut here
]------------
Feb 6 19:02:11 pauling kernel: [17124.290901] WARNING: at
fs/quota/dquot.c:964
dquot_claim_space+0x98/0x131()
Feb 6 19:02:11 pauling kernel: [17124.290904] Hardware name: GA-MA790FX-DQ6
Feb 6 19:02:11 pauling kernel: [17124.290908] Pid: 10823, comm: mv
Tainted: G W
2.6.32.7-2-amd64 #1
Feb 6 19:02:11 pauling kernel: [17124.290911] Call Trace:
Feb 6 19:02:11 pauling kernel: [17124.290918] [<ffffffff81485258>] ?
__down_read+0x15/0x9d
Feb 6 19:02:11 pauling kernel: [17124.290922] [<ffffffff810f6c74>] ?
dquot_claim_space+0x98/0x131
Feb 6 19:02:11 pauling kernel: [17124.290927] [<ffffffff81058f04>] ?
warn_slowpath_common+0x72/0x9e
Feb 6 19:02:11 pauling kernel: [17124.290931] [<ffffffff810f6c74>] ?
dquot_claim_space+0x98/0x131
Feb 6 19:02:11 pauling kernel: [17124.290935] [<ffffffff81148b48>] ?
ext4_mb_mark_diskspace_used+0x37e/0x433
Feb 6 19:02:11 pauling kernel: [17124.290939] [<ffffffff8114b51d>] ?
ext4_mb_new_blocks+0x1d1/0x3aa
Feb 6 19:02:11 pauling kernel: [17124.290944] [<ffffffff81144da3>] ?
ext4_ext_get_blocks+0x141d/0x1702
Feb 6 19:02:11 pauling kernel: [17124.290948] [<ffffffff81142797>] ?
ext4_ext_find_extent+0x4b/0x285
Feb 6 19:02:11 pauling kernel: [17124.290952] [<ffffffff811321b4>] ?
ext4_get_blocks+0x15a/0x23d
Feb 6 19:02:11 pauling kernel: [17124.290956] [<ffffffff81132644>] ?
mpage_da_map_blocks+0xa4/0x5a1
Feb 6 19:02:11 pauling kernel: [17124.290960] [<ffffffff8109971a>] ?
pagevec_lookup_tag+0x1a/0x21
Feb 6 19:02:11 pauling kernel: [17124.290963] [<ffffffff810983bc>] ?
write_cache_pages+0x162/0x322
Feb 6 19:02:11 pauling kernel: [17124.290967] [<ffffffff8113320c>] ?
__mpage_da_writepage+0x0/0x146
Feb 6 19:02:11 pauling kernel: [17124.290971] [<ffffffff81132fd4>] ?
ext4_da_writepages+0x493/0x62a
Feb 6 19:02:11 pauling kernel: [17124.290980] [<ffffffff81069f7f>] ?
bit_waitqueue+0x10/0xa0
Feb 6 19:02:11 pauling kernel: [17124.290985] [<ffffffff810929b1>] ?
__filemap_fdatawrite_range+0x4b/0x54
Feb 6 19:02:11 pauling kernel: [17124.290989] [<ffffffff81139007>] ?
ext4_rename+0x758/0x790
Feb 6 19:02:11 pauling kernel: [17124.290994] [<ffffffff810c9adc>] ?
vfs_rename+0x2a4/0x4b3
Feb 6 19:02:11 pauling kernel: [17124.290998] [<ffffffff810c887b>] ?
__lookup_hash+0x4e/0x11f
Feb 6 19:02:11 pauling kernel: [17124.291002] [<ffffffff810cb766>] ?
sys_renameat+0x16d/0x1e9
Feb 6 19:02:11 pauling kernel: [17124.291005] [<ffffffff810a616c>] ?
handle_mm_fault+0x368/0x72e
Feb 6 19:02:11 pauling kernel: [17124.291010] [<ffffffff810435cd>] ?
do_page_fault+0x1e5/0x1fa
Feb 6 19:02:11 pauling kernel: [17124.291014] [<ffffffff81088feb>] ?
audit_syscall_entry+0x15b/0x186
Feb 6 19:02:11 pauling kernel: [17124.291019] [<ffffffff811fab3e>] ?
__up_read+0x13/0x8d
Feb 6 19:02:11 pauling kernel: [17124.291022] [<ffffffff8102986b>] ?
system_call_fastpath+0x16/0x1b
Feb 6 19:02:11 pauling kernel: [17124.291025] ---[ end trace
64137460f91d3829 ]---


Second trace:
Feb 6 19:02:33 pauling kernel: [17146.000977] ------------[ cut here
]------------
Feb 6 19:02:33 pauling kernel: [17146.000984] WARNING: at
fs/quota/dquot.c:964
dquot_claim_space+0xc1/0x131()
Feb 6 19:02:33 pauling kernel: [17146.000991] Hardware name: GA-MA790FX-DQ6
Feb 6 19:02:33 pauling kernel: [17146.000996] Pid: 10238, comm:
flush-251:1 Tainted: G
W 2.6.32.7-2-amd64 #1
Feb 6 19:02:33 pauling kernel: [17146.001002] Call Trace:
Feb 6 19:02:33 pauling kernel: [17146.001008] [<ffffffff81485258>] ?
__down_read+0x15/0x9d
Feb 6 19:02:33 pauling kernel: [17146.001019] [<ffffffff810f6c9d>] ?
dquot_claim_space+0xc1/0x131
Feb 6 19:02:33 pauling kernel: [17146.001031] [<ffffffff81058f04>] ?
warn_slowpath_common+0x72/0x9e
Feb 6 19:02:33 pauling kernel: [17146.001042] [<ffffffff810f6c9d>] ?
dquot_claim_space+0xc1/0x131
Feb 6 19:02:33 pauling kernel: [17146.001049] [<ffffffff81131efe>] ?
ext4_da_update_reserve_space+0x167/0x2c3
Feb 6 19:02:33 pauling kernel: [17146.001058] [<ffffffff81144ef6>] ?
ext4_ext_get_blocks+0x1570/0x1702
Feb 6 19:02:33 pauling kernel: [17146.001067] [<ffffffff81336a70>] ?
get_active_stripe+0x44d/0x542
Feb 6 19:02:33 pauling kernel: [17146.001078] [<ffffffff81333dd8>] ?
release_stripe+0x30/0x44
Feb 6 19:02:33 pauling kernel: [17146.001088] [<ffffffff811321b4>] ?
ext4_get_blocks+0x15a/0x23d
Feb 6 19:02:33 pauling kernel: [17146.001097] [<ffffffff8134a40d>] ?
__split_and_process_bio+0x55a/0x569
Feb 6 19:02:33 pauling kernel: [17146.001104] [<ffffffff81132644>] ?
mpage_da_map_blocks+0xa4/0x5a1
Feb 6 19:02:33 pauling kernel: [17146.001111] [<ffffffff8109971a>] ?
pagevec_lookup_tag+0x1a/0x21
Feb 6 19:02:33 pauling kernel: [17146.001118] [<ffffffff810983bc>] ?
write_cache_pages+0x162/0x322
Feb 6 19:02:33 pauling kernel: [17146.001127] [<ffffffff8113320c>] ?
__mpage_da_writepage+0x0/0x146
Feb 6 19:02:33 pauling kernel: [17146.001136] [<ffffffff81132fd4>] ?
ext4_da_writepages+0x493/0x62a
Feb 6 19:02:33 pauling kernel: [17146.001148] [<ffffffff8104ae76>] ?
scale_rt_power+0x1f/0x64
Feb 6 19:02:33 pauling kernel: [17146.001155] [<ffffffff810da654>] ?
writeback_single_inode+0xe6/0x2d9
Feb 6 19:02:33 pauling kernel: [17146.001166] [<ffffffff810db2c5>] ?
writeback_inodes_wb+0x421/0x4fa
Feb 6 19:02:33 pauling kernel: [17146.001173] [<ffffffff810db4ca>] ?
wb_writeback+0x12c/0x1aa
Feb 6 19:02:33 pauling kernel: [17146.001182] [<ffffffff810db72e>] ?
wb_do_writeback+0x145/0x15b
Feb 6 19:02:33 pauling kernel: [17146.001190] [<ffffffff810db76e>] ?
bdi_writeback_task+0x2a/0x85
Feb 6 19:02:33 pauling kernel: [17146.001197] [<ffffffff810a2c0a>] ?
bdi_start_fn+0x0/0xca
Feb 6 19:02:33 pauling kernel: [17146.001203] [<ffffffff810a2c7a>] ?
bdi_start_fn+0x70/0xca
Feb 6 19:02:33 pauling kernel: [17146.001209] [<ffffffff810a2c0a>] ?
bdi_start_fn+0x0/0xca
Feb 6 19:02:33 pauling kernel: [17146.001216] [<ffffffff81069ddb>] ?
kthread+0x79/0x81
Feb 6 19:02:33 pauling kernel: [17146.001225] [<ffffffff8102a7fa>] ?
child_rip+0xa/0x20
Feb 6 19:02:33 pauling kernel: [17146.001232] [<ffffffff81069d62>] ?
kthread+0x0/0x81
Feb 6 19:02:33 pauling kernel: [17146.001243] [<ffffffff8102a7f0>] ?
child_rip+0x0/0x20
Feb 6 19:02:33 pauling kernel: [17146.001247] ---[ end trace
64137460f91d389c ]---
Feb 6 19:02:33 pauling kernel: [17146.105248] ------------[ cut here
]------------