2009-12-10 01:42:30

by Dmitry Monakhov

[permalink] [raw]
Subject: [PATCH] ext4: fix sleep inside spinlock issue aka #14739

Drop i_block_reservation_lock before calling vfs_dq_reserve_block(), so
that the quota reservation is no longer made while holding the spinlock.
This patch fixes http://bugzilla.kernel.org/show_bug.cgi?id=14739

Signed-off-by: Dmitry Monakhov <[email protected]>
---
fs/ext4/inode.c | 29 ++++++++++++++++-------------
1 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 942e183..f693768 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1852,19 +1852,8 @@ repeat:
md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
total = md_needed + nrblocks;

- /*
- * Make quota reservation here to prevent quota overflow
- * later. Real quota accounting is done at pages writeout
- * time.
- */
- if (vfs_dq_reserve_block(inode, total)) {
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
- return -EDQUOT;
- }
-
if (ext4_claim_free_blocks(sbi, total)) {
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
- vfs_dq_release_reservation_block(inode, total);
if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
yield();
goto repeat;
@@ -1872,10 +1861,24 @@ repeat:
return -ENOSPC;
}
EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
- EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
+ EXT4_I(inode)->i_reserved_meta_blocks += md_needed;
+ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+
+ /*
+ * Make quota reservation here to prevent quota overflow
+ * later. Real quota accounting is done at pages writeout
+ * time.
+ */
+ if (!vfs_dq_reserve_block(inode, total))
+ return 0; /* success */

+ /* Quota reservation has failed, revert inode's reservation */
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter, total);
+ spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+ EXT4_I(inode)->i_reserved_data_blocks -= nrblocks;
+ EXT4_I(inode)->i_reserved_meta_blocks -= md_needed;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
- return 0; /* success */
+ return -EDQUOT;
}

static void ext4_da_release_space(struct inode *inode, int to_free)
--
1.6.0.4



2009-12-10 16:15:47

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix sleep inside spinlock issue aka #14739

> drop i_block_reservation_lock before vfs_dq_reserve_block().
> this patch fix http://bugzilla.kernel.org/show_bug.cgi?id=14739
>
> Signed-off-by: Dmitry Monakhov <[email protected]>
Sorry if someone already refused this (I didn't follow the previous
discussion too closely) but: Looking at the code I see no reason why
ext4_claim_free_blocks needs i_block_reservation_lock. In fact mballoc
calls this function without the lock. So could not we just compute
'total' under the lock, release it, reserve quota and then claim free
blocks? You'd get rid of undoing the block reservation and obtain quota
and blocks in the usual order...

Honza

> ---
> fs/ext4/inode.c | 29 ++++++++++++++++-------------
> 1 files changed, 16 insertions(+), 13 deletions(-)
>
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 942e183..f693768 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -1852,19 +1852,8 @@ repeat:
> md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
> total = md_needed + nrblocks;
>
> - /*
> - * Make quota reservation here to prevent quota overflow
> - * later. Real quota accounting is done at pages writeout
> - * time.
> - */
> - if (vfs_dq_reserve_block(inode, total)) {
> - spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> - return -EDQUOT;
> - }
> -
> if (ext4_claim_free_blocks(sbi, total)) {
> spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> - vfs_dq_release_reservation_block(inode, total);
> if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
> yield();
> goto repeat;
> @@ -1872,10 +1861,24 @@ repeat:
> return -ENOSPC;
> }
> EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
> - EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
> + EXT4_I(inode)->i_reserved_meta_blocks += md_needed;
> + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> +
> + /*
> + * Make quota reservation here to prevent quota overflow
> + * later. Real quota accounting is done at pages writeout
> + * time.
> + */
> + if (!vfs_dq_reserve_block(inode, total))
> + return 0; /* success */
>
> + /* Quota reservation has failed, revert inode's reservation */
> + percpu_counter_sub(&sbi->s_dirtyblocks_counter, total);
> + spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
> + EXT4_I(inode)->i_reserved_data_blocks -= nrblocks;
> + EXT4_I(inode)->i_reserved_meta_blocks -= md_needed;
> spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> - return 0; /* success */
> + return -EDQUOT;
> }
>
> static void ext4_da_release_space(struct inode *inode, int to_free)
> --
> 1.6.0.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
Jan Kara <[email protected]>
SuSE CR Labs

2009-12-10 17:22:44

by Dmitry Monakhov

[permalink] [raw]
Subject: [PATCH] ext4: fix sleep inside spinlock issue aka #14739 V2


Drop i_block_reservation_lock before calling vfs_dq_reserve_block(), so
that the quota reservation is no longer made while holding the spinlock.
This patch fixes http://bugzilla.kernel.org/show_bug.cgi?id=14739

Changes from previous version:
- simplify the patch according to Jan's comments

Signed-off-by: Dmitry Monakhov <[email protected]>
---
fs/ext4/inode.c | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 942e183..2327f7a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1851,6 +1851,7 @@ repeat:

md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
total = md_needed + nrblocks;
+ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);

/*
* Make quota reservation here to prevent quota overflow
@@ -1858,12 +1859,10 @@ repeat:
* time.
*/
if (vfs_dq_reserve_block(inode, total)) {
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
return -EDQUOT;
}

if (ext4_claim_free_blocks(sbi, total)) {
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
vfs_dq_release_reservation_block(inode, total);
if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
yield();
@@ -1871,10 +1870,11 @@ repeat:
}
return -ENOSPC;
}
+ spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;

2009-12-15 21:48:14

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix sleep inside spinlock issue aka #14739 V2

On Thu 10-12-09 20:22:16, Dmitry Monakhov wrote:
>
> drop i_block_reservation_lock before vfs_dq_reserve_block().
> this patch fix http://bugzilla.kernel.org/show_bug.cgi?id=14739
>
> changes from previous version:
> - simplify the patch according to Jan's comments
Dmitry, I suppose I should also merge this patch together with your other
fixes, right? For some reason, it was not part of the last submission of
your patch series...

Honza

> Signed-off-by: Dmitry Monakhov <[email protected]>
> ---
> fs/ext4/inode.c | 6 +++---
> 1 files changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 942e183..2327f7a 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -1851,6 +1851,7 @@ repeat:
>
> md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
> total = md_needed + nrblocks;
> + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
>
> /*
> * Make quota reservation here to prevent quota overflow
> @@ -1858,12 +1859,10 @@ repeat:
> * time.
> */
> if (vfs_dq_reserve_block(inode, total)) {
> - spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> return -EDQUOT;
> }
>
> if (ext4_claim_free_blocks(sbi, total)) {
> - spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> vfs_dq_release_reservation_block(inode, total);
> if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
> yield();
> @@ -1871,10 +1870,11 @@ repeat:
> }
> return -ENOSPC;
> }
> + spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
> EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
> EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
> -
> spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> +
> return 0; /* success */
> }
>
> --
> 1.6.0.4
>
--
Jan Kara <[email protected]>
SUSE Labs, CR

2009-12-15 23:16:38

by Dmitry Monakhov

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix sleep inside spinlock issue aka #14739 V2

2009/12/16 Jan Kara <[email protected]>:
> On Thu 10-12-09 20:22:16, Dmitry Monakhov wrote:
>>
>> drop i_block_reservation_lock before vfs_dq_reserve_block().
>> this patch fix http://bugzilla.kernel.org/show_bug.cgi?id=14739
>>
>> changes from previous version:
>>  - simplify the patch according to Jan's comments
>  Dmitry, I suppose I should also merge this patch together with your other
> fixes, right? For some reason, it was not part of the last submission of
> your patch series...
Yes. It is not in the series because it is a separate bug and it does not
depend on the
other patches.
Please merge the newest version of the patch: http://patchwork.ozlabs.org/patch/40896/
The version quoted here is affected by a stupid bug; see the note at the bottom of the patch.
In fact, this patch does help Justin Maggard to overcome his issue.

BTW, I also have another patch: http://patchwork.ozlabs.org/patch/40805/
It was acked by you, but AFAIK it hasn't been pushed to the ext4 queue yet.
>
>                                                                Honza
>
>> Signed-off-by: Dmitry Monakhov <[email protected]>
>> ---
>>  fs/ext4/inode.c |    6 +++---
>>  1 files changed, 3 insertions(+), 3 deletions(-)
>>
>> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
>> index 942e183..2327f7a 100644
>> --- a/fs/ext4/inode.c
>> +++ b/fs/ext4/inode.c
>> @@ -1851,6 +1851,7 @@ repeat:
>>
>>       md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
>>       total = md_needed + nrblocks;
>> +     spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
>>
>>       /*
>>        * Make quota reservation here to prevent quota overflow
>> @@ -1858,12 +1859,10 @@ repeat:
>>        * time.
>>        */
>>       if (vfs_dq_reserve_block(inode, total)) {
>> -             spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
>>               return -EDQUOT;
>>       }
>>
>>       if (ext4_claim_free_blocks(sbi, total)) {
>> -             spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
>>               vfs_dq_release_reservation_block(inode, total);
>>               if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
>>                       yield();
>> @@ -1871,10 +1870,11 @@ repeat:
>>               }
>>               return -ENOSPC;
>>       }
>> +     spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
>>       EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
>>       EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
i_reserved_meta_blocks may have changed after we dropped the lock, so we
have to add the delta here instead of assigning the precomputed value:
EXT4_I(inode)->i_reserved_meta_blocks += md_needed;
I overlooked this simple bug, and in fact I was only able to catch it
under heavy I/O load:
./fsstress -p16 .....
>> -
>>       spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
>> +
>>       return 0;       /* success */
>>  }
>>
>> --
>> 1.6.0.4
>>
> --
> Jan Kara <[email protected]>
> SUSE Labs, CR
>

2009-12-15 23:44:21

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix sleep inside spinlock issue aka #14739 V2

On Wed 16-12-09 02:16:36, Dmitry Monakhov wrote:
> 2009/12/16 Jan Kara <[email protected]>:
> > On Thu 10-12-09 20:22:16, Dmitry Monakhov wrote:
> >>
> >> drop i_block_reservation_lock before vfs_dq_reserve_block().
> >> this patch fix http://bugzilla.kernel.org/show_bug.cgi?id=14739
> >>
> >> changes from previous version:
> >>  - simplify the patch according to Jan's comments
> >  Dmitry, I suppose I should also merge this patch together with your other
> > fixes, right? For some reason, it was not part of the last submission of
> > your patch series...
> yes. it is not in the series because it is another bug, and it is not
> depended from
> other patches.
> Please merge newst version of the patch http://patchwork.ozlabs.org/patch/40896/
> This version is affected by stupid bug, see at the bottom of the patch.
> In fact this patch does help Justin Maggard to overcome his issue.
>
> BTW i've also have another patch http://patchwork.ozlabs.org/patch/40805/
> it was acked by you, but ASAIK it was't pushed to the ext4 queue yet.
OK, I've added both patches to my tree. Usually Ted takes care of ext4
fixes but given that I carry also other ext4 quota fixes, I think I can
merge these as well if he does not object.

Honza
--
Jan Kara <[email protected]>
SUSE Labs, CR

2009-12-18 10:02:47

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix sleep inside spinlock issue aka #14739

On Thu, Dec 10, 2009 at 05:15:53PM +0100, Jan Kara wrote:
> > drop i_block_reservation_lock before vfs_dq_reserve_block().
> > this patch fix http://bugzilla.kernel.org/show_bug.cgi?id=14739
> >
> > Signed-off-by: Dmitry Monakhov <[email protected]>
> Sorry if someone already refused this (I didn't follow the previous
> discussion too closely) but: Looking at the code I see no reason why
> ext4_claim_free_blocks needs i_block_reservation_lock. In fact mballoc
> calls this function without the lock. So could not we just compute
> 'total' under the lock, release it, reserve quota and then claim free
> blocks? You'd get rid of undoing the block reservation and obtain quota
> and blocks in the usual order...

The code is protecting i_reserved_meta_blocks. We are recalculating the
value and need to make sure we don't get it wrong. I guess
we need to hold i_block_reservation_lock while we recalculate the
metadata blocks.

-aneesh

2009-12-18 10:06:08

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix sleep inside spinlock issue aka #14739 V2

On Thu, Dec 10, 2009 at 08:22:16PM +0300, Dmitry Monakhov wrote:
>
> drop i_block_reservation_lock before vfs_dq_reserve_block().
> this patch fix http://bugzilla.kernel.org/show_bug.cgi?id=14739
>
> changes from previous version:
> - simplify the patch according to Jan's comments
>
> Signed-off-by: Dmitry Monakhov <[email protected]>
> ---
> fs/ext4/inode.c | 6 +++---
> 1 files changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 942e183..2327f7a 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -1851,6 +1851,7 @@ repeat:
>
> md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
> total = md_needed + nrblocks;
> + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
>
> /*
> * Make quota reservation here to prevent quota overflow
> @@ -1858,12 +1859,10 @@ repeat:
> * time.
> */
> if (vfs_dq_reserve_block(inode, total)) {
> - spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> return -EDQUOT;
> }
>
> if (ext4_claim_free_blocks(sbi, total)) {
> - spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> vfs_dq_release_reservation_block(inode, total);
> if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
> yield();
> @@ -1871,10 +1870,11 @@ repeat:
> }
> return -ENOSPC;
> }
> + spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
> EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
> EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
> -
> spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> +
> return 0; /* success */


NACK
I guess we would end up setting i_reserved_meta_blocks wrongly: since we
are dropping i_block_reservation_lock, mdblocks could be based on an old
value of i_reserved_meta_blocks.

-aneesh

2009-12-19 15:11:53

by Dmitry Monakhov

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix sleep inside spinlock issue aka #14739 V2

2009/12/18 Aneesh Kumar K.V <[email protected]>:
> On Thu, Dec 10, 2009 at 08:22:16PM +0300, Dmitry Monakhov wrote:
>>
>> drop i_block_reservation_lock before vfs_dq_reserve_block().
>> this patch fix http://bugzilla.kernel.org/show_bug.cgi?id=14739
>>
>> changes from previous version:
>>  - simplify the patch according to Jan's comments
>>
>> Signed-off-by: Dmitry Monakhov <[email protected]>
>> ---
>>  fs/ext4/inode.c |    6 +++---
>>  1 files changed, 3 insertions(+), 3 deletions(-)
>>
>> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
>> index 942e183..2327f7a 100644
>> --- a/fs/ext4/inode.c
>> +++ b/fs/ext4/inode.c
>> @@ -1851,6 +1851,7 @@ repeat:
>>
>>       md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
>>       total = md_needed + nrblocks;
>> +     spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
>>
>>       /*
>>        * Make quota reservation here to prevent quota overflow
>> @@ -1858,12 +1859,10 @@ repeat:
>>        * time.
>>        */
>>       if (vfs_dq_reserve_block(inode, total)) {
>> -             spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
>>               return -EDQUOT;
>>       }
>>
>>       if (ext4_claim_free_blocks(sbi, total)) {
>> -             spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
>>               vfs_dq_release_reservation_block(inode, total);
>>               if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
>>                       yield();
>> @@ -1871,10 +1870,11 @@ repeat:
>>               }
>>               return -ENOSPC;
>>       }
>> +     spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
>>       EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
>>       EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
>> -
>>       spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
>> +
>>       return 0;       /* success */
>
>
> NACK
> I guess we would end up setting i_reserved_meta_blocks wrongly because
> mdblocks could be based on the old value of i_reserved_meta_blocks
> because we are dropping i_block_reservation_lock lock.
Yes, you are right. I've already fixed this.
Please take a look at the committed version:
http://git.kernel.org/?p=linux/kernel/git/jack/linux-fs-2.6.git;a=commitdiff;h=ef22d6deda461ef32c72944f662863c022253571
>
> -aneesh
>