2009-01-06 04:40:55

by Mingming Cao

[permalink] [raw]
Subject: [PATCH V5 3/5]ext4: quota handling for delayed allocation

ext4: quota reservation for delayed allocation

Uses quota reservation/claim/release to handle quota properly for delayed
allocation in three steps: 1) quotas are reserved when data is being copied
to the page cache and block allocation is deferred, 2) when new blocks are
allocated, reserved quotas are converted to real allocated quota, 3) over-booked
quotas for metadata blocks are released back.


Signed-off-by: Mingming Cao <[email protected]>
---
fs/ext4/ext4.h | 1 +
fs/ext4/inode.c | 36 +++++++++++++++++++++++++++++++++---
fs/ext4/mballoc.c | 44 ++++++++++++++++++++++++++------------------
fs/ext4/super.c | 4 ++++
4 files changed, 64 insertions(+), 21 deletions(-)

Index: linux-2.6.28-git7/fs/ext4/inode.c
===================================================================
--- linux-2.6.28-git7.orig/fs/ext4/inode.c 2009-01-05 17:45:01.000000000 -0800
+++ linux-2.6.28-git7/fs/ext4/inode.c 2009-01-05 17:45:30.000000000 -0800
@@ -973,6 +973,17 @@ out:
return err;
}

+unsigned long long ext4_get_reserved_space(struct inode *inode)
+{
+ unsigned long long total;
+
+ spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+ total = EXT4_I(inode)->i_reserved_data_blocks +
+ EXT4_I(inode)->i_reserved_meta_blocks;
+ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+
+ return total;
+}
/*
* Calculate the number of metadata blocks need to reserve
* to allocate @blocks for non extent file based file
@@ -1034,8 +1045,14 @@ static void ext4_da_update_reserve_space
/* update per-inode reservations */
BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
EXT4_I(inode)->i_reserved_data_blocks -= used;
-
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+
+ /*
+ * free those over-booking quota for metadata blocks
+ */
+
+ if (mdb_free)
+ vfs_dq_release_reservation_block(inode, mdb_free);
}

/*
@@ -1547,8 +1564,8 @@ static int ext4_journalled_write_end(str
static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
{
int retries = 0;
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- unsigned long md_needed, mdblocks, total = 0;
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ unsigned long md_needed, mdblocks, total = 0;

/*
* recalculate the amount of metadata blocks to reserve
@@ -1564,12 +1581,23 @@ repeat:
md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
total = md_needed + nrblocks;

+ /*
+ * Make quota reservation here to prevent quota overflow
+ * later. Real quota accounting is done at pages writeout
+ * time.
+ */
+ if (vfs_dq_reserve_block(inode, total)) {
+ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ return -EDQUOT;
+ }
+
if (ext4_claim_free_blocks(sbi, total)) {
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
yield();
goto repeat;
}
+ vfs_dq_release_reservation_block(inode, total);
return -ENOSPC;
}
EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
@@ -1623,6 +1651,8 @@ static void ext4_da_release_space(struct
BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
EXT4_I(inode)->i_reserved_meta_blocks = mdb;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+
+ vfs_dq_release_reservation_block(inode, release);
}

static void ext4_da_page_release_reservation(struct page *page,
Index: linux-2.6.28-git7/fs/ext4/super.c
===================================================================
--- linux-2.6.28-git7.orig/fs/ext4/super.c 2009-01-05 17:45:01.000000000 -0800
+++ linux-2.6.28-git7/fs/ext4/super.c 2009-01-05 17:45:30.000000000 -0800
@@ -948,6 +948,10 @@ static struct dquot_operations ext4_quot
.initialize = ext4_dquot_initialize,
.drop = ext4_dquot_drop,
.alloc_space = dquot_alloc_space,
+ .reserve_space = dquot_reserve_space,
+ .claim_space = dquot_claim_space,
+ .release_rsv = dquot_release_reserved_space,
+ .get_reserved_space = ext4_get_reserved_space,
.alloc_inode = dquot_alloc_inode,
.free_space = dquot_free_space,
.free_inode = dquot_free_inode,
Index: linux-2.6.28-git7/fs/ext4/mballoc.c
===================================================================
--- linux-2.6.28-git7.orig/fs/ext4/mballoc.c 2009-01-05 17:45:01.000000000 -0800
+++ linux-2.6.28-git7/fs/ext4/mballoc.c 2009-01-05 17:46:38.000000000 -0800
@@ -3086,9 +3086,12 @@ ext4_mb_mark_diskspace_used(struct ext4_
if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
/* release all the reserved blocks if non delalloc */
percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
- else
+ else {
percpu_counter_sub(&sbi->s_dirtyblocks_counter,
ac->ac_b_ex.fe_len);
+ /* convert reserved quota blocks to real quota blocks */
+ vfs_dq_claim_block(ac->ac_inode, ac->ac_b_ex.fe_len);
+ }

if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi,
@@ -4533,7 +4536,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t
struct ext4_sb_info *sbi;
struct super_block *sb;
ext4_fsblk_t block = 0;
- unsigned int inquota;
+ unsigned int inquota = 0;
unsigned int reserv_blks = 0;

sb = ar->inode->i_sb;
@@ -4551,9 +4554,17 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t
(unsigned long long) ar->pleft,
(unsigned long long) ar->pright);

- if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) {
- /*
- * With delalloc we already reserved the blocks
+ /*
+ * For delayed allocation, we could skip the ENOSPC and
+ * EDQUOT check, as blocks and quotas have been already
+ * reserved when data being copied into pagecache.
+ */
+ if (EXT4_I(ar->inode)->i_delalloc_reserved_flag)
+ ar->flags |= EXT4_MB_DELALLOC_RESERVED;
+ else {
+ /* Without delayed allocation we need to verify
+ * there is enough free blocks to do block allocation
+ * and verify allocation doesn't exceed the quota limits.
*/
while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) {
/* let others to free the space */
@@ -4565,19 +4576,16 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t
return 0;
}
reserv_blks = ar->len;
+ while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
+ ar->flags |= EXT4_MB_HINT_NOPREALLOC;
+ ar->len--;
+ }
+ inquota = ar->len;
+ if (ar->len == 0) {
+ *errp = -EDQUOT;
+ goto out3;
+ }
}
- while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
- ar->flags |= EXT4_MB_HINT_NOPREALLOC;
- ar->len--;
- }
- if (ar->len == 0) {
- *errp = -EDQUOT;
- goto out3;
- }
- inquota = ar->len;
-
- if (EXT4_I(ar->inode)->i_delalloc_reserved_flag)
- ar->flags |= EXT4_MB_DELALLOC_RESERVED;

ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
if (!ac) {
@@ -4643,7 +4651,7 @@ repeat:
out2:
kmem_cache_free(ext4_ac_cachep, ac);
out1:
- if (ar->len < inquota)
+ if (inquota && ar->len < inquota)
DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len);
out3:
if (!ar->len) {
Index: linux-2.6.28-git7/fs/ext4/ext4.h
===================================================================
--- linux-2.6.28-git7.orig/fs/ext4/ext4.h 2009-01-05 17:45:01.000000000 -0800
+++ linux-2.6.28-git7/fs/ext4/ext4.h 2009-01-05 17:45:30.000000000 -0800
@@ -1124,6 +1124,7 @@ extern int ext4_chunk_trans_blocks(struc
extern int ext4_block_truncate_page(handle_t *handle,
struct address_space *mapping, loff_t from);
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
+extern unsigned long long ext4_get_reserved_space(struct inode *inode);

/* ioctl.c */
extern long ext4_ioctl(struct file *, unsigned int, unsigned long);




2009-01-06 09:35:30

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH V5 3/5]ext4: quota handling for delayed allocation

On Mon 05-01-09 20:40:55, Mingming Cao wrote:
> ext4: quota reservation for delayed allocation
>
> Uses quota reservation/claim/release to handle quota properly for delayed
> allocation in the three steps: 1) quotas are reserved when data being copied
> to cache when block allocation is defered 2) when new blocks are allocated.
> reserved quotas are converted to the real allocated quota, 2) over-booked
> quotas for metadata blocks are released back.
>
> Signed-off-by: Mingming Cao <[email protected]>
The patch looks fine as far as I can tell :). But I'm not too familiar
with the code, so I won't add my Acked-by...

Honza
> ---
> fs/ext4/ext4.h | 1 +
> fs/ext4/inode.c | 36 +++++++++++++++++++++++++++++++++---
> fs/ext4/mballoc.c | 44 ++++++++++++++++++++++++++------------------
> fs/ext4/super.c | 4 ++++
> 4 files changed, 64 insertions(+), 21 deletions(-)
>
> Index: linux-2.6.28-git7/fs/ext4/inode.c
> ===================================================================
> --- linux-2.6.28-git7.orig/fs/ext4/inode.c 2009-01-05 17:45:01.000000000 -0800
> +++ linux-2.6.28-git7/fs/ext4/inode.c 2009-01-05 17:45:30.000000000 -0800
> @@ -973,6 +973,17 @@ out:
> return err;
> }
>
> +unsigned long long ext4_get_reserved_space(struct inode *inode)
> +{
> + unsigned long long total;
> +
> + spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
> + total = EXT4_I(inode)->i_reserved_data_blocks +
> + EXT4_I(inode)->i_reserved_meta_blocks;
> + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> +
> + return total;
> +}
> /*
> * Calculate the number of metadata blocks need to reserve
> * to allocate @blocks for non extent file based file
> @@ -1034,8 +1045,14 @@ static void ext4_da_update_reserve_space
> /* update per-inode reservations */
> BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
> EXT4_I(inode)->i_reserved_data_blocks -= used;
> -
> spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> +
> + /*
> + * free those over-booking quota for metadata blocks
> + */
> +
> + if (mdb_free)
> + vfs_dq_release_reservation_block(inode, mdb_free);
> }
>
> /*
> @@ -1547,8 +1564,8 @@ static int ext4_journalled_write_end(str
> static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
> {
> int retries = 0;
> - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
> - unsigned long md_needed, mdblocks, total = 0;
> + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
> + unsigned long md_needed, mdblocks, total = 0;
>
> /*
> * recalculate the amount of metadata blocks to reserve
> @@ -1564,12 +1581,23 @@ repeat:
> md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
> total = md_needed + nrblocks;
>
> + /*
> + * Make quota reservation here to prevent quota overflow
> + * later. Real quota accounting is done at pages writeout
> + * time.
> + */
> + if (vfs_dq_reserve_block(inode, total)) {
> + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> + return -EDQUOT;
> + }
> +
> if (ext4_claim_free_blocks(sbi, total)) {
> spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
> yield();
> goto repeat;
> }
> + vfs_dq_release_reservation_block(inode, total);
> return -ENOSPC;
> }
> EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
> @@ -1623,6 +1651,8 @@ static void ext4_da_release_space(struct
> BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
> EXT4_I(inode)->i_reserved_meta_blocks = mdb;
> spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> +
> + vfs_dq_release_reservation_block(inode, release);
> }
>
> static void ext4_da_page_release_reservation(struct page *page,
> Index: linux-2.6.28-git7/fs/ext4/super.c
> ===================================================================
> --- linux-2.6.28-git7.orig/fs/ext4/super.c 2009-01-05 17:45:01.000000000 -0800
> +++ linux-2.6.28-git7/fs/ext4/super.c 2009-01-05 17:45:30.000000000 -0800
> @@ -948,6 +948,10 @@ static struct dquot_operations ext4_quot
> .initialize = ext4_dquot_initialize,
> .drop = ext4_dquot_drop,
> .alloc_space = dquot_alloc_space,
> + .reserve_space = dquot_reserve_space,
> + .claim_space = dquot_claim_space,
> + .release_rsv = dquot_release_reserved_space,
> + .get_reserved_space = ext4_get_reserved_space,
> .alloc_inode = dquot_alloc_inode,
> .free_space = dquot_free_space,
> .free_inode = dquot_free_inode,
> Index: linux-2.6.28-git7/fs/ext4/mballoc.c
> ===================================================================
> --- linux-2.6.28-git7.orig/fs/ext4/mballoc.c 2009-01-05 17:45:01.000000000 -0800
> +++ linux-2.6.28-git7/fs/ext4/mballoc.c 2009-01-05 17:46:38.000000000 -0800
> @@ -3086,9 +3086,12 @@ ext4_mb_mark_diskspace_used(struct ext4_
> if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
> /* release all the reserved blocks if non delalloc */
> percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
> - else
> + else {
> percpu_counter_sub(&sbi->s_dirtyblocks_counter,
> ac->ac_b_ex.fe_len);
> + /* convert reserved quota blocks to real quota blocks */
> + vfs_dq_claim_block(ac->ac_inode, ac->ac_b_ex.fe_len);
> + }
>
> if (sbi->s_log_groups_per_flex) {
> ext4_group_t flex_group = ext4_flex_group(sbi,
> @@ -4533,7 +4536,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t
> struct ext4_sb_info *sbi;
> struct super_block *sb;
> ext4_fsblk_t block = 0;
> - unsigned int inquota;
> + unsigned int inquota = 0;
> unsigned int reserv_blks = 0;
>
> sb = ar->inode->i_sb;
> @@ -4551,9 +4554,17 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t
> (unsigned long long) ar->pleft,
> (unsigned long long) ar->pright);
>
> - if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) {
> - /*
> - * With delalloc we already reserved the blocks
> + /*
> + * For delayed allocation, we could skip the ENOSPC and
> + * EDQUOT check, as blocks and quotas have been already
> + * reserved when data being copied into pagecache.
> + */
> + if (EXT4_I(ar->inode)->i_delalloc_reserved_flag)
> + ar->flags |= EXT4_MB_DELALLOC_RESERVED;
> + else {
> + /* Without delayed allocation we need to verify
> + * there is enough free blocks to do block allocation
> + * and verify allocation doesn't exceed the quota limits.
> */
> while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) {
> /* let others to free the space */
> @@ -4565,19 +4576,16 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t
> return 0;
> }
> reserv_blks = ar->len;
> + while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
> + ar->flags |= EXT4_MB_HINT_NOPREALLOC;
> + ar->len--;
> + }
> + inquota = ar->len;
> + if (ar->len == 0) {
> + *errp = -EDQUOT;
> + goto out3;
> + }
> }
> - while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
> - ar->flags |= EXT4_MB_HINT_NOPREALLOC;
> - ar->len--;
> - }
> - if (ar->len == 0) {
> - *errp = -EDQUOT;
> - goto out3;
> - }
> - inquota = ar->len;
> -
> - if (EXT4_I(ar->inode)->i_delalloc_reserved_flag)
> - ar->flags |= EXT4_MB_DELALLOC_RESERVED;
>
> ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
> if (!ac) {
> @@ -4643,7 +4651,7 @@ repeat:
> out2:
> kmem_cache_free(ext4_ac_cachep, ac);
> out1:
> - if (ar->len < inquota)
> + if (inquota && ar->len < inquota)
> DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len);
> out3:
> if (!ar->len) {
> Index: linux-2.6.28-git7/fs/ext4/ext4.h
> ===================================================================
> --- linux-2.6.28-git7.orig/fs/ext4/ext4.h 2009-01-05 17:45:01.000000000 -0800
> +++ linux-2.6.28-git7/fs/ext4/ext4.h 2009-01-05 17:45:30.000000000 -0800
> @@ -1124,6 +1124,7 @@ extern int ext4_chunk_trans_blocks(struc
> extern int ext4_block_truncate_page(handle_t *handle,
> struct address_space *mapping, loff_t from);
> extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
> +extern unsigned long long ext4_get_reserved_space(struct inode *inode);
>
> /* ioctl.c */
> extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
>
>
--
Jan Kara <[email protected]>
SUSE Labs, CR

2009-01-06 09:39:38

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH V5 3/5]ext4: quota handling for delayed allocation

On Tue 06-01-09 10:35:29, Jan Kara wrote:
> On Mon 05-01-09 20:40:55, Mingming Cao wrote:
> > ext4: quota reservation for delayed allocation
> >
> > Uses quota reservation/claim/release to handle quota properly for delayed
> > allocation in the three steps: 1) quotas are reserved when data being copied
> > to cache when block allocation is defered 2) when new blocks are allocated.
> > reserved quotas are converted to the real allocated quota, 2) over-booked
> > quotas for metadata blocks are released back.
> >
> > Signed-off-by: Mingming Cao <[email protected]>
> The patch looks fine as far as I can tell :). But I'm not to familiar
> with the code so I won't add my Acked-by...
On a second thought:

> > +unsigned long long ext4_get_reserved_space(struct inode *inode)
> > +{
> > + unsigned long long total;
> > +
> > + spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
> > + total = EXT4_I(inode)->i_reserved_data_blocks +
> > + EXT4_I(inode)->i_reserved_meta_blocks;
> > + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> > +
> > + return total;
> > +}
Shouldn't we return (total << inode->i_blkbits) here? Quota expects
bytes as output...

Honza
--
Jan Kara <[email protected]>
SUSE Labs, CR

2009-01-13 00:47:09

by Mingming Cao

[permalink] [raw]
Subject: Re: [PATCH V5 3/5]ext4: quota handling for delayed allocation


在 2009-01-06二的 10:39 +0100,Jan Kara写道:
> On Tue 06-01-09 10:35:29, Jan Kara wrote:
> > On Mon 05-01-09 20:40:55, Mingming Cao wrote:
> > > ext4: quota reservation for delayed allocation
> > >
> > > Uses quota reservation/claim/release to handle quota properly for delayed
> > > allocation in the three steps: 1) quotas are reserved when data being copied
> > > to cache when block allocation is defered 2) when new blocks are allocated.
> > > reserved quotas are converted to the real allocated quota, 2) over-booked
> > > quotas for metadata blocks are released back.
> > >
> > > Signed-off-by: Mingming Cao <[email protected]>
> > The patch looks fine as far as I can tell :). But I'm not to familiar
> > with the code so I won't add my Acked-by...
> On a second thought:
>
> > > +unsigned long long ext4_get_reserved_space(struct inode *inode)
> > > +{
> > > + unsigned long long total;
> > > +
> > > + spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
> > > + total = EXT4_I(inode)->i_reserved_data_blocks +
> > > + EXT4_I(inode)->i_reserved_meta_blocks;
> > > + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> > > +
> > > + return total;
> > > +}
> Shouldn't we return here (total << inode->i_blksize)? Quota expects
> bytes as output...
>

This is the updated ext4 part, which now returns the number of bytes reserved as output.

Thanks,
Mingming


Attachments:
ext4-delalloc-quota-spt.patch (7.32 kB)

2009-01-13 15:09:50

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH V5 3/5]ext4: quota handling for delayed allocation

On Mon 12-01-09 16:47:09, Mingming Cao wrote:
> 在 2009-01-06二的 10:39 +0100,Jan Kara写道:
> > On Tue 06-01-09 10:35:29, Jan Kara wrote:
> > > On Mon 05-01-09 20:40:55, Mingming Cao wrote:
> > > > ext4: quota reservation for delayed allocation
> > > >
> > > > Uses quota reservation/claim/release to handle quota properly for delayed
> > > > allocation in the three steps: 1) quotas are reserved when data being copied
> > > > to cache when block allocation is defered 2) when new blocks are allocated.
> > > > reserved quotas are converted to the real allocated quota, 2) over-booked
> > > > quotas for metadata blocks are released back.
> > > >
> > > > Signed-off-by: Mingming Cao <[email protected]>
> > > The patch looks fine as far as I can tell :). But I'm not to familiar
> > > with the code so I won't add my Acked-by...
> > On a second thought:
> >
> > > > +unsigned long long ext4_get_reserved_space(struct inode *inode)
> > > > +{
> > > > + unsigned long long total;
> > > > +
> > > > + spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
> > > > + total = EXT4_I(inode)->i_reserved_data_blocks +
> > > > + EXT4_I(inode)->i_reserved_meta_blocks;
> > > > + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> > > > +
> > > > + return total;
> > > > +}
> > Shouldn't we return here (total << inode->i_blksize)? Quota expects
> > bytes as output...
> >
>
> This is updated ext4 part to return number of bytes reserved as output.
OK, it looks fine now.
Acked-by: Jan Kara <[email protected]>

Honza
>
> Thanks,
> Mingming

> ext4: quota reservation for delayed allocation
>
> Uses quota reservation/claim/release to handle quota properly for delayed
> allocation in the three steps: 1) quotas are reserved when data being copied
> to cache when block allocation is defered 2) when new blocks are allocated.
> reserved quotas are converted to the real allocated quota, 2) over-booked
> quotas for metadata blocks are released back.
>
>
> Signed-off-by: Mingming Cao <[email protected]>
> ---
> fs/ext4/ext4.h | 1 +
> fs/ext4/inode.c | 36 +++++++++++++++++++++++++++++++++---
> fs/ext4/mballoc.c | 44 ++++++++++++++++++++++++++------------------
> fs/ext4/super.c | 4 ++++
> 4 files changed, 64 insertions(+), 21 deletions(-)
>
> Index: linux-2.6.29-rc1/fs/ext4/inode.c
> ===================================================================
> --- linux-2.6.29-rc1.orig/fs/ext4/inode.c 2009-01-10 15:43:05.000000000 -0800
> +++ linux-2.6.29-rc1/fs/ext4/inode.c 2009-01-12 16:31:34.000000000 -0800
> @@ -973,6 +973,17 @@ out:
> return err;
> }
>
> +qsize_t ext4_get_reserved_space(struct inode *inode)
> +{
> + unsigned long long total;
> +
> + spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
> + total = EXT4_I(inode)->i_reserved_data_blocks +
> + EXT4_I(inode)->i_reserved_meta_blocks;
> + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> +
> + return (qsize_t)total << inode->i_blkbits;
> +}
> /*
> * Calculate the number of metadata blocks need to reserve
> * to allocate @blocks for non extent file based file
> @@ -1034,8 +1045,14 @@ static void ext4_da_update_reserve_space
> /* update per-inode reservations */
> BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
> EXT4_I(inode)->i_reserved_data_blocks -= used;
> -
> spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> +
> + /*
> + * free those over-booking quota for metadata blocks
> + */
> +
> + if (mdb_free)
> + vfs_dq_release_reservation_block(inode, mdb_free);
> }
>
> /*
> @@ -1547,8 +1564,8 @@ static int ext4_journalled_write_end(str
> static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
> {
> int retries = 0;
> - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
> - unsigned long md_needed, mdblocks, total = 0;
> + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
> + unsigned long md_needed, mdblocks, total = 0;
>
> /*
> * recalculate the amount of metadata blocks to reserve
> @@ -1564,12 +1581,23 @@ repeat:
> md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
> total = md_needed + nrblocks;
>
> + /*
> + * Make quota reservation here to prevent quota overflow
> + * later. Real quota accounting is done at pages writeout
> + * time.
> + */
> + if (vfs_dq_reserve_block(inode, total)) {
> + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> + return -EDQUOT;
> + }
> +
> if (ext4_claim_free_blocks(sbi, total)) {
> spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
> yield();
> goto repeat;
> }
> + vfs_dq_release_reservation_block(inode, total);
> return -ENOSPC;
> }
> EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
> @@ -1623,6 +1651,8 @@ static void ext4_da_release_space(struct
> BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
> EXT4_I(inode)->i_reserved_meta_blocks = mdb;
> spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> +
> + vfs_dq_release_reservation_block(inode, release);
> }
>
> static void ext4_da_page_release_reservation(struct page *page,
> Index: linux-2.6.29-rc1/fs/ext4/super.c
> ===================================================================
> --- linux-2.6.29-rc1.orig/fs/ext4/super.c 2009-01-10 15:43:05.000000000 -0800
> +++ linux-2.6.29-rc1/fs/ext4/super.c 2009-01-12 16:28:44.000000000 -0800
> @@ -945,6 +945,10 @@ static struct dquot_operations ext4_quot
> .initialize = ext4_dquot_initialize,
> .drop = ext4_dquot_drop,
> .alloc_space = dquot_alloc_space,
> + .reserve_space = dquot_reserve_space,
> + .claim_space = dquot_claim_space,
> + .release_rsv = dquot_release_reserved_space,
> + .get_reserved_space = ext4_get_reserved_space,
> .alloc_inode = dquot_alloc_inode,
> .free_space = dquot_free_space,
> .free_inode = dquot_free_inode,
> Index: linux-2.6.29-rc1/fs/ext4/mballoc.c
> ===================================================================
> --- linux-2.6.29-rc1.orig/fs/ext4/mballoc.c 2009-01-10 15:43:05.000000000 -0800
> +++ linux-2.6.29-rc1/fs/ext4/mballoc.c 2009-01-12 16:28:44.000000000 -0800
> @@ -3086,9 +3086,12 @@ ext4_mb_mark_diskspace_used(struct ext4_
> if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
> /* release all the reserved blocks if non delalloc */
> percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
> - else
> + else {
> percpu_counter_sub(&sbi->s_dirtyblocks_counter,
> ac->ac_b_ex.fe_len);
> + /* convert reserved quota blocks to real quota blocks */
> + vfs_dq_claim_block(ac->ac_inode, ac->ac_b_ex.fe_len);
> + }
>
> if (sbi->s_log_groups_per_flex) {
> ext4_group_t flex_group = ext4_flex_group(sbi,
> @@ -4533,7 +4536,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t
> struct ext4_sb_info *sbi;
> struct super_block *sb;
> ext4_fsblk_t block = 0;
> - unsigned int inquota;
> + unsigned int inquota = 0;
> unsigned int reserv_blks = 0;
>
> sb = ar->inode->i_sb;
> @@ -4551,9 +4554,17 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t
> (unsigned long long) ar->pleft,
> (unsigned long long) ar->pright);
>
> - if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) {
> - /*
> - * With delalloc we already reserved the blocks
> + /*
> + * For delayed allocation, we could skip the ENOSPC and
> + * EDQUOT check, as blocks and quotas have been already
> + * reserved when data being copied into pagecache.
> + */
> + if (EXT4_I(ar->inode)->i_delalloc_reserved_flag)
> + ar->flags |= EXT4_MB_DELALLOC_RESERVED;
> + else {
> + /* Without delayed allocation we need to verify
> + * there is enough free blocks to do block allocation
> + * and verify allocation doesn't exceed the quota limits.
> */
> while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) {
> /* let others to free the space */
> @@ -4565,19 +4576,16 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t
> return 0;
> }
> reserv_blks = ar->len;
> + while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
> + ar->flags |= EXT4_MB_HINT_NOPREALLOC;
> + ar->len--;
> + }
> + inquota = ar->len;
> + if (ar->len == 0) {
> + *errp = -EDQUOT;
> + goto out3;
> + }
> }
> - while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
> - ar->flags |= EXT4_MB_HINT_NOPREALLOC;
> - ar->len--;
> - }
> - if (ar->len == 0) {
> - *errp = -EDQUOT;
> - goto out3;
> - }
> - inquota = ar->len;
> -
> - if (EXT4_I(ar->inode)->i_delalloc_reserved_flag)
> - ar->flags |= EXT4_MB_DELALLOC_RESERVED;
>
> ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
> if (!ac) {
> @@ -4643,7 +4651,7 @@ repeat:
> out2:
> kmem_cache_free(ext4_ac_cachep, ac);
> out1:
> - if (ar->len < inquota)
> + if (inquota && ar->len < inquota)
> DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len);
> out3:
> if (!ar->len) {
> Index: linux-2.6.29-rc1/fs/ext4/ext4.h
> ===================================================================
> --- linux-2.6.29-rc1.orig/fs/ext4/ext4.h 2009-01-10 15:43:05.000000000 -0800
> +++ linux-2.6.29-rc1/fs/ext4/ext4.h 2009-01-12 16:28:44.000000000 -0800
> @@ -1098,6 +1098,7 @@ extern int ext4_chunk_trans_blocks(struc
> extern int ext4_block_truncate_page(handle_t *handle,
> struct address_space *mapping, loff_t from);
> extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
> +extern qsize_t ext4_get_reserved_space(struct inode *inode);
>
> /* ioctl.c */
> extern long ext4_ioctl(struct file *, unsigned int, unsigned long);

--
Jan Kara <[email protected]>
SUSE Labs, CR