2009-12-09 02:11:12

by Dmitry Monakhov

[permalink] [raw]
Subject: [PATCH 1/3] [RFC] vfs: add generic reserved space management interface

Add new field "i_rsv_blocks" to generic inode. This value is
managed similar to i_blocks, i_bytes fileds (protected by i_lock).
This generic interface will be used by generic quota code similar
to i_blocks.


diff --git a/fs/inode.c b/fs/inode.c
index 4d8e3be..81eee5a 100644

Signed-off-by: Dmitry Monakhov <[email protected]>
---
fs/inode.c | 1 +
fs/stack.c | 1 +
fs/stat.c | 41 ++++++++++++++++++++++++++++++++++++++++-
include/linux/fs.h | 8 +++++++-
4 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index 4d8e3be..81eee5a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -143,6 +143,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_gid = 0;
atomic_set(&inode->i_writecount, 0);
inode->i_size = 0;
+ inode->i_rsv_blocks = 0;
inode->i_blocks = 0;
inode->i_bytes = 0;
inode->i_generation = 0;
diff --git a/fs/stack.c b/fs/stack.c
index 67716f6..b6c4aa7 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -11,6 +11,7 @@ void fsstack_copy_inode_size(struct inode *dst, const struct inode *src)
{
i_size_write(dst, i_size_read((struct inode *)src));
dst->i_blocks = src->i_blocks;
+ dst->i_rsv_blocks = src->i_rsv_blocks;
}
EXPORT_SYMBOL_GPL(fsstack_copy_inode_size);

diff --git a/fs/stat.c b/fs/stat.c
index 075694e..ea55419 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -31,7 +31,7 @@ void generic_fillattr(struct inode *inode, struct kstat *stat)
stat->mtime = inode->i_mtime;
stat->ctime = inode->i_ctime;
stat->size = i_size_read(inode);
- stat->blocks = inode->i_blocks;
+ stat->blocks = inode->i_blocks + inode->i_rsv_blocks;
stat->blksize = (1 << inode->i_blkbits);
}

@@ -452,3 +452,42 @@ void inode_set_bytes(struct inode *inode, loff_t bytes)
}

EXPORT_SYMBOL(inode_set_bytes);
+
+void inode_add_rsv_blocks(struct inode *inode, blkcnt_t blocks)
+{
+ spin_lock(&inode->i_lock);
+ inode->i_rsv_blocks += blocks;
+ spin_unlock(&inode->i_lock);
+}
+
+EXPORT_SYMBOL(inode_add_rsv_blocks);
+
+void inode_claim_rsv_blocks(struct inode *inode, blkcnt_t blocks)
+{
+ spin_lock(&inode->i_lock);
+ inode->i_rsv_blocks -= blocks;
+ inode->i_blocks += blocks;
+ spin_unlock(&inode->i_lock);
+}
+
+EXPORT_SYMBOL(inode_claim_rsv_blocks);
+
+void inode_sub_rsv_blocks(struct inode *inode, blkcnt_t blocks)
+{
+ spin_lock(&inode->i_lock);
+ inode->i_rsv_blocks -= blocks;
+ spin_unlock(&inode->i_lock);
+}
+
+EXPORT_SYMBOL(inode_sub_rsv_blocks);
+
+blkcnt_t inode_get_rsv_blocks(struct inode *inode)
+{
+ blkcnt_t ret;
+ spin_lock(&inode->i_lock);
+ ret = inode->i_rsv_blocks;
+ spin_unlock(&inode->i_lock);
+ return ret;
+}
+
+EXPORT_SYMBOL(inode_get_rsv_blocks);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2620a8c..af19bdb 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -736,10 +736,12 @@ struct inode {
struct timespec i_mtime;
struct timespec i_ctime;
blkcnt_t i_blocks;
+ blkcnt_t i_rsv_blocks;
unsigned int i_blkbits;
unsigned short i_bytes;
umode_t i_mode;
- spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
+ spinlock_t i_lock; /* i_blocks, i_bytes, i_rsv_blocks
+ maybe i_size */
struct mutex i_mutex;
struct rw_semaphore i_alloc_sem;
const struct inode_operations *i_op;
@@ -2318,6 +2320,10 @@ void inode_add_bytes(struct inode *inode, loff_t bytes);
void inode_sub_bytes(struct inode *inode, loff_t bytes);
loff_t inode_get_bytes(struct inode *inode);
void inode_set_bytes(struct inode *inode, loff_t bytes);
+void inode_add_rsv_blocks(struct inode *inode, blkcnt_t blocks);
+void inode_claim_rsv_blocks(struct inode *inode, blkcnt_t blocks);
+void inode_sub_rsv_blocks(struct inode *inode, blkcnt_t blocks);
+blkcnt_t inode_get_rsv_blocks(struct inode *inode);

extern int vfs_readdir(struct file *, filldir_t, void *);

--
1.6.0.4



2009-12-09 02:11:13

by Dmitry Monakhov

[permalink] [raw]
Subject: [PATCH 2/3] quota: convert to generic reserved space management

Before this patch fs_rsv_space must be always equals to
quot_rsv_space all the time. Otherwise dquot_transfer()
will result in incorrect quota(because fs_rsv_space is used
on transfer). This result in complex locking order issues
aka http://bugzilla.kernel.org/show_bug.cgi?id=14739
After this patch quot_rsv_space will be transferred on
dquot_transfer(), this automatically solves locking issues.

- Unify dquot_reserve_space() and dquot_reserve_space()
- Unify dquot_release_reserved_space() and dquot_free_space()
- Also this patch add missing warning update to release_rsv()
dquot_release_reserved_space() must call flush_warnings() as
dquot_free_space() does.

Signed-off-by: Dmitry Monakhov <[email protected]>
---
fs/quota/dquot.c | 169 +++++++++++++++++++++---------------------------
include/linux/quota.h | 2 -
2 files changed, 74 insertions(+), 97 deletions(-)

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 39b49c4..4bc5ac8 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1396,6 +1396,22 @@ EXPORT_SYMBOL(vfs_dq_drop);
* inode write go into the same transaction.
*/

+static void __inode_add_bytes(struct inode *inode, qsize_t number, int reserve)
+{
+ if (reserve)
+ inode_add_rsv_blocks(inode, number >> 9);
+ else
+ inode_add_bytes(inode, number);
+}
+
+static void __inode_sub_bytes(struct inode *inode, qsize_t number, int reserve)
+{
+ if (reserve)
+ inode_sub_rsv_blocks(inode, number >> 9);
+ else
+ inode_sub_bytes(inode, number);
+}
+
/*
* This operation can block, but only after everything is updated
*/
@@ -1405,6 +1421,21 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
int cnt, ret = QUOTA_OK;
char warntype[MAXQUOTAS];

+ /*
+ * First test before acquiring mutex - solves deadlocks when we
+ * re-enter the quota code and are already holding the mutex
+ */
+ if (IS_NOQUOTA(inode)) {
+ __inode_add_bytes(inode, number, reserve);
+ goto out;
+ }
+
+ down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ if (IS_NOQUOTA(inode)) {
+ __inode_add_bytes(inode, number, reserve);
+ goto out_unlock;
+ }
+
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
warntype[cnt] = QUOTA_NL_NOWARN;

@@ -1415,7 +1446,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt)
== NO_QUOTA) {
ret = NO_QUOTA;
- goto out_unlock;
+ goto out_data_unlock;
}
}
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1426,64 +1457,32 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
else
dquot_incr_space(inode->i_dquot[cnt], number);
}
- if (!reserve)
- inode_add_bytes(inode, number);
-out_unlock:
- spin_unlock(&dq_data_lock);
- flush_warnings(inode->i_dquot, warntype);
- return ret;
-}
-
-int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
-{
- int cnt, ret = QUOTA_OK;
-
- /*
- * First test before acquiring mutex - solves deadlocks when we
- * re-enter the quota code and are already holding the mutex
- */
- if (IS_NOQUOTA(inode)) {
- inode_add_bytes(inode, number);
- goto out;
- }
-
- down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
- if (IS_NOQUOTA(inode)) {
- inode_add_bytes(inode, number);
- goto out_unlock;
- }
-
- ret = __dquot_alloc_space(inode, number, warn, 0);
- if (ret == NO_QUOTA)
- goto out_unlock;
+ __inode_add_bytes(inode, number, reserve);

+ if (reserve)
+ goto out_data_unlock;
/* Dirtify all the dquots - this can block when journalling */
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
if (inode->i_dquot[cnt])
mark_dquot_dirty(inode->i_dquot[cnt]);
+out_data_unlock:
+ spin_unlock(&dq_data_lock);
+ flush_warnings(inode->i_dquot, warntype);
out_unlock:
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
out:
return ret;
}
+
+int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
+{
+ return __dquot_alloc_space(inode, number, warn, 0);
+}
EXPORT_SYMBOL(dquot_alloc_space);

int dquot_reserve_space(struct inode *inode, qsize_t number, int warn)
{
- int ret = QUOTA_OK;
-
- if (IS_NOQUOTA(inode))
- goto out;
-
- down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
- if (IS_NOQUOTA(inode))
- goto out_unlock;
-
- ret = __dquot_alloc_space(inode, number, warn, 1);
-out_unlock:
- up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-out:
- return ret;
+ return __dquot_alloc_space(inode, number, warn, 1);
}
EXPORT_SYMBOL(dquot_reserve_space);

@@ -1540,14 +1539,14 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
int ret = QUOTA_OK;

if (IS_NOQUOTA(inode)) {
- inode_add_bytes(inode, number);
+ inode_claim_rsv_blocks(inode, number >> 9);
goto out;
}

down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
if (IS_NOQUOTA(inode)) {
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
- inode_add_bytes(inode, number);
+ inode_claim_rsv_blocks(inode, number >> 9);
goto out;
}

@@ -1559,7 +1558,7 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
number);
}
/* Update inode bytes */
- inode_add_bytes(inode, number);
+ inode_claim_rsv_blocks(inode, number >> 9);
spin_unlock(&dq_data_lock);
/* Dirtify all the dquots - this can block when journalling */
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
@@ -1572,38 +1571,9 @@ out:
EXPORT_SYMBOL(dquot_claim_space);

/*
- * Release reserved quota space
- */
-void dquot_release_reserved_space(struct inode *inode, qsize_t number)
-{
- int cnt;
-
- if (IS_NOQUOTA(inode))
- goto out;
-
- down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
- if (IS_NOQUOTA(inode))
- goto out_unlock;
-
- spin_lock(&dq_data_lock);
- /* Release reserved dquots */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (inode->i_dquot[cnt])
- dquot_free_reserved_space(inode->i_dquot[cnt], number);
- }
- spin_unlock(&dq_data_lock);
-
-out_unlock:
- up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-out:
- return;
-}
-EXPORT_SYMBOL(dquot_release_reserved_space);
-
-/*
* This operation can block, but only after everything is updated
*/
-int dquot_free_space(struct inode *inode, qsize_t number)
+int __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
{
unsigned int cnt;
char warntype[MAXQUOTAS];
@@ -1612,7 +1582,7 @@ int dquot_free_space(struct inode *inode, qsize_t number)
* re-enter the quota code and are already holding the mutex */
if (IS_NOQUOTA(inode)) {
out_sub:
- inode_sub_bytes(inode, number);
+ __inode_sub_bytes(inode, number, reserve);
return QUOTA_OK;
}

@@ -1627,21 +1597,43 @@ out_sub:
if (!inode->i_dquot[cnt])
continue;
warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
- dquot_decr_space(inode->i_dquot[cnt], number);
+ if (reserve)
+ dquot_free_reserved_space(inode->i_dquot[cnt], number);
+ else
+ dquot_decr_space(inode->i_dquot[cnt], number);
}
- inode_sub_bytes(inode, number);
+ __inode_sub_bytes(inode, number, reserve);
spin_unlock(&dq_data_lock);
+
+ if (reserve)
+ goto out_unlock;
/* Dirtify all the dquots - this can block when journalling */
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
if (inode->i_dquot[cnt])
mark_dquot_dirty(inode->i_dquot[cnt]);
+out_unlock:
flush_warnings(inode->i_dquot, warntype);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
return QUOTA_OK;
}
+
+int dquot_free_space(struct inode *inode, qsize_t number)
+{
+ return __dquot_free_space(inode, number, 0);
+}
EXPORT_SYMBOL(dquot_free_space);

/*
+ * Release reserved quota space
+ */
+void dquot_release_reserved_space(struct inode *inode, qsize_t number)
+{
+ return (void )__dquot_free_space(inode, number, 1);
+
+}
+EXPORT_SYMBOL(dquot_release_reserved_space);
+
+/*
* This operation can block, but only after everything is updated
*/
int dquot_free_inode(const struct inode *inode, qsize_t number)
@@ -1679,19 +1671,6 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
EXPORT_SYMBOL(dquot_free_inode);

/*
- * call back function, get reserved quota space from underlying fs
- */
-qsize_t dquot_get_reserved_space(struct inode *inode)
-{
- qsize_t reserved_space = 0;
-
- if (sb_any_quota_active(inode->i_sb) &&
- inode->i_sb->dq_op->get_reserved_space)
- reserved_space = inode->i_sb->dq_op->get_reserved_space(inode);
- return reserved_space;
-}
-
-/*
* Transfer the number of inode and blocks from one diskquota to an other.
*
* This operation can block, but only after everything is updated
@@ -1734,7 +1713,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
}
spin_lock(&dq_data_lock);
cur_space = inode_get_bytes(inode);
- rsv_space = dquot_get_reserved_space(inode);
+ rsv_space = inode_get_rsv_blocks(inode) << 9;
space = cur_space + rsv_space;
/* Build the transfer_from list and check the limits */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 78c4889..daa6274 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -313,8 +313,6 @@ struct dquot_operations {
int (*claim_space) (struct inode *, qsize_t);
/* release rsved quota for delayed alloc */
void (*release_rsv) (struct inode *, qsize_t);
- /* get reserved quota for delayed alloc */
- qsize_t (*get_reserved_space) (struct inode *);
};

/* Operations handling requests from userspace */
--
1.6.0.4


2009-12-09 02:11:14

by Dmitry Monakhov

[permalink] [raw]
Subject: [PATCH 3/3] ext4: Convert to generic reserved quota's space management.

- ext4_getattr() not longer required
- ext4_get_reserved_space() not longer required
- drop i_block_reservation_lock before vfs_dq_reserve_block().
this fix http://bugzilla.kernel.org/show_bug.cgi?id=14739diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 26d3cf8..6198751 100644

Signed-off-by: Dmitry Monakhov <[email protected]>
---
fs/ext4/ext4.h | 2 -
fs/ext4/file.c | 1 -
fs/ext4/inode.c | 67 +++++++++++++-----------------------------------------
fs/ext4/super.c | 1 -
4 files changed, 16 insertions(+), 55 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 26d3cf8..6198751 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1407,8 +1407,6 @@ int ext4_get_block(struct inode *inode, sector_t iblock,
extern struct inode *ext4_iget(struct super_block *, unsigned long);
extern int ext4_write_inode(struct inode *, int);
extern int ext4_setattr(struct dentry *, struct iattr *);
-extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat);
extern void ext4_delete_inode(struct inode *);
extern int ext4_sync_inode(handle_t *, struct inode *);
extern void ext4_dirty_inode(struct inode *);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 9630583..36a75e7 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -151,7 +151,6 @@ const struct file_operations ext4_file_operations = {
const struct inode_operations ext4_file_inode_operations = {
.truncate = ext4_truncate,
.setattr = ext4_setattr,
- .getattr = ext4_getattr,
#ifdef CONFIG_EXT4_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index cc4737e..9a27505 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1048,17 +1048,6 @@ out:
return err;
}

-qsize_t ext4_get_reserved_space(struct inode *inode)
-{
- unsigned long long total;
-
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
- total = EXT4_I(inode)->i_reserved_data_blocks +
- EXT4_I(inode)->i_reserved_meta_blocks;
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
-
- return (total << inode->i_blkbits);
-}
/*
* Calculate the number of metadata blocks need to reserve
* to allocate @blocks for non extent file based file
@@ -1852,19 +1841,8 @@ repeat:
md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
total = md_needed + nrblocks;

- /*
- * Make quota reservation here to prevent quota overflow
- * later. Real quota accounting is done at pages writeout
- * time.
- */
- if (vfs_dq_reserve_block(inode, total)) {
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
- return -EDQUOT;
- }
-
if (ext4_claim_free_blocks(sbi, total)) {
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
- vfs_dq_release_reservation_block(inode, total);
if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
yield();
goto repeat;
@@ -1872,10 +1850,24 @@ repeat:
return -ENOSPC;
}
EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
- EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
+ EXT4_I(inode)->i_reserved_meta_blocks += md_needed;
+ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+
+ /*
+ * Make quota reservation here to prevent quota overflow
+ * later. Real quota accounting is done at pages writeout
+ * time.
+ */
+ if (!vfs_dq_reserve_block(inode, total))
+ return 0; /* success */

+ /* Quota reservation has failed, revert inode's reservation */
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter, total);
+ spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+ EXT4_I(inode)->i_reserved_data_blocks -= nrblocks;
+ EXT4_I(inode)->i_reserved_meta_blocks -= md_needed;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
- return 0; /* success */
+ return -EDQUOT;
}

static void ext4_da_release_space(struct inode *inode, int to_free)
@@ -5511,33 +5503,6 @@ err_out:
return error;
}

-int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
-{
- struct inode *inode;
- unsigned long delalloc_blocks;
-
- inode = dentry->d_inode;
- generic_fillattr(inode, stat);
-
- /*
- * We can't update i_blocks if the block allocation is delayed
- * otherwise in the case of system crash before the real block
- * allocation is done, we will have i_blocks inconsistent with
- * on-disk file blocks.
- * We always keep i_blocks updated together with real
- * allocation. But to not confuse with user, stat
- * will return the blocks that include the delayed allocation
- * blocks for this file.
- */
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
- delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks;
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
-
- stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9;
- return 0;
-}

2009-12-09 10:45:42

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 1/3] [RFC] vfs: add generic reserved space management interface

On Wed, Dec 09, 2009 at 05:11:24AM +0300, Dmitry Monakhov wrote:
> Add new field "i_rsv_blocks" to generic inode. This value is
> managed similar to i_blocks, i_bytes fileds (protected by i_lock).
> This generic interface will be used by generic quota code similar
> to i_blocks.

Please don't bloat the VFS inode for this information.


2009-12-09 11:08:36

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH 1/3] [RFC] vfs: add generic reserved space management interface

On Wed 09-12-09 05:11:24, Dmitry Monakhov wrote:
> Add new field "i_rsv_blocks" to generic inode. This value is
> managed similar to i_blocks, i_bytes fileds (protected by i_lock).
> This generic interface will be used by generic quota code similar
> to i_blocks.
I guess some people won't like bloating the VFS inode. It personally
makes my "quota" life easier but not that many filesystems need this
(currently I'm aware of only ext4) so their complaints are reasonable...
If we would eventually decide to go this way, I'd account reserved
space in bytes - quota is really accounted in bytes (as some filesystems
need this).

Honza
--
Jan Kara <[email protected]>
SUSE Labs, CR

2009-12-09 14:24:59

by Dmitry Monakhov

[permalink] [raw]
Subject: Re: [PATCH 1/3] [RFC] vfs: add generic reserved space management interface

Christoph Hellwig <[email protected]> writes:

> On Wed, Dec 09, 2009 at 05:11:24AM +0300, Dmitry Monakhov wrote:
>> Add new field "i_rsv_blocks" to generic inode. This value is
>> managed similar to i_blocks, i_bytes fileds (protected by i_lock).
>> This generic interface will be used by generic quota code similar
>> to i_blocks.
>
> Please don't bloat the VFS inode for this information.
But almost all recent file systems using extends and delayed allocation.
This means that we have to think about generic space reservation
management quota interface. Otherwise each fs will invent it's own
callbacks. What do you think about hide it with CONFIG_QUOTA option?
May be introduce new CONFIG_QUOTA_RESERVATION?

2009-12-09 14:51:17

by Dmitry Monakhov

[permalink] [raw]
Subject: Re: [PATCH 1/3] [RFC] vfs: add generic reserved space management interface

Jan Kara <[email protected]> writes:

> On Wed 09-12-09 05:11:24, Dmitry Monakhov wrote:
>> Add new field "i_rsv_blocks" to generic inode. This value is
>> managed similar to i_blocks, i_bytes fileds (protected by i_lock).
>> This generic interface will be used by generic quota code similar
>> to i_blocks.
> I guess some people won't like bloating the VFS inode. It personally
> makes my "quota" life easier but not that many filesystems need this
> (currently I'm aware of only ext4) so their complaints are reasonable...
Please read my answer to Christoph.
>From other point of view, we may change inode structure like this:
struct quota_ptr
{
struct dquot *dquot[MAXQUOTAS];
};
struct quota_rsv_ptr
{
struct dquot *dquot[MAXQUOTAS];
qsize_t reservation;
};
struct inode {
....
#ifdef CONFIG_QUOTA
union {
struct quota_ptr i_dquot;
struct quota_rsv_ptr i_dquot_rsv;
};
#endif
....
};
Most file systems will use i_dquot, and file systems with reservation
will use quota_rsv_ptr.
Imho this is even better than, macro approach. But this requires
huge patch for each fs, but who cares.
> If we would eventually decide to go this way, I'd account reserved
> space in bytes - quota is really accounted in bytes (as some filesystems
> need this).
Yes you right, I wasn't happy about this too, Will redo.
>
> Honza

2009-12-09 17:00:19

by Matthew Wilcox

[permalink] [raw]
Subject: Re: [PATCH 1/3] [RFC] vfs: add generic reserved space management interface

On Wed, Dec 09, 2009 at 05:51:17PM +0300, Dmitry Monakhov wrote:
> >From other point of view, we may change inode structure like this:
> struct quota_ptr
> {
> struct dquot *dquot[MAXQUOTAS];
> };
> struct quota_rsv_ptr
> {
> struct dquot *dquot[MAXQUOTAS];
> qsize_t reservation;
> };
> struct inode {
> ....
> #ifdef CONFIG_QUOTA
> union {
> struct quota_ptr i_dquot;
> struct quota_rsv_ptr i_dquot_rsv;
> };
> #endif
> ....
> };

What's wrong with:

#ifdef CONFIG_QUOTA
union {
struct dquot *i_dquot[MAXQUOTAS];
struct quota_rsv_ptr i_dquot_rsv;
};
#endif


voila, no changes needed.

--
Matthew Wilcox Intel Open Source Technology Centre
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours. We can't possibly take such
a retrograde step."

2009-12-10 00:16:48

by Mingming Cao

[permalink] [raw]
Subject: Re: [PATCH 3/3] ext4: Convert to generic reserved quota's space management.

On Wed, 2009-12-09 at 05:11 +0300, Dmitry Monakhov wrote:
> - ext4_getattr() not longer required
> - ext4_get_reserved_space() not longer required
> - drop i_block_reservation_lock before vfs_dq_reserve_block().
> this fix http://bugzilla.kernel.org/show_bug.cgi?id=14739diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 26d3cf8..6198751 100644
>

How about split this patch to just drop i_block_reservation_lock before
vfs_dq_reserve_block()? This deadlock could be fixed right away without
depending on the rest of the VFS changes you proposed.

Mingming
> Signed-off-by: Dmitry Monakhov <[email protected]>
> ---
> fs/ext4/ext4.h | 2 -
> fs/ext4/file.c | 1 -
> fs/ext4/inode.c | 67 +++++++++++++-----------------------------------------
> fs/ext4/super.c | 1 -
> 4 files changed, 16 insertions(+), 55 deletions(-)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 26d3cf8..6198751 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1407,8 +1407,6 @@ int ext4_get_block(struct inode *inode, sector_t iblock,
> extern struct inode *ext4_iget(struct super_block *, unsigned long);
> extern int ext4_write_inode(struct inode *, int);
> extern int ext4_setattr(struct dentry *, struct iattr *);
> -extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
> - struct kstat *stat);
> extern void ext4_delete_inode(struct inode *);
> extern int ext4_sync_inode(handle_t *, struct inode *);
> extern void ext4_dirty_inode(struct inode *);
> diff --git a/fs/ext4/file.c b/fs/ext4/file.c
> index 9630583..36a75e7 100644
> --- a/fs/ext4/file.c
> +++ b/fs/ext4/file.c
> @@ -151,7 +151,6 @@ const struct file_operations ext4_file_operations = {
> const struct inode_operations ext4_file_inode_operations = {
> .truncate = ext4_truncate,
> .setattr = ext4_setattr,
> - .getattr = ext4_getattr,
> #ifdef CONFIG_EXT4_FS_XATTR
> .setxattr = generic_setxattr,
> .getxattr = generic_getxattr,
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index cc4737e..9a27505 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -1048,17 +1048,6 @@ out:
> return err;
> }
>
> -qsize_t ext4_get_reserved_space(struct inode *inode)
> -{
> - unsigned long long total;
> -
> - spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
> - total = EXT4_I(inode)->i_reserved_data_blocks +
> - EXT4_I(inode)->i_reserved_meta_blocks;
> - spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> -
> - return (total << inode->i_blkbits);
> -}
> /*
> * Calculate the number of metadata blocks need to reserve
> * to allocate @blocks for non extent file based file
> @@ -1852,19 +1841,8 @@ repeat:
> md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
> total = md_needed + nrblocks;
>
> - /*
> - * Make quota reservation here to prevent quota overflow
> - * later. Real quota accounting is done at pages writeout
> - * time.
> - */
> - if (vfs_dq_reserve_block(inode, total)) {
> - spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> - return -EDQUOT;
> - }
> -
> if (ext4_claim_free_blocks(sbi, total)) {
> spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> - vfs_dq_release_reservation_block(inode, total);
> if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
> yield();
> goto repeat;
> @@ -1872,10 +1850,24 @@ repeat:
> return -ENOSPC;
> }
> EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
> - EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
> + EXT4_I(inode)->i_reserved_meta_blocks += md_needed;
> + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> +
> + /*
> + * Make quota reservation here to prevent quota overflow
> + * later. Real quota accounting is done at pages writeout
> + * time.
> + */
> + if (!vfs_dq_reserve_block(inode, total))
> + return 0; /* success */
>
> + /* Quota reservation has failed, revert inode's reservation */
> + percpu_counter_sub(&sbi->s_dirtyblocks_counter, total);
> + spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
> + EXT4_I(inode)->i_reserved_data_blocks -= nrblocks;
> + EXT4_I(inode)->i_reserved_meta_blocks -= md_needed;
> spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> - return 0; /* success */
> + return -EDQUOT;
> }
>
> static void ext4_da_release_space(struct inode *inode, int to_free)
> @@ -5511,33 +5503,6 @@ err_out:
> return error;
> }
>
> -int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
> - struct kstat *stat)
> -{
> - struct inode *inode;
> - unsigned long delalloc_blocks;
> -
> - inode = dentry->d_inode;
> - generic_fillattr(inode, stat);
> -
> - /*
> - * We can't update i_blocks if the block allocation is delayed
> - * otherwise in the case of system crash before the real block
> - * allocation is done, we will have i_blocks inconsistent with
> - * on-disk file blocks.
> - * We always keep i_blocks updated together with real
> - * allocation. But to not confuse with user, stat
> - * will return the blocks that include the delayed allocation
> - * blocks for this file.
> - */
> - spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
> - delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks;
> - spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> -
> - stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9;
> - return 0;
> -}
> -
> static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
> int chunk)
> {
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 4c87d97..d87c7f7 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -1007,7 +1007,6 @@ static const struct dquot_operations ext4_quota_operations = {
> .reserve_space = dquot_reserve_space,
> .claim_space = dquot_claim_space,
> .release_rsv = dquot_release_reserved_space,
> - .get_reserved_space = ext4_get_reserved_space,
> .alloc_inode = dquot_alloc_inode,
> .free_space = dquot_free_space,
> .free_inode = dquot_free_inode,



2009-12-10 00:19:39

by Mingming Cao

[permalink] [raw]
Subject: Re: [PATCH 2/3] quota: convert to generic reserved space management

On Wed, 2009-12-09 at 05:11 +0300, Dmitry Monakhov wrote:
> Before this patch fs_rsv_space must be always equals to
> quot_rsv_space all the time. Otherwise dquot_transfer()
> will result in incorrect quota(because fs_rsv_space is used
> on transfer).

By incorrect quota I assume you mean nagetive reserved quota?

I have asked Jan before whether we should transfer the reserved
quota...I still not quit sure..in fact the more I am thinking, I think
the reserved quota should not transfered until they are claimed.

I am going to cook a patch for this...

After that patch, we shall not see the negative quota reservation
values.

> This result in complex locking order issues
> aka http://bugzilla.kernel.org/show_bug.cgi?id=14739
> After this patch quot_rsv_space will be transferred on
> dquot_transfer(), this automatically solves locking issues.
>

This bug could be easily fixed by just drop the i_block_reservation lock
before release reserved quota.

> - Unify dquot_reserve_space() and dquot_reserve_space()
> - Unify dquot_release_reserved_space() and dquot_free_space()
> - Also this patch add missing warning update to release_rsv()
> dquot_release_reserved_space() must call flush_warnings() as
> dquot_free_space() does.
>
> Signed-off-by: Dmitry Monakhov <[email protected]>
> ---
> fs/quota/dquot.c | 169 +++++++++++++++++++++---------------------------
> include/linux/quota.h | 2 -
> 2 files changed, 74 insertions(+), 97 deletions(-)
>
> diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
> index 39b49c4..4bc5ac8 100644
> --- a/fs/quota/dquot.c
> +++ b/fs/quota/dquot.c
> @@ -1396,6 +1396,22 @@ EXPORT_SYMBOL(vfs_dq_drop);
> * inode write go into the same transaction.
> */
>
> +static void __inode_add_bytes(struct inode *inode, qsize_t number, int reserve)
> +{
> + if (reserve)
> + inode_add_rsv_blocks(inode, number >> 9);
> + else
> + inode_add_bytes(inode, number);
> +}
> +
> +static void __inode_sub_bytes(struct inode *inode, qsize_t number, int reserve)
> +{
> + if (reserve)
> + inode_sub_rsv_blocks(inode, number >> 9);
> + else
> + inode_sub_bytes(inode, number);
> +}
> +
> /*
> * This operation can block, but only after everything is updated
> */
> @@ -1405,6 +1421,21 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
> int cnt, ret = QUOTA_OK;
> char warntype[MAXQUOTAS];
>
> + /*
> + * First test before acquiring mutex - solves deadlocks when we
> + * re-enter the quota code and are already holding the mutex
> + */
> + if (IS_NOQUOTA(inode)) {
> + __inode_add_bytes(inode, number, reserve);
> + goto out;
> + }
> +
> + down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
> + if (IS_NOQUOTA(inode)) {
> + __inode_add_bytes(inode, number, reserve);
> + goto out_unlock;
> + }
> +
> for (cnt = 0; cnt < MAXQUOTAS; cnt++)
> warntype[cnt] = QUOTA_NL_NOWARN;
>
> @@ -1415,7 +1446,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
> if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt)
> == NO_QUOTA) {
> ret = NO_QUOTA;
> - goto out_unlock;
> + goto out_data_unlock;
> }
> }
> for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
> @@ -1426,64 +1457,32 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
> else
> dquot_incr_space(inode->i_dquot[cnt], number);
> }
> - if (!reserve)
> - inode_add_bytes(inode, number);
> -out_unlock:
> - spin_unlock(&dq_data_lock);
> - flush_warnings(inode->i_dquot, warntype);
> - return ret;
> -}
> -
> -int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
> -{
> - int cnt, ret = QUOTA_OK;
> -
> - /*
> - * First test before acquiring mutex - solves deadlocks when we
> - * re-enter the quota code and are already holding the mutex
> - */
> - if (IS_NOQUOTA(inode)) {
> - inode_add_bytes(inode, number);
> - goto out;
> - }
> -
> - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
> - if (IS_NOQUOTA(inode)) {
> - inode_add_bytes(inode, number);
> - goto out_unlock;
> - }
> -
> - ret = __dquot_alloc_space(inode, number, warn, 0);
> - if (ret == NO_QUOTA)
> - goto out_unlock;
> + __inode_add_bytes(inode, number, reserve);
>
> + if (reserve)
> + goto out_data_unlock;
> /* Dirtify all the dquots - this can block when journalling */
> for (cnt = 0; cnt < MAXQUOTAS; cnt++)
> if (inode->i_dquot[cnt])
> mark_dquot_dirty(inode->i_dquot[cnt]);
> +out_data_unlock:
> + spin_unlock(&dq_data_lock);
> + flush_warnings(inode->i_dquot, warntype);
> out_unlock:
> up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
> out:
> return ret;
> }
> +
> +int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
> +{
> + return __dquot_alloc_space(inode, number, warn, 0);
> +}
> EXPORT_SYMBOL(dquot_alloc_space);
>
> int dquot_reserve_space(struct inode *inode, qsize_t number, int warn)
> {
> - int ret = QUOTA_OK;
> -
> - if (IS_NOQUOTA(inode))
> - goto out;
> -
> - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
> - if (IS_NOQUOTA(inode))
> - goto out_unlock;
> -
> - ret = __dquot_alloc_space(inode, number, warn, 1);
> -out_unlock:
> - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
> -out:
> - return ret;
> + return __dquot_alloc_space(inode, number, warn, 1);
> }
> EXPORT_SYMBOL(dquot_reserve_space);
>
> @@ -1540,14 +1539,14 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
> int ret = QUOTA_OK;
>
> if (IS_NOQUOTA(inode)) {
> - inode_add_bytes(inode, number);
> + inode_claim_rsv_blocks(inode, number >> 9);
> goto out;
> }
>
> down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
> if (IS_NOQUOTA(inode)) {
> up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
> - inode_add_bytes(inode, number);
> + inode_claim_rsv_blocks(inode, number >> 9);
> goto out;
> }
>
> @@ -1559,7 +1558,7 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
> number);
> }
> /* Update inode bytes */
> - inode_add_bytes(inode, number);
> + inode_claim_rsv_blocks(inode, number >> 9);
> spin_unlock(&dq_data_lock);
> /* Dirtify all the dquots - this can block when journalling */
> for (cnt = 0; cnt < MAXQUOTAS; cnt++)
> @@ -1572,38 +1571,9 @@ out:
> EXPORT_SYMBOL(dquot_claim_space);
>
> /*
> - * Release reserved quota space
> - */
> -void dquot_release_reserved_space(struct inode *inode, qsize_t number)
> -{
> - int cnt;
> -
> - if (IS_NOQUOTA(inode))
> - goto out;
> -
> - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
> - if (IS_NOQUOTA(inode))
> - goto out_unlock;
> -
> - spin_lock(&dq_data_lock);
> - /* Release reserved dquots */
> - for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
> - if (inode->i_dquot[cnt])
> - dquot_free_reserved_space(inode->i_dquot[cnt], number);
> - }
> - spin_unlock(&dq_data_lock);
> -
> -out_unlock:
> - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
> -out:
> - return;
> -}
> -EXPORT_SYMBOL(dquot_release_reserved_space);
> -
> -/*
> * This operation can block, but only after everything is updated
> */
> -int dquot_free_space(struct inode *inode, qsize_t number)
> +int __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
> {
> unsigned int cnt;
> char warntype[MAXQUOTAS];
> @@ -1612,7 +1582,7 @@ int dquot_free_space(struct inode *inode, qsize_t number)
> * re-enter the quota code and are already holding the mutex */
> if (IS_NOQUOTA(inode)) {
> out_sub:
> - inode_sub_bytes(inode, number);
> + __inode_sub_bytes(inode, number, reserve);
> return QUOTA_OK;
> }
>
> @@ -1627,21 +1597,43 @@ out_sub:
> if (!inode->i_dquot[cnt])
> continue;
> warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
> - dquot_decr_space(inode->i_dquot[cnt], number);
> + if (reserve)
> + dquot_free_reserved_space(inode->i_dquot[cnt], number);
> + else
> + dquot_decr_space(inode->i_dquot[cnt], number);
> }
> - inode_sub_bytes(inode, number);
> + __inode_sub_bytes(inode, number, reserve);
> spin_unlock(&dq_data_lock);
> +
> + if (reserve)
> + goto out_unlock;
> /* Dirtify all the dquots - this can block when journalling */
> for (cnt = 0; cnt < MAXQUOTAS; cnt++)
> if (inode->i_dquot[cnt])
> mark_dquot_dirty(inode->i_dquot[cnt]);
> +out_unlock:
> flush_warnings(inode->i_dquot, warntype);
> up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
> return QUOTA_OK;
> }
> +
> +int dquot_free_space(struct inode *inode, qsize_t number)
> +{
> + return __dquot_free_space(inode, number, 0);
> +}
> EXPORT_SYMBOL(dquot_free_space);
>
> /*
> + * Release reserved quota space
> + */
> +void dquot_release_reserved_space(struct inode *inode, qsize_t number)
> +{
> + return (void )__dquot_free_space(inode, number, 1);
> +
> +}
> +EXPORT_SYMBOL(dquot_release_reserved_space);
> +
> +/*
> * This operation can block, but only after everything is updated
> */
> int dquot_free_inode(const struct inode *inode, qsize_t number)
> @@ -1679,19 +1671,6 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
> EXPORT_SYMBOL(dquot_free_inode);
>
> /*
> - * call back function, get reserved quota space from underlying fs
> - */
> -qsize_t dquot_get_reserved_space(struct inode *inode)
> -{
> - qsize_t reserved_space = 0;
> -
> - if (sb_any_quota_active(inode->i_sb) &&
> - inode->i_sb->dq_op->get_reserved_space)
> - reserved_space = inode->i_sb->dq_op->get_reserved_space(inode);
> - return reserved_space;
> -}
> -
> -/*
> * Transfer the number of inode and blocks from one diskquota to an other.
> *
> * This operation can block, but only after everything is updated
> @@ -1734,7 +1713,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
> }
> spin_lock(&dq_data_lock);
> cur_space = inode_get_bytes(inode);
> - rsv_space = dquot_get_reserved_space(inode);
> + rsv_space = inode_get_rsv_blocks(inode) << 9;
> space = cur_space + rsv_space;
> /* Build the transfer_from list and check the limits */
> for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
> diff --git a/include/linux/quota.h b/include/linux/quota.h
> index 78c4889..daa6274 100644
> --- a/include/linux/quota.h
> +++ b/include/linux/quota.h
> @@ -313,8 +313,6 @@ struct dquot_operations {
> int (*claim_space) (struct inode *, qsize_t);
> /* release rsved quota for delayed alloc */
> void (*release_rsv) (struct inode *, qsize_t);
> - /* get reserved quota for delayed alloc */
> - qsize_t (*get_reserved_space) (struct inode *);
> };
>
> /* Operations handling requests from userspace */



2009-12-10 01:12:50

by Dmitry Monakhov

[permalink] [raw]
Subject: Re: [PATCH 2/3] quota: convert to generic reserved space management

Mingming <[email protected]> writes:

> On Wed, 2009-12-09 at 05:11 +0300, Dmitry Monakhov wrote:
>> Before this patch fs_rsv_space must be always equals to
>> quot_rsv_space all the time. Otherwise dquot_transfer()
>> will result in incorrect quota(because fs_rsv_space is used
>> on transfer).
>
> By incorrect quota I assume you mean nagetive reserved quota?
>
> I have asked Jan before whether we should transfer the reserved
> quota...I still not quit sure..in fact the more I am thinking, I think
> the reserved quota should not transfered until they are claimed.
>
> I am going to cook a patch for this...
>
> After that patch, we shall not see the negative quota reservation
> values.
>
This can't help, because we still have situation where
SUM_for_each_inode{inode_rsv} != dquot->dq_dqb.dqb_rsvspace
Seems that you still don't get the core of the issue.
In order to simplify things just consider one inode.
Let's inode has some reservation an we are doing dquot_transfer()
But we not guarantee what inode_rsv == dquot->dq_dqb.dqb_rsvspace
(because of race condition) at the moment of quota_transfer().
then just try to answer following questions:
what should we do with from_dquot->dq_dqb.dqb_rsvspace
what should we do with to_dquot->dq_dqb.dqb_rsvspace
what should we do on dquot_claim_space() right after dquot_transfer().
>> This result in complex locking order issues
>> aka http://bugzilla.kernel.org/show_bug.cgi?id=14739
>> After this patch quot_rsv_space will be transferred on
>> dquot_transfer(), this automatically solves locking issues.
>>
>
> This bug could be easily fixed by just drop the i_block_reservation lock
> before release reserved quota.
Yes. but this make race even longer.
I've prepared new version of the patch, will send in a minute.
>
>> - Unify dquot_reserve_space() and dquot_reserve_space()
>> - Unify dquot_release_reserved_space() and dquot_free_space()
>> - Also this patch add missing warning update to release_rsv()
>> dquot_release_reserved_space() must call flush_warnings() as
>> dquot_free_space() does.
>>
>> Signed-off-by: Dmitry Monakhov <[email protected]>
>> ---
>> fs/quota/dquot.c | 169 +++++++++++++++++++++---------------------------
>> include/linux/quota.h | 2 -
>> 2 files changed, 74 insertions(+), 97 deletions(-)
>>
>> diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
>> index 39b49c4..4bc5ac8 100644
>> --- a/fs/quota/dquot.c
>> +++ b/fs/quota/dquot.c
>> @@ -1396,6 +1396,22 @@ EXPORT_SYMBOL(vfs_dq_drop);
>> * inode write go into the same transaction.
>> */
>>
>> +static void __inode_add_bytes(struct inode *inode, qsize_t number, int reserve)
>> +{
>> + if (reserve)
>> + inode_add_rsv_blocks(inode, number >> 9);
>> + else
>> + inode_add_bytes(inode, number);
>> +}
>> +
>> +static void __inode_sub_bytes(struct inode *inode, qsize_t number, int reserve)
>> +{
>> + if (reserve)
>> + inode_sub_rsv_blocks(inode, number >> 9);
>> + else
>> + inode_sub_bytes(inode, number);
>> +}
>> +
>> /*
>> * This operation can block, but only after everything is updated
>> */
>> @@ -1405,6 +1421,21 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
>> int cnt, ret = QUOTA_OK;
>> char warntype[MAXQUOTAS];
>>
>> + /*
>> + * First test before acquiring mutex - solves deadlocks when we
>> + * re-enter the quota code and are already holding the mutex
>> + */
>> + if (IS_NOQUOTA(inode)) {
>> + __inode_add_bytes(inode, number, reserve);
>> + goto out;
>> + }
>> +
>> + down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
>> + if (IS_NOQUOTA(inode)) {
>> + __inode_add_bytes(inode, number, reserve);
>> + goto out_unlock;
>> + }
>> +
>> for (cnt = 0; cnt < MAXQUOTAS; cnt++)
>> warntype[cnt] = QUOTA_NL_NOWARN;
>>
>> @@ -1415,7 +1446,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
>> if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt)
>> == NO_QUOTA) {
>> ret = NO_QUOTA;
>> - goto out_unlock;
>> + goto out_data_unlock;
>> }
>> }
>> for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
>> @@ -1426,64 +1457,32 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
>> else
>> dquot_incr_space(inode->i_dquot[cnt], number);
>> }
>> - if (!reserve)
>> - inode_add_bytes(inode, number);
>> -out_unlock:
>> - spin_unlock(&dq_data_lock);
>> - flush_warnings(inode->i_dquot, warntype);
>> - return ret;
>> -}
>> -
>> -int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
>> -{
>> - int cnt, ret = QUOTA_OK;
>> -
>> - /*
>> - * First test before acquiring mutex - solves deadlocks when we
>> - * re-enter the quota code and are already holding the mutex
>> - */
>> - if (IS_NOQUOTA(inode)) {
>> - inode_add_bytes(inode, number);
>> - goto out;
>> - }
>> -
>> - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
>> - if (IS_NOQUOTA(inode)) {
>> - inode_add_bytes(inode, number);
>> - goto out_unlock;
>> - }
>> -
>> - ret = __dquot_alloc_space(inode, number, warn, 0);
>> - if (ret == NO_QUOTA)
>> - goto out_unlock;
>> + __inode_add_bytes(inode, number, reserve);
>>
>> + if (reserve)
>> + goto out_data_unlock;
>> /* Dirtify all the dquots - this can block when journalling */
>> for (cnt = 0; cnt < MAXQUOTAS; cnt++)
>> if (inode->i_dquot[cnt])
>> mark_dquot_dirty(inode->i_dquot[cnt]);
>> +out_data_unlock:
>> + spin_unlock(&dq_data_lock);
>> + flush_warnings(inode->i_dquot, warntype);
>> out_unlock:
>> up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
>> out:
>> return ret;
>> }
>> +
>> +int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
>> +{
>> + return __dquot_alloc_space(inode, number, warn, 0);
>> +}
>> EXPORT_SYMBOL(dquot_alloc_space);
>>
>> int dquot_reserve_space(struct inode *inode, qsize_t number, int warn)
>> {
>> - int ret = QUOTA_OK;
>> -
>> - if (IS_NOQUOTA(inode))
>> - goto out;
>> -
>> - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
>> - if (IS_NOQUOTA(inode))
>> - goto out_unlock;
>> -
>> - ret = __dquot_alloc_space(inode, number, warn, 1);
>> -out_unlock:
>> - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
>> -out:
>> - return ret;
>> + return __dquot_alloc_space(inode, number, warn, 1);
>> }
>> EXPORT_SYMBOL(dquot_reserve_space);
>>
>> @@ -1540,14 +1539,14 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
>> int ret = QUOTA_OK;
>>
>> if (IS_NOQUOTA(inode)) {
>> - inode_add_bytes(inode, number);
>> + inode_claim_rsv_blocks(inode, number >> 9);
>> goto out;
>> }
>>
>> down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
>> if (IS_NOQUOTA(inode)) {
>> up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
>> - inode_add_bytes(inode, number);
>> + inode_claim_rsv_blocks(inode, number >> 9);
>> goto out;
>> }
>>
>> @@ -1559,7 +1558,7 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
>> number);
>> }
>> /* Update inode bytes */
>> - inode_add_bytes(inode, number);
>> + inode_claim_rsv_blocks(inode, number >> 9);
>> spin_unlock(&dq_data_lock);
>> /* Dirtify all the dquots - this can block when journalling */
>> for (cnt = 0; cnt < MAXQUOTAS; cnt++)
>> @@ -1572,38 +1571,9 @@ out:
>> EXPORT_SYMBOL(dquot_claim_space);
>>
>> /*
>> - * Release reserved quota space
>> - */
>> -void dquot_release_reserved_space(struct inode *inode, qsize_t number)
>> -{
>> - int cnt;
>> -
>> - if (IS_NOQUOTA(inode))
>> - goto out;
>> -
>> - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
>> - if (IS_NOQUOTA(inode))
>> - goto out_unlock;
>> -
>> - spin_lock(&dq_data_lock);
>> - /* Release reserved dquots */
>> - for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
>> - if (inode->i_dquot[cnt])
>> - dquot_free_reserved_space(inode->i_dquot[cnt], number);
>> - }
>> - spin_unlock(&dq_data_lock);
>> -
>> -out_unlock:
>> - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
>> -out:
>> - return;
>> -}
>> -EXPORT_SYMBOL(dquot_release_reserved_space);
>> -
>> -/*
>> * This operation can block, but only after everything is updated
>> */
>> -int dquot_free_space(struct inode *inode, qsize_t number)
>> +int __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
>> {
>> unsigned int cnt;
>> char warntype[MAXQUOTAS];
>> @@ -1612,7 +1582,7 @@ int dquot_free_space(struct inode *inode, qsize_t number)
>> * re-enter the quota code and are already holding the mutex */
>> if (IS_NOQUOTA(inode)) {
>> out_sub:
>> - inode_sub_bytes(inode, number);
>> + __inode_sub_bytes(inode, number, reserve);
>> return QUOTA_OK;
>> }
>>
>> @@ -1627,21 +1597,43 @@ out_sub:
>> if (!inode->i_dquot[cnt])
>> continue;
>> warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
>> - dquot_decr_space(inode->i_dquot[cnt], number);
>> + if (reserve)
>> + dquot_free_reserved_space(inode->i_dquot[cnt], number);
>> + else
>> + dquot_decr_space(inode->i_dquot[cnt], number);
>> }
>> - inode_sub_bytes(inode, number);
>> + __inode_sub_bytes(inode, number, reserve);
>> spin_unlock(&dq_data_lock);
>> +
>> + if (reserve)
>> + goto out_unlock;
>> /* Dirtify all the dquots - this can block when journalling */
>> for (cnt = 0; cnt < MAXQUOTAS; cnt++)
>> if (inode->i_dquot[cnt])
>> mark_dquot_dirty(inode->i_dquot[cnt]);
>> +out_unlock:
>> flush_warnings(inode->i_dquot, warntype);
>> up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
>> return QUOTA_OK;
>> }
>> +
>> +int dquot_free_space(struct inode *inode, qsize_t number)
>> +{
>> + return __dquot_free_space(inode, number, 0);
>> +}
>> EXPORT_SYMBOL(dquot_free_space);
>>
>> /*
>> + * Release reserved quota space
>> + */
>> +void dquot_release_reserved_space(struct inode *inode, qsize_t number)
>> +{
>> + return (void )__dquot_free_space(inode, number, 1);
>> +
>> +}
>> +EXPORT_SYMBOL(dquot_release_reserved_space);
>> +
>> +/*
>> * This operation can block, but only after everything is updated
>> */
>> int dquot_free_inode(const struct inode *inode, qsize_t number)
>> @@ -1679,19 +1671,6 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
>> EXPORT_SYMBOL(dquot_free_inode);
>>
>> /*
>> - * call back function, get reserved quota space from underlying fs
>> - */
>> -qsize_t dquot_get_reserved_space(struct inode *inode)
>> -{
>> - qsize_t reserved_space = 0;
>> -
>> - if (sb_any_quota_active(inode->i_sb) &&
>> - inode->i_sb->dq_op->get_reserved_space)
>> - reserved_space = inode->i_sb->dq_op->get_reserved_space(inode);
>> - return reserved_space;
>> -}
>> -
>> -/*
>> * Transfer the number of inode and blocks from one diskquota to an other.
>> *
>> * This operation can block, but only after everything is updated
>> @@ -1734,7 +1713,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
>> }
>> spin_lock(&dq_data_lock);
>> cur_space = inode_get_bytes(inode);
>> - rsv_space = dquot_get_reserved_space(inode);
>> + rsv_space = inode_get_rsv_blocks(inode) << 9;
>> space = cur_space + rsv_space;
>> /* Build the transfer_from list and check the limits */
>> for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
>> diff --git a/include/linux/quota.h b/include/linux/quota.h
>> index 78c4889..daa6274 100644
>> --- a/include/linux/quota.h
>> +++ b/include/linux/quota.h
>> @@ -313,8 +313,6 @@ struct dquot_operations {
>> int (*claim_space) (struct inode *, qsize_t);
>> /* release rsved quota for delayed alloc */
>> void (*release_rsv) (struct inode *, qsize_t);
>> - /* get reserved quota for delayed alloc */
>> - qsize_t (*get_reserved_space) (struct inode *);
>> };
>>
>> /* Operations handling requests from userspace */