I'm sending for review patches fixing some bugs and implementing new quota
features. First four patches fix possible deadlocks in OCFS2 quotas. I plan
to send them to Linus next week if nobody objects. The remaining patches
are going to linux-next and I plan to merge them in the next merge window.
Honza
It could happen that some limit has been set via quotactl() and in parallel
->mark_dirty() is called from another thread doing e.g. dquot_alloc_space(). In
such case ocfs2_write_dquot() must not try to sync the dquot because that needs
global quota lock but that ranks above transaction start.
Signed-off-by: Jan Kara <[email protected]>
Acked-by: Mark Fasheh <[email protected]>
---
fs/ocfs2/quota_global.c | 4 +++-
1 files changed, 3 insertions(+), 1 deletions(-)
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index f4efa89..1ed0f7c 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -754,7 +754,9 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
if (dquot->dq_flags & mask)
sync = 1;
spin_unlock(&dq_data_lock);
- if (!sync) {
+ /* This is a slight hack but we can't afford getting global quota
+ * lock if we already have a transaction started. */
+ if (!sync || journal_current_handle()) {
status = ocfs2_write_dquot(dquot);
goto out;
}
--
1.6.0.2
Dropping of last reference to dentry lock is a complicated operation involving
dropping of reference to inode. This can get complicated and quota code in
particular needs to obtain some quota locks which leads to potential deadlock.
Thus we defer dropping of inode reference to ocfs2_wq.
Signed-off-by: Jan Kara <[email protected]>
Acked-by: Mark Fasheh <[email protected]>
---
fs/ocfs2/dcache.c | 42 +++++++++++++++++++++++++++++++++++++++---
fs/ocfs2/dcache.h | 9 ++++++++-
fs/ocfs2/ocfs2.h | 6 ++++++
fs/ocfs2/super.c | 3 +++
4 files changed, 56 insertions(+), 4 deletions(-)
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index b1cc7c3..e9d7c20 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -38,6 +38,7 @@
#include "dlmglue.h"
#include "file.h"
#include "inode.h"
+#include "super.h"
static int ocfs2_dentry_revalidate(struct dentry *dentry,
@@ -294,6 +295,34 @@ out_attach:
return ret;
}
+static DEFINE_SPINLOCK(dentry_list_lock);
+
+/* We limit the number of dentry locks to drop in one go. We have
+ * this limit so that we don't starve other users of ocfs2_wq. */
+#define DL_INODE_DROP_COUNT 64
+
+/* Drop inode references from dentry locks */
+void ocfs2_drop_dl_inodes(struct work_struct *work)
+{
+ struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
+ dentry_lock_work);
+ struct ocfs2_dentry_lock *dl;
+ int drop_count = DL_INODE_DROP_COUNT;
+
+ spin_lock(&dentry_list_lock);
+ while (osb->dentry_lock_list && drop_count--) {
+ dl = osb->dentry_lock_list;
+ osb->dentry_lock_list = dl->dl_next;
+ spin_unlock(&dentry_list_lock);
+ iput(dl->dl_inode);
+ kfree(dl);
+ spin_lock(&dentry_list_lock);
+ }
+ if (osb->dentry_lock_list)
+ queue_work(ocfs2_wq, &osb->dentry_lock_work);
+ spin_unlock(&dentry_list_lock);
+}
+
/*
* ocfs2_dentry_iput() and friends.
*
@@ -318,16 +347,23 @@ out_attach:
static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
struct ocfs2_dentry_lock *dl)
{
- iput(dl->dl_inode);
ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
ocfs2_lock_res_free(&dl->dl_lockres);
- kfree(dl);
+
+ /* We leave dropping of inode reference to ocfs2_wq as that can
+ * possibly lead to inode deletion which gets tricky */
+ spin_lock(&dentry_list_lock);
+ if (!osb->dentry_lock_list)
+ queue_work(ocfs2_wq, &osb->dentry_lock_work);
+ dl->dl_next = osb->dentry_lock_list;
+ osb->dentry_lock_list = dl;
+ spin_unlock(&dentry_list_lock);
}
void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
struct ocfs2_dentry_lock *dl)
{
- int unlock = 0;
+ int unlock;
BUG_ON(dl->dl_count == 0);
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h
index c091c34..d06e16c 100644
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -29,8 +29,13 @@
extern struct dentry_operations ocfs2_dentry_ops;
struct ocfs2_dentry_lock {
+ /* Use count of dentry lock */
unsigned int dl_count;
- u64 dl_parent_blkno;
+ union {
+ /* Linked list of dentry locks to release */
+ struct ocfs2_dentry_lock *dl_next;
+ u64 dl_parent_blkno;
+ };
/*
* The ocfs2_dentry_lock keeps an inode reference until
@@ -47,6 +52,8 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
struct ocfs2_dentry_lock *dl);
+void ocfs2_drop_dl_inodes(struct work_struct *work);
+
struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
int skip_unhashed);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index ad5c24a..0773841 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -210,6 +210,7 @@ struct ocfs2_journal;
struct ocfs2_slot_info;
struct ocfs2_recovery_map;
struct ocfs2_quota_recovery;
+struct ocfs2_dentry_lock;
struct ocfs2_super
{
struct task_struct *commit_task;
@@ -325,6 +326,11 @@ struct ocfs2_super
struct list_head blocked_lock_list;
unsigned long blocked_lock_count;
+ /* List of dentry locks to release. Anyone can add locks to
+ * the list, ocfs2_wq processes the list */
+ struct ocfs2_dentry_lock *dentry_lock_list;
+ struct work_struct dentry_lock_work;
+
wait_queue_head_t osb_mount_event;
/* Truncate log info */
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 43ed113..b1cb38f 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1887,6 +1887,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery);
journal->j_state = OCFS2_JOURNAL_FREE;
+ INIT_WORK(&osb->dentry_lock_work, ocfs2_drop_dl_inodes);
+ osb->dentry_lock_list = NULL;
+
/* get some pseudo constants for clustersize bits */
osb->s_clustersize_bits =
le32_to_cpu(di->id2.i_super.s_clustersize_bits);
--
1.6.0.2
Since ->acquire_dquot and ->release_dquot callbacks aren't called under
dqptr_sem anymore, we don't have to start a transaction and obtain locks
so early. So we can just remove all this complicated stuff.
Signed-off-by: Jan Kara <[email protected]>
---
fs/ocfs2/quota_global.c | 169 +----------------------------------------------
1 files changed, 2 insertions(+), 167 deletions(-)
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 6aff8f2..f4efa89 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -810,171 +810,6 @@ out:
return status;
}
-/* This is difficult. We have to lock quota inode and start transaction
- * in this function but we don't want to take the penalty of exlusive
- * quota file lock when we are just going to use cached structures. So
- * we just take read lock check whether we have dquot cached and if so,
- * we don't have to take the write lock... */
-static int ocfs2_dquot_initialize(struct inode *inode, int type)
-{
- handle_t *handle = NULL;
- int status = 0;
- struct super_block *sb = inode->i_sb;
- struct ocfs2_mem_dqinfo *oinfo;
- int exclusive = 0;
- int cnt;
- qid_t id;
-
- mlog_entry_void();
-
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (type != -1 && cnt != type)
- continue;
- if (!sb_has_quota_active(sb, cnt))
- continue;
- oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
- status = ocfs2_lock_global_qf(oinfo, 0);
- if (status < 0)
- goto out;
- /* This is just a performance optimization not a reliable test.
- * Since we hold an inode lock, noone can actually release
- * the structure until we are finished with initialization. */
- if (inode->i_dquot[cnt] != NODQUOT) {
- ocfs2_unlock_global_qf(oinfo, 0);
- continue;
- }
- /* When we have inode lock, we know that no dquot_release() can
- * run and thus we can safely check whether we need to
- * read+modify global file to get quota information or whether
- * our node already has it. */
- if (cnt == USRQUOTA)
- id = inode->i_uid;
- else if (cnt == GRPQUOTA)
- id = inode->i_gid;
- else
- BUG();
- /* Obtain exclusion from quota off... */
- down_write(&sb_dqopt(sb)->dqptr_sem);
- exclusive = !dquot_is_cached(sb, id, cnt);
- up_write(&sb_dqopt(sb)->dqptr_sem);
- if (exclusive) {
- status = ocfs2_lock_global_qf(oinfo, 1);
- if (status < 0) {
- exclusive = 0;
- mlog_errno(status);
- goto out_ilock;
- }
- handle = ocfs2_start_trans(OCFS2_SB(sb),
- ocfs2_calc_qinit_credits(sb, cnt));
- if (IS_ERR(handle)) {
- status = PTR_ERR(handle);
- mlog_errno(status);
- goto out_ilock;
- }
- }
- dquot_initialize(inode, cnt);
- if (exclusive) {
- ocfs2_commit_trans(OCFS2_SB(sb), handle);
- ocfs2_unlock_global_qf(oinfo, 1);
- }
- ocfs2_unlock_global_qf(oinfo, 0);
- }
- mlog_exit(0);
- return 0;
-out_ilock:
- if (exclusive)
- ocfs2_unlock_global_qf(oinfo, 1);
- ocfs2_unlock_global_qf(oinfo, 0);
-out:
- mlog_exit(status);
- return status;
-}
-
-static int ocfs2_dquot_drop_slow(struct inode *inode)
-{
- int status = 0;
- int cnt;
- int got_lock[MAXQUOTAS] = {0, 0};
- handle_t *handle;
- struct super_block *sb = inode->i_sb;
- struct ocfs2_mem_dqinfo *oinfo;
-
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!sb_has_quota_active(sb, cnt))
- continue;
- oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
- status = ocfs2_lock_global_qf(oinfo, 1);
- if (status < 0)
- goto out;
- got_lock[cnt] = 1;
- }
- handle = ocfs2_start_trans(OCFS2_SB(sb),
- ocfs2_calc_qinit_credits(sb, USRQUOTA) +
- ocfs2_calc_qinit_credits(sb, GRPQUOTA));
- if (IS_ERR(handle)) {
- status = PTR_ERR(handle);
- mlog_errno(status);
- goto out;
- }
- dquot_drop(inode);
- ocfs2_commit_trans(OCFS2_SB(sb), handle);
-out:
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- if (got_lock[cnt]) {
- oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
- ocfs2_unlock_global_qf(oinfo, 1);
- }
- return status;
-}
-
-/* See the comment before ocfs2_dquot_initialize. */
-static int ocfs2_dquot_drop(struct inode *inode)
-{
- int status = 0;
- struct super_block *sb = inode->i_sb;
- struct ocfs2_mem_dqinfo *oinfo;
- int exclusive = 0;
- int cnt;
- int got_lock[MAXQUOTAS] = {0, 0};
-
- mlog_entry_void();
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!sb_has_quota_active(sb, cnt))
- continue;
- oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
- status = ocfs2_lock_global_qf(oinfo, 0);
- if (status < 0)
- goto out;
- got_lock[cnt] = 1;
- }
- /* Lock against anyone releasing references so that when when we check
- * we know we are not going to be last ones to release dquot */
- down_write(&sb_dqopt(sb)->dqptr_sem);
- /* Urgh, this is a terrible hack :( */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (inode->i_dquot[cnt] != NODQUOT &&
- atomic_read(&inode->i_dquot[cnt]->dq_count) > 1) {
- exclusive = 1;
- break;
- }
- }
- if (!exclusive)
- dquot_drop_locked(inode);
- up_write(&sb_dqopt(sb)->dqptr_sem);
-out:
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- if (got_lock[cnt]) {
- oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
- ocfs2_unlock_global_qf(oinfo, 0);
- }
- /* In case we bailed out because we had to do expensive locking
- * do it now... */
- if (exclusive)
- status = ocfs2_dquot_drop_slow(inode);
- mlog_exit(status);
- return status;
-}
-
static struct dquot *ocfs2_alloc_dquot(struct super_block *sb, int type)
{
struct ocfs2_dquot *dquot =
@@ -991,8 +826,8 @@ static void ocfs2_destroy_dquot(struct dquot *dquot)
}
struct dquot_operations ocfs2_quota_operations = {
- .initialize = ocfs2_dquot_initialize,
- .drop = ocfs2_dquot_drop,
+ .initialize = dquot_initialize,
+ .drop = dquot_drop,
.alloc_space = dquot_alloc_space,
.alloc_inode = dquot_alloc_inode,
.free_space = dquot_free_space,
--
1.6.0.2
reiserfs_dquot_initialize() and reiserfs_dquot_drop() is no longer
needed because of modified quota locking.
Signed-off-by: Jan Kara <[email protected]>
---
fs/reiserfs/super.c | 58 +-------------------------------------------------
1 files changed, 2 insertions(+), 56 deletions(-)
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index f3c820b..317c035 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -629,8 +629,6 @@ static const struct super_operations reiserfs_sops = {
#ifdef CONFIG_QUOTA
#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group")
-static int reiserfs_dquot_initialize(struct inode *, int);
-static int reiserfs_dquot_drop(struct inode *);
static int reiserfs_write_dquot(struct dquot *);
static int reiserfs_acquire_dquot(struct dquot *);
static int reiserfs_release_dquot(struct dquot *);
@@ -639,8 +637,8 @@ static int reiserfs_write_info(struct super_block *, int);
static int reiserfs_quota_on(struct super_block *, int, int, char *, int);
static struct dquot_operations reiserfs_quota_operations = {
- .initialize = reiserfs_dquot_initialize,
- .drop = reiserfs_dquot_drop,
+ .initialize = dquot_initialize,
+ .drop = dquot_drop,
.alloc_space = dquot_alloc_space,
.alloc_inode = dquot_alloc_inode,
.free_space = dquot_free_space,
@@ -1896,58 +1894,6 @@ static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf)
}
#ifdef CONFIG_QUOTA
-static int reiserfs_dquot_initialize(struct inode *inode, int type)
-{
- struct reiserfs_transaction_handle th;
- int ret, err;
-
- /* We may create quota structure so we need to reserve enough blocks */
- reiserfs_write_lock(inode->i_sb);
- ret =
- journal_begin(&th, inode->i_sb,
- 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb));
- if (ret)
- goto out;
- ret = dquot_initialize(inode, type);
- err =
- journal_end(&th, inode->i_sb,
- 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb));
- if (!ret && err)
- ret = err;
- out:
- reiserfs_write_unlock(inode->i_sb);
- return ret;
-}
-
-static int reiserfs_dquot_drop(struct inode *inode)
-{
- struct reiserfs_transaction_handle th;
- int ret, err;
-
- /* We may delete quota structure so we need to reserve enough blocks */
- reiserfs_write_lock(inode->i_sb);
- ret =
- journal_begin(&th, inode->i_sb,
- 2 * REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb));
- if (ret) {
- /*
- * We call dquot_drop() anyway to at least release references
- * to quota structures so that umount does not hang.
- */
- dquot_drop(inode);
- goto out;
- }
- ret = dquot_drop(inode);
- err =
- journal_end(&th, inode->i_sb,
- 2 * REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb));
- if (!ret && err)
- ret = err;
- out:
- reiserfs_write_unlock(inode->i_sb);
- return ret;
-}
-
static int reiserfs_write_dquot(struct dquot *dquot)
{
struct reiserfs_transaction_handle th;
--
1.6.0.2
From: Mingming Cao <[email protected]>
Andrew has suggested to use inode->i_blkbits to get the block bits info,
rather than use super block's blockbits. That should be faster and emit
less code.
Signed-off-by: Mingming Cao <[email protected]>
Signed-off-by: Jan Kara <[email protected]>
---
include/linux/quotaops.h | 22 ++++++++--------------
1 files changed, 8 insertions(+), 14 deletions(-)
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 7369d04..69b502e 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -410,38 +410,32 @@ static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr)
static inline int vfs_dq_prealloc_block_nodirty(struct inode *inode, qsize_t nr)
{
- return vfs_dq_prealloc_space_nodirty(inode,
- nr << inode->i_sb->s_blocksize_bits);
+ return vfs_dq_prealloc_space_nodirty(inode, nr << inode->i_blkbits);
}
static inline int vfs_dq_prealloc_block(struct inode *inode, qsize_t nr)
{
- return vfs_dq_prealloc_space(inode,
- nr << inode->i_sb->s_blocksize_bits);
+ return vfs_dq_prealloc_space(inode, nr << inode->i_blkbits);
}
static inline int vfs_dq_alloc_block_nodirty(struct inode *inode, qsize_t nr)
{
- return vfs_dq_alloc_space_nodirty(inode,
- nr << inode->i_sb->s_blocksize_bits);
+ return vfs_dq_alloc_space_nodirty(inode, nr << inode->i_blkbits);
}
static inline int vfs_dq_alloc_block(struct inode *inode, qsize_t nr)
{
- return vfs_dq_alloc_space(inode,
- nr << inode->i_sb->s_blocksize_bits);
+ return vfs_dq_alloc_space(inode, nr << inode->i_blkbits);
}
static inline int vfs_dq_reserve_block(struct inode *inode, qsize_t nr)
{
- return vfs_dq_reserve_space(inode,
- nr << inode->i_blkbits);
+ return vfs_dq_reserve_space(inode, nr << inode->i_blkbits);
}
static inline int vfs_dq_claim_block(struct inode *inode, qsize_t nr)
{
- return vfs_dq_claim_space(inode,
- nr << inode->i_blkbits);
+ return vfs_dq_claim_space(inode, nr << inode->i_blkbits);
}
static inline
@@ -452,12 +446,12 @@ void vfs_dq_release_reservation_block(struct inode *inode, qsize_t nr)
static inline void vfs_dq_free_block_nodirty(struct inode *inode, qsize_t nr)
{
- vfs_dq_free_space_nodirty(inode, nr << inode->i_sb->s_blocksize_bits);
+ vfs_dq_free_space_nodirty(inode, nr << inode->i_blkbits);
}
static inline void vfs_dq_free_block(struct inode *inode, qsize_t nr)
{
- vfs_dq_free_space(inode, nr << inode->i_sb->s_blocksize_bits);
+ vfs_dq_free_space(inode, nr << inode->i_blkbits);
}
/*
--
1.6.0.2
ext3_dquot_initialize() and ext3_dquot_drop() is no longer
needed because of modified quota locking.
Signed-off-by: Jan Kara <[email protected]>
---
fs/ext3/super.c | 44 ++------------------------------------------
1 files changed, 2 insertions(+), 42 deletions(-)
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index b70d90e..ec410a9 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -707,8 +707,6 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group")
#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
-static int ext3_dquot_initialize(struct inode *inode, int type);
-static int ext3_dquot_drop(struct inode *inode);
static int ext3_write_dquot(struct dquot *dquot);
static int ext3_acquire_dquot(struct dquot *dquot);
static int ext3_release_dquot(struct dquot *dquot);
@@ -723,8 +721,8 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
const char *data, size_t len, loff_t off);
static struct dquot_operations ext3_quota_operations = {
- .initialize = ext3_dquot_initialize,
- .drop = ext3_dquot_drop,
+ .initialize = dquot_initialize,
+ .drop = dquot_drop,
.alloc_space = dquot_alloc_space,
.alloc_inode = dquot_alloc_inode,
.free_space = dquot_free_space,
@@ -2713,44 +2711,6 @@ static inline struct inode *dquot_to_inode(struct dquot *dquot)
return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
}
-static int ext3_dquot_initialize(struct inode *inode, int type)
-{
- handle_t *handle;
- int ret, err;
-
- /* We may create quota structure so we need to reserve enough blocks */
- handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS(inode->i_sb));
- if (IS_ERR(handle))
- return PTR_ERR(handle);
- ret = dquot_initialize(inode, type);
- err = ext3_journal_stop(handle);
- if (!ret)
- ret = err;
- return ret;
-}
-
-static int ext3_dquot_drop(struct inode *inode)
-{
- handle_t *handle;
- int ret, err;
-
- /* We may delete quota structure so we need to reserve enough blocks */
- handle = ext3_journal_start(inode, 2*EXT3_QUOTA_DEL_BLOCKS(inode->i_sb));
- if (IS_ERR(handle)) {
- /*
- * We call dquot_drop() anyway to at least release references
- * to quota structures so that umount does not hang.
- */
- dquot_drop(inode);
- return PTR_ERR(handle);
- }
- ret = dquot_drop(inode);
- err = ext3_journal_stop(handle);
- if (!ret)
- ret = err;
- return ret;
-}
-
static int ext3_write_dquot(struct dquot *dquot)
{
int ret, err;
--
1.6.0.2
From: Mingming Cao <[email protected]>
Reserved quota will be claimed at the block allocation time. Over-booked
quota could be returned back with the release callback function.
Signed-off-by: Mingming Cao <[email protected]>
Signed-off-by: Jan Kara <[email protected]>
---
fs/dquot.c | 110 ++++++++++++++++++++++++++++++++++++++++++++--
include/linux/quota.h | 6 +++
include/linux/quotaops.h | 53 ++++++++++++++++++++++
3 files changed, 165 insertions(+), 4 deletions(-)
diff --git a/fs/dquot.c b/fs/dquot.c
index 9b1c4d3..2916f91 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -904,6 +904,23 @@ static inline void dquot_resv_space(struct dquot *dquot, qsize_t number)
dquot->dq_dqb.dqb_rsvspace += number;
}
+/*
+ * Claim reserved quota space
+ */
+static void dquot_claim_reserved_space(struct dquot *dquot,
+ qsize_t number)
+{
+ WARN_ON(dquot->dq_dqb.dqb_rsvspace < number);
+ dquot->dq_dqb.dqb_curspace += number;
+ dquot->dq_dqb.dqb_rsvspace -= number;
+}
+
+static inline
+void dquot_free_reserved_space(struct dquot *dquot, qsize_t number)
+{
+ dquot->dq_dqb.dqb_rsvspace -= number;
+}
+
static inline void dquot_decr_inodes(struct dquot *dquot, qsize_t number)
{
if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE ||
@@ -1452,6 +1469,72 @@ warn_put_all:
return ret;
}
+int dquot_claim_space(struct inode *inode, qsize_t number)
+{
+ int cnt;
+ int ret = QUOTA_OK;
+
+ if (IS_NOQUOTA(inode)) {
+ inode_add_bytes(inode, number);
+ goto out;
+ }
+
+ down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ if (IS_NOQUOTA(inode)) {
+ up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ inode_add_bytes(inode, number);
+ goto out;
+ }
+
+ spin_lock(&dq_data_lock);
+ /* Claim reserved quotas to allocated quotas */
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ if (inode->i_dquot[cnt] != NODQUOT)
+ dquot_claim_reserved_space(inode->i_dquot[cnt],
+ number);
+ }
+ /* Update inode bytes */
+ inode_add_bytes(inode, number);
+ spin_unlock(&dq_data_lock);
+ /* Dirtify all the dquots - this can block when journalling */
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ if (inode->i_dquot[cnt])
+ mark_dquot_dirty(inode->i_dquot[cnt]);
+ up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+out:
+ return ret;
+}
+EXPORT_SYMBOL(dquot_claim_space);
+
+/*
+ * Release reserved quota space
+ */
+void dquot_release_reserved_space(struct inode *inode, qsize_t number)
+{
+ int cnt;
+
+ if (IS_NOQUOTA(inode))
+ goto out;
+
+ down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ if (IS_NOQUOTA(inode))
+ goto out_unlock;
+
+ spin_lock(&dq_data_lock);
+ /* Release reserved dquots */
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ if (inode->i_dquot[cnt] != NODQUOT)
+ dquot_free_reserved_space(inode->i_dquot[cnt], number);
+ }
+ spin_unlock(&dq_data_lock);
+
+out_unlock:
+ up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+out:
+ return;
+}
+EXPORT_SYMBOL(dquot_release_reserved_space);
+
/*
* This operation can block, but only after everything is updated
*/
@@ -1529,6 +1612,19 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
}
/*
+ * call back function, get reserved quota space from underlying fs
+ */
+qsize_t dquot_get_reserved_space(struct inode *inode)
+{
+ qsize_t reserved_space = 0;
+
+ if (sb_any_quota_active(inode->i_sb) &&
+ inode->i_sb->dq_op->get_reserved_space)
+ reserved_space = inode->i_sb->dq_op->get_reserved_space(inode);
+ return reserved_space;
+}
+
+/*
* Transfer the number of inode and blocks from one diskquota to an other.
*
* This operation can block, but only after everything is updated
@@ -1536,7 +1632,8 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
*/
int dquot_transfer(struct inode *inode, struct iattr *iattr)
{
- qsize_t space;
+ qsize_t space, cur_space;
+ qsize_t rsv_space = 0;
struct dquot *transfer_from[MAXQUOTAS];
struct dquot *transfer_to[MAXQUOTAS];
int cnt, ret = QUOTA_OK;
@@ -1575,7 +1672,9 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
goto put_all;
}
spin_lock(&dq_data_lock);
- space = inode_get_bytes(inode);
+ cur_space = inode_get_bytes(inode);
+ rsv_space = dquot_get_reserved_space(inode);
+ space = cur_space + rsv_space;
/* Build the transfer_from list and check the limits */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (transfer_to[cnt] == NODQUOT)
@@ -1604,11 +1703,14 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
warntype_from_space[cnt] =
info_bdq_free(transfer_from[cnt], space);
dquot_decr_inodes(transfer_from[cnt], 1);
- dquot_decr_space(transfer_from[cnt], space);
+ dquot_decr_space(transfer_from[cnt], cur_space);
+ dquot_free_reserved_space(transfer_from[cnt],
+ rsv_space);
}
dquot_incr_inodes(transfer_to[cnt], 1);
- dquot_incr_space(transfer_to[cnt], space);
+ dquot_incr_space(transfer_to[cnt], cur_space);
+ dquot_resv_space(transfer_to[cnt], rsv_space);
inode->i_dquot[cnt] = transfer_to[cnt];
}
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 54b837f..a510d91 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -311,6 +311,12 @@ struct dquot_operations {
int (*write_info) (struct super_block *, int); /* Write of quota "superblock" */
/* reserve quota for delayed block allocation */
int (*reserve_space) (struct inode *, qsize_t, int);
+ /* claim reserved quota for delayed alloc */
+ int (*claim_space) (struct inode *, qsize_t);
+ /* release rsved quota for delayed alloc */
+ void (*release_rsv) (struct inode *, qsize_t);
+ /* get reserved quota for delayed alloc */
+ qsize_t (*get_reserved_space) (struct inode *);
};
/* Operations handling requests from userspace */
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 3e3a0d2..7369d04 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -35,6 +35,11 @@ void dquot_destroy(struct dquot *dquot);
int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc);
int dquot_alloc_inode(const struct inode *inode, qsize_t number);
+int dquot_reserve_space(struct inode *inode, qsize_t number, int prealloc);
+int dquot_claim_space(struct inode *inode, qsize_t number);
+void dquot_release_reserved_space(struct inode *inode, qsize_t number);
+qsize_t dquot_get_reserved_space(struct inode *inode);
+
int dquot_free_space(struct inode *inode, qsize_t number);
int dquot_free_inode(const struct inode *inode, qsize_t number);
@@ -203,6 +208,31 @@ static inline int vfs_dq_alloc_inode(struct inode *inode)
return 0;
}
+/*
+ * Convert in-memory reserved quotas to real consumed quotas
+ */
+static inline int vfs_dq_claim_space(struct inode *inode, qsize_t nr)
+{
+ if (sb_any_quota_active(inode->i_sb)) {
+ if (inode->i_sb->dq_op->claim_space(inode, nr) == NO_QUOTA)
+ return 1;
+ } else
+ inode_add_bytes(inode, nr);
+
+ mark_inode_dirty(inode);
+ return 0;
+}
+
+/*
+ * Release reserved (in-memory) quotas
+ */
+static inline
+void vfs_dq_release_reservation_space(struct inode *inode, qsize_t nr)
+{
+ if (sb_any_quota_active(inode->i_sb))
+ inode->i_sb->dq_op->release_rsv(inode, nr);
+}
+
static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
{
if (sb_any_quota_active(inode->i_sb))
@@ -354,6 +384,17 @@ static inline int vfs_dq_reserve_space(struct inode *inode, qsize_t nr)
return 0;
}
+static inline int vfs_dq_claim_space(struct inode *inode, qsize_t nr)
+{
+ return vfs_dq_alloc_space(inode, nr);
+}
+
+static inline
+int vfs_dq_release_reservation_space(struct inode *inode, qsize_t nr)
+{
+ return 0;
+}
+
static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
{
inode_sub_bytes(inode, nr);
@@ -397,6 +438,18 @@ static inline int vfs_dq_reserve_block(struct inode *inode, qsize_t nr)
nr << inode->i_blkbits);
}
+static inline int vfs_dq_claim_block(struct inode *inode, qsize_t nr)
+{
+ return vfs_dq_claim_space(inode,
+ nr << inode->i_blkbits);
+}
+
+static inline
+void vfs_dq_release_reservation_block(struct inode *inode, qsize_t nr)
+{
+ vfs_dq_release_reservation_space(inode, nr << inode->i_blkbits);
+}
+
static inline void vfs_dq_free_block_nodirty(struct inode *inode, qsize_t nr)
{
vfs_dq_free_space_nodirty(inode, nr << inode->i_sb->s_blocksize_bits);
--
1.6.0.2
From: Mingming Cao <[email protected]>
Delayed allocation defers the block allocation at the dirty pages
flush-out time, doing quota charge/check at that time is too late.
But we can't charge the quota blocks until blocks are really allocated,
otherwise users could get overcharged after reboot from system crash.
This patch adds quota reservation for delayed allocation. Quota blocks
are reserved in memory, inode and quota won't gets dirtied until later
block allocation time.
Signed-off-by: Mingming Cao <[email protected]>
Signed-off-by: Jan Kara <[email protected]>
---
fs/dquot.c | 117 +++++++++++++++++++++++++++++++++------------
include/linux/quota.h | 3 +
include/linux/quotaops.h | 21 ++++++++
3 files changed, 110 insertions(+), 31 deletions(-)
diff --git a/fs/dquot.c b/fs/dquot.c
index bca3cac..9b1c4d3 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -899,6 +899,11 @@ static inline void dquot_incr_space(struct dquot *dquot, qsize_t number)
dquot->dq_dqb.dqb_curspace += number;
}
+static inline void dquot_resv_space(struct dquot *dquot, qsize_t number)
+{
+ dquot->dq_dqb.dqb_rsvspace += number;
+}
+
static inline void dquot_decr_inodes(struct dquot *dquot, qsize_t number)
{
if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE ||
@@ -1068,7 +1073,11 @@ err_out:
kfree_skb(skb);
}
#endif
-
+/*
+ * Write warnings to the console and send warning messages over netlink.
+ *
+ * Note that this function can sleep.
+ */
static inline void flush_warnings(struct dquot * const *dquots, char *warntype)
{
int i;
@@ -1129,13 +1138,18 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
/* needs dq_data_lock */
static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *warntype)
{
+ qsize_t tspace;
+
*warntype = QUOTA_NL_NOWARN;
if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type) ||
test_bit(DQ_FAKE_B, &dquot->dq_flags))
return QUOTA_OK;
+ tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace
+ + space;
+
if (dquot->dq_dqb.dqb_bhardlimit &&
- dquot->dq_dqb.dqb_curspace + space > dquot->dq_dqb.dqb_bhardlimit &&
+ tspace > dquot->dq_dqb.dqb_bhardlimit &&
!ignore_hardlimit(dquot)) {
if (!prealloc)
*warntype = QUOTA_NL_BHARDWARN;
@@ -1143,7 +1157,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
}
if (dquot->dq_dqb.dqb_bsoftlimit &&
- dquot->dq_dqb.dqb_curspace + space > dquot->dq_dqb.dqb_bsoftlimit &&
+ tspace > dquot->dq_dqb.dqb_bsoftlimit &&
dquot->dq_dqb.dqb_btime && get_seconds() >= dquot->dq_dqb.dqb_btime &&
!ignore_hardlimit(dquot)) {
if (!prealloc)
@@ -1152,7 +1166,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
}
if (dquot->dq_dqb.dqb_bsoftlimit &&
- dquot->dq_dqb.dqb_curspace + space > dquot->dq_dqb.dqb_bsoftlimit &&
+ tspace > dquot->dq_dqb.dqb_bsoftlimit &&
dquot->dq_dqb.dqb_btime == 0) {
if (!prealloc) {
*warntype = QUOTA_NL_BSOFTWARN;
@@ -1306,51 +1320,92 @@ void vfs_dq_drop(struct inode *inode)
/*
* This operation can block, but only after everything is updated
*/
-int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
+int __dquot_alloc_space(struct inode *inode, qsize_t number,
+ int warn, int reserve)
{
- int cnt, ret = NO_QUOTA;
+ int cnt, ret = QUOTA_OK;
char warntype[MAXQUOTAS];
- /* First test before acquiring mutex - solves deadlocks when we
- * re-enter the quota code and are already holding the mutex */
- if (IS_NOQUOTA(inode)) {
-out_add:
- inode_add_bytes(inode, number);
- return QUOTA_OK;
- }
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
warntype[cnt] = QUOTA_NL_NOWARN;
- down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
- if (IS_NOQUOTA(inode)) { /* Now we can do reliable test... */
- up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
- goto out_add;
- }
spin_lock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (inode->i_dquot[cnt] == NODQUOT)
continue;
- if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt) == NO_QUOTA)
- goto warn_put_all;
+ if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt)
+ == NO_QUOTA) {
+ ret = NO_QUOTA;
+ goto out_unlock;
+ }
}
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (inode->i_dquot[cnt] == NODQUOT)
continue;
- dquot_incr_space(inode->i_dquot[cnt], number);
+ if (reserve)
+ dquot_resv_space(inode->i_dquot[cnt], number);
+ else
+ dquot_incr_space(inode->i_dquot[cnt], number);
}
- inode_add_bytes(inode, number);
- ret = QUOTA_OK;
-warn_put_all:
+ if (!reserve)
+ inode_add_bytes(inode, number);
+out_unlock:
spin_unlock(&dq_data_lock);
- if (ret == QUOTA_OK)
- /* Dirtify all the dquots - this can block when journalling */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- if (inode->i_dquot[cnt])
- mark_dquot_dirty(inode->i_dquot[cnt]);
flush_warnings(inode->i_dquot, warntype);
+ return ret;
+}
+
+int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
+{
+ int cnt, ret = QUOTA_OK;
+
+ /*
+ * First test before acquiring mutex - solves deadlocks when we
+ * re-enter the quota code and are already holding the mutex
+ */
+ if (IS_NOQUOTA(inode)) {
+ inode_add_bytes(inode, number);
+ goto out;
+ }
+
+ down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ if (IS_NOQUOTA(inode)) {
+ inode_add_bytes(inode, number);
+ goto out_unlock;
+ }
+
+ ret = __dquot_alloc_space(inode, number, warn, 0);
+ if (ret == NO_QUOTA)
+ goto out_unlock;
+
+ /* Dirtify all the dquots - this can block when journalling */
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ if (inode->i_dquot[cnt])
+ mark_dquot_dirty(inode->i_dquot[cnt]);
+out_unlock:
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+out:
+ return ret;
+}
+
+int dquot_reserve_space(struct inode *inode, qsize_t number, int warn)
+{
+ int ret = QUOTA_OK;
+
+ if (IS_NOQUOTA(inode))
+ goto out;
+
+ down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ if (IS_NOQUOTA(inode))
+ goto out_unlock;
+
+ ret = __dquot_alloc_space(inode, number, warn, 1);
+out_unlock:
+ up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+out:
return ret;
}
+EXPORT_SYMBOL(dquot_reserve_space);
/*
* This operation can block, but only after everything is updated
@@ -2057,7 +2112,7 @@ static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di)
spin_lock(&dq_data_lock);
di->dqb_bhardlimit = stoqb(dm->dqb_bhardlimit);
di->dqb_bsoftlimit = stoqb(dm->dqb_bsoftlimit);
- di->dqb_curspace = dm->dqb_curspace;
+ di->dqb_curspace = dm->dqb_curspace + dm->dqb_rsvspace;
di->dqb_ihardlimit = dm->dqb_ihardlimit;
di->dqb_isoftlimit = dm->dqb_isoftlimit;
di->dqb_curinodes = dm->dqb_curinodes;
@@ -2097,7 +2152,7 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
spin_lock(&dq_data_lock);
if (di->dqb_valid & QIF_SPACE) {
- dm->dqb_curspace = di->dqb_curspace;
+ dm->dqb_curspace = di->dqb_curspace - dm->dqb_rsvspace;
check_blim = 1;
__set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags);
}
diff --git a/include/linux/quota.h b/include/linux/quota.h
index d72d5d8..54b837f 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -198,6 +198,7 @@ struct mem_dqblk {
qsize_t dqb_bhardlimit; /* absolute limit on disk blks alloc */
qsize_t dqb_bsoftlimit; /* preferred limit on disk blks */
qsize_t dqb_curspace; /* current used space */
+ qsize_t dqb_rsvspace; /* current reserved space for delalloc*/
qsize_t dqb_ihardlimit; /* absolute limit on allocated inodes */
qsize_t dqb_isoftlimit; /* preferred inode limit */
qsize_t dqb_curinodes; /* current # allocated inodes */
@@ -308,6 +309,8 @@ struct dquot_operations {
int (*release_dquot) (struct dquot *); /* Quota is going to be deleted from disk */
int (*mark_dirty) (struct dquot *); /* Dquot is marked dirty */
int (*write_info) (struct super_block *, int); /* Write of quota "superblock" */
+ /* reserve quota for delayed block allocation */
+ int (*reserve_space) (struct inode *, qsize_t, int);
};
/* Operations handling requests from userspace */
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 0b35b3a..3e3a0d2 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -183,6 +183,16 @@ static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr)
return ret;
}
+static inline int vfs_dq_reserve_space(struct inode *inode, qsize_t nr)
+{
+ if (sb_any_quota_active(inode->i_sb)) {
+ /* Used space is updated in alloc_space() */
+ if (inode->i_sb->dq_op->reserve_space(inode, nr, 0) == NO_QUOTA)
+ return 1;
+ }
+ return 0;
+}
+
static inline int vfs_dq_alloc_inode(struct inode *inode)
{
if (sb_any_quota_active(inode->i_sb)) {
@@ -339,6 +349,11 @@ static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr)
return 0;
}
+static inline int vfs_dq_reserve_space(struct inode *inode, qsize_t nr)
+{
+ return 0;
+}
+
static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
{
inode_sub_bytes(inode, nr);
@@ -376,6 +391,12 @@ static inline int vfs_dq_alloc_block(struct inode *inode, qsize_t nr)
nr << inode->i_sb->s_blocksize_bits);
}
+static inline int vfs_dq_reserve_block(struct inode *inode, qsize_t nr)
+{
+ return vfs_dq_reserve_space(inode,
+ nr << inode->i_blkbits);
+}
+
static inline void vfs_dq_free_block_nodirty(struct inode *inode, qsize_t nr)
{
vfs_dq_free_space_nodirty(inode, nr << inode->i_sb->s_blocksize_bits);
--
1.6.0.2
From: Mingming Cao <[email protected]>
According to checkpatch: EXPORT_SYMBOL(foo); should immediately follow its
function/variable
Signed-off-by: Mingming Cao <[email protected]>
Signed-off-by: Jan Kara <[email protected]>
---
fs/dquot.c | 69 ++++++++++++++++++++++++++++-------------------------------
1 files changed, 33 insertions(+), 36 deletions(-)
diff --git a/fs/dquot.c b/fs/dquot.c
index 2916f91..28aa146 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -132,6 +132,7 @@
static DEFINE_SPINLOCK(dq_list_lock);
static DEFINE_SPINLOCK(dq_state_lock);
DEFINE_SPINLOCK(dq_data_lock);
+EXPORT_SYMBOL(dq_data_lock);
static char *quotatypes[] = INITQFNAMES;
static struct quota_format_type *quota_formats; /* List of registered formats */
@@ -148,6 +149,7 @@ int register_quota_format(struct quota_format_type *fmt)
spin_unlock(&dq_list_lock);
return 0;
}
+EXPORT_SYMBOL(register_quota_format);
void unregister_quota_format(struct quota_format_type *fmt)
{
@@ -159,6 +161,7 @@ void unregister_quota_format(struct quota_format_type *fmt)
*actqf = (*actqf)->qf_next;
spin_unlock(&dq_list_lock);
}
+EXPORT_SYMBOL(unregister_quota_format);
static struct quota_format_type *find_quota_format(int id)
{
@@ -215,6 +218,7 @@ static unsigned int dq_hash_bits, dq_hash_mask;
static struct hlist_head *dquot_hash;
struct dqstats dqstats;
+EXPORT_SYMBOL(dqstats);
static inline unsigned int
hashfn(const struct super_block *sb, unsigned int id, int type)
@@ -309,6 +313,7 @@ int dquot_mark_dquot_dirty(struct dquot *dquot)
spin_unlock(&dq_list_lock);
return 0;
}
+EXPORT_SYMBOL(dquot_mark_dquot_dirty);
/* This function needs dq_list_lock */
static inline int clear_dquot_dirty(struct dquot *dquot)
@@ -360,6 +365,7 @@ out_iolock:
mutex_unlock(&dquot->dq_lock);
return ret;
}
+EXPORT_SYMBOL(dquot_acquire);
/*
* Write dquot to disk
@@ -389,6 +395,7 @@ out_sem:
mutex_unlock(&dqopt->dqio_mutex);
return ret;
}
+EXPORT_SYMBOL(dquot_commit);
/*
* Release dquot
@@ -417,6 +424,7 @@ out_dqlock:
mutex_unlock(&dquot->dq_lock);
return ret;
}
+EXPORT_SYMBOL(dquot_release);
void dquot_destroy(struct dquot *dquot)
{
@@ -516,6 +524,7 @@ out:
mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
return ret;
}
+EXPORT_SYMBOL(dquot_scan_active);
int vfs_quota_sync(struct super_block *sb, int type)
{
@@ -563,6 +572,7 @@ int vfs_quota_sync(struct super_block *sb, int type)
return 0;
}
+EXPORT_SYMBOL(vfs_quota_sync);
/* Free unused dquots from cache */
static void prune_dqcache(int count)
@@ -672,6 +682,7 @@ we_slept:
put_dquot_last(dquot);
spin_unlock(&dq_list_lock);
}
+EXPORT_SYMBOL(dqput);
struct dquot *dquot_alloc(struct super_block *sb, int type)
{
@@ -767,6 +778,7 @@ out:
return dquot;
}
+EXPORT_SYMBOL(dqget);
static int dqinit_needed(struct inode *inode, int type)
{
@@ -1282,6 +1294,7 @@ out_err:
dqput(got[cnt]);
return ret;
}
+EXPORT_SYMBOL(dquot_initialize);
/*
* Release all quotas referenced by inode
@@ -1302,6 +1315,7 @@ int dquot_drop(struct inode *inode)
dqput(put[cnt]);
return 0;
}
+EXPORT_SYMBOL(dquot_drop);
/* Wrapper to remove references to quota structures from inode */
void vfs_dq_drop(struct inode *inode)
@@ -1324,6 +1338,7 @@ void vfs_dq_drop(struct inode *inode)
inode->i_sb->dq_op->drop(inode);
}
}
+EXPORT_SYMBOL(vfs_dq_drop);
/*
* Following four functions update i_blocks+i_bytes fields and
@@ -1404,6 +1419,7 @@ out_unlock:
out:
return ret;
}
+EXPORT_SYMBOL(dquot_alloc_space);
int dquot_reserve_space(struct inode *inode, qsize_t number, int warn)
{
@@ -1468,6 +1484,7 @@ warn_put_all:
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
return ret;
}
+EXPORT_SYMBOL(dquot_alloc_inode);
int dquot_claim_space(struct inode *inode, qsize_t number)
{
@@ -1574,6 +1591,7 @@ out_sub:
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
return QUOTA_OK;
}
+EXPORT_SYMBOL(dquot_free_space);
/*
* This operation can block, but only after everything is updated
@@ -1610,6 +1628,7 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
return QUOTA_OK;
}
+EXPORT_SYMBOL(dquot_free_inode);
/*
* call back function, get reserved quota space from underlying fs
@@ -1746,6 +1765,7 @@ over_quota:
ret = NO_QUOTA;
goto warn_put_all;
}
+EXPORT_SYMBOL(dquot_transfer);
/* Wrapper for transferring ownership of an inode */
int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
@@ -1757,7 +1777,7 @@ int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
}
return 0;
}
-
+EXPORT_SYMBOL(vfs_dq_transfer);
/*
* Write info of quota file to disk
@@ -1772,6 +1792,7 @@ int dquot_commit_info(struct super_block *sb, int type)
mutex_unlock(&dqopt->dqio_mutex);
return ret;
}
+EXPORT_SYMBOL(dquot_commit_info);
/*
* Definitions of diskquota operations.
@@ -1924,13 +1945,14 @@ put_inodes:
}
return ret;
}
+EXPORT_SYMBOL(vfs_quota_disable);
int vfs_quota_off(struct super_block *sb, int type, int remount)
{
return vfs_quota_disable(sb, type, remount ? DQUOT_SUSPENDED :
(DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED));
}
-
+EXPORT_SYMBOL(vfs_quota_off);
/*
* Turn quotas on on a device
*/
@@ -2087,6 +2109,7 @@ int vfs_quota_on_path(struct super_block *sb, int type, int format_id,
DQUOT_LIMITS_ENABLED);
return error;
}
+EXPORT_SYMBOL(vfs_quota_on_path);
int vfs_quota_on(struct super_block *sb, int type, int format_id, char *name,
int remount)
@@ -2104,6 +2127,7 @@ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *name,
}
return error;
}
+EXPORT_SYMBOL(vfs_quota_on);
/*
* More powerful function for turning on quotas allowing setting
@@ -2150,6 +2174,7 @@ out_lock:
load_quota:
return vfs_load_quota_inode(inode, type, format_id, flags);
}
+EXPORT_SYMBOL(vfs_quota_enable);
/*
* This function is used when filesystem needs to initialize quotas
@@ -2179,6 +2204,7 @@ out:
dput(dentry);
return error;
}
+EXPORT_SYMBOL(vfs_quota_on_mount);
/* Wrapper to turn on quotas when remounting rw */
int vfs_dq_quota_on_remount(struct super_block *sb)
@@ -2195,6 +2221,7 @@ int vfs_dq_quota_on_remount(struct super_block *sb)
}
return ret;
}
+EXPORT_SYMBOL(vfs_dq_quota_on_remount);
static inline qsize_t qbtos(qsize_t blocks)
{
@@ -2236,6 +2263,7 @@ int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *d
return 0;
}
+EXPORT_SYMBOL(vfs_get_dqblk);
/* Generic routine for setting common part of quota structure */
static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
@@ -2327,6 +2355,7 @@ int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *d
out:
return rc;
}
+EXPORT_SYMBOL(vfs_set_dqblk);
/* Generic routine for getting common part of quota file information */
int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
@@ -2348,6 +2377,7 @@ int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
return 0;
}
+EXPORT_SYMBOL(vfs_get_dqinfo);
/* Generic routine for setting common part of quota file information */
int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
@@ -2376,6 +2406,7 @@ out:
mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
return err;
}
+EXPORT_SYMBOL(vfs_set_dqinfo);
struct quotactl_ops vfs_quotactl_ops = {
.quota_on = vfs_quota_on,
@@ -2531,37 +2562,3 @@ static int __init dquot_init(void)
return 0;
}
module_init(dquot_init);
-
-EXPORT_SYMBOL(register_quota_format);
-EXPORT_SYMBOL(unregister_quota_format);
-EXPORT_SYMBOL(dqstats);
-EXPORT_SYMBOL(dq_data_lock);
-EXPORT_SYMBOL(vfs_quota_enable);
-EXPORT_SYMBOL(vfs_quota_on);
-EXPORT_SYMBOL(vfs_quota_on_path);
-EXPORT_SYMBOL(vfs_quota_on_mount);
-EXPORT_SYMBOL(vfs_quota_disable);
-EXPORT_SYMBOL(vfs_quota_off);
-EXPORT_SYMBOL(dquot_scan_active);
-EXPORT_SYMBOL(vfs_quota_sync);
-EXPORT_SYMBOL(vfs_get_dqinfo);
-EXPORT_SYMBOL(vfs_set_dqinfo);
-EXPORT_SYMBOL(vfs_get_dqblk);
-EXPORT_SYMBOL(vfs_set_dqblk);
-EXPORT_SYMBOL(dquot_commit);
-EXPORT_SYMBOL(dquot_commit_info);
-EXPORT_SYMBOL(dquot_acquire);
-EXPORT_SYMBOL(dquot_release);
-EXPORT_SYMBOL(dquot_mark_dquot_dirty);
-EXPORT_SYMBOL(dquot_initialize);
-EXPORT_SYMBOL(dquot_drop);
-EXPORT_SYMBOL(vfs_dq_drop);
-EXPORT_SYMBOL(dqget);
-EXPORT_SYMBOL(dqput);
-EXPORT_SYMBOL(dquot_alloc_space);
-EXPORT_SYMBOL(dquot_alloc_inode);
-EXPORT_SYMBOL(dquot_free_space);
-EXPORT_SYMBOL(dquot_free_inode);
-EXPORT_SYMBOL(dquot_transfer);
-EXPORT_SYMBOL(vfs_dq_transfer);
-EXPORT_SYMBOL(vfs_dq_quota_on_remount);
--
1.6.0.2
ext4_dquot_initialize() and ext4_dquot_drop() is no longer
needed because of modified quota locking.
Signed-off-by: Jan Kara <[email protected]>
---
fs/ext4/super.c | 44 ++------------------------------------------
1 files changed, 2 insertions(+), 42 deletions(-)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e5f06a5..f0785fd 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -926,8 +926,6 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page, gfp_
#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
-static int ext4_dquot_initialize(struct inode *inode, int type);
-static int ext4_dquot_drop(struct inode *inode);
static int ext4_write_dquot(struct dquot *dquot);
static int ext4_acquire_dquot(struct dquot *dquot);
static int ext4_release_dquot(struct dquot *dquot);
@@ -942,8 +940,8 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
const char *data, size_t len, loff_t off);
static struct dquot_operations ext4_quota_operations = {
- .initialize = ext4_dquot_initialize,
- .drop = ext4_dquot_drop,
+ .initialize = dquot_initialize,
+ .drop = dquot_drop,
.alloc_space = dquot_alloc_space,
.alloc_inode = dquot_alloc_inode,
.free_space = dquot_free_space,
@@ -3378,44 +3376,6 @@ static inline struct inode *dquot_to_inode(struct dquot *dquot)
return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
}
-static int ext4_dquot_initialize(struct inode *inode, int type)
-{
- handle_t *handle;
- int ret, err;
-
- /* We may create quota structure so we need to reserve enough blocks */
- handle = ext4_journal_start(inode, 2*EXT4_QUOTA_INIT_BLOCKS(inode->i_sb));
- if (IS_ERR(handle))
- return PTR_ERR(handle);
- ret = dquot_initialize(inode, type);
- err = ext4_journal_stop(handle);
- if (!ret)
- ret = err;
- return ret;
-}
-
-static int ext4_dquot_drop(struct inode *inode)
-{
- handle_t *handle;
- int ret, err;
-
- /* We may delete quota structure so we need to reserve enough blocks */
- handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb));
- if (IS_ERR(handle)) {
- /*
- * We call dquot_drop() anyway to at least release references
- * to quota structures so that umount does not hang.
- */
- dquot_drop(inode);
- return PTR_ERR(handle);
- }
- ret = dquot_drop(inode);
- err = ext4_journal_stop(handle);
- if (!ret)
- ret = err;
- return ret;
-}
-
static int ext4_write_dquot(struct dquot *dquot)
{
int ret, err;
--
1.6.0.2
We implement dqget() and dqput() that need neither dqonoff_mutex nor dqptr_sem.
Then move dqget() and dqput() calls so that they are not called from under
dqptr_sem. This is important because filesystem callbacks aren't called from
under dqptr_sem which used to cause *lots* of problems with lock ranking
(and with OCFS2 they became close to unsolvable).
The patch also removes two functions which were introduced solely because OCFS2
needed them to cope with the old locking scheme. As time showed, they were not
enough for OCFS2 anyway and it would be unnecessary work to adapt them to the
new locking scheme in which they aren't needed. As a result OCFS2 needs the
following patch to compile properly with quotas. Sorry to any bisecters which
hit this in advance.
Signed-off-by: Jan Kara <[email protected]>
---
fs/dquot.c | 218 ++++++++++++++++++++++++++--------------------
include/linux/quotaops.h | 2 -
2 files changed, 122 insertions(+), 98 deletions(-)
diff --git a/fs/dquot.c b/fs/dquot.c
index 48c0571..bca3cac 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -87,14 +87,17 @@
#define __DQUOT_PARANOIA
/*
- * There are two quota SMP locks. dq_list_lock protects all lists with quotas
- * and quota formats and also dqstats structure containing statistics about the
- * lists. dq_data_lock protects data from dq_dqb and also mem_dqinfo structures
- * and also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes.
+ * There are three quota SMP locks. dq_list_lock protects all lists with quotas
+ * and quota formats, dqstats structure containing statistics about the lists
+ * dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and
+ * also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes.
* i_blocks and i_bytes updates itself are guarded by i_lock acquired directly
- * in inode_add_bytes() and inode_sub_bytes().
+ * in inode_add_bytes() and inode_sub_bytes(). dq_state_lock protects
+ * modifications of quota state (on quotaon and quotaoff) and readers who care
+ * about latest values take it as well.
*
- * The spinlock ordering is hence: dq_data_lock > dq_list_lock > i_lock
+ * The spinlock ordering is hence: dq_data_lock > dq_list_lock > i_lock,
+ * dq_list_lock > dq_state_lock
*
* Note that some things (eg. sb pointer, type, id) doesn't change during
* the life of the dquot structure and so needn't to be protected by a lock
@@ -103,12 +106,7 @@
* operation is just reading pointers from inode (or not using them at all) the
* read lock is enough. If pointers are altered function must hold write lock
* (these locking rules also apply for S_NOQUOTA flag in the inode - note that
- * for altering the flag i_mutex is also needed). If operation is holding
- * reference to dquot in other way (e.g. quotactl ops) it must be guarded by
- * dqonoff_mutex.
- * This locking assures that:
- * a) update/access to dquot pointers in inode is serialized
- * b) everyone is guarded against invalidate_dquots()
+ * for altering the flag i_mutex is also needed).
*
* Each dquot has its dq_lock mutex. Locked dquots might not be referenced
* from inodes (dquot_alloc_space() and such don't check the dq_lock).
@@ -122,10 +120,17 @@
* Lock ordering (including related VFS locks) is the following:
* i_mutex > dqonoff_sem > journal_lock > dqptr_sem > dquot->dq_lock >
* dqio_mutex
+ * The lock ordering of dqptr_sem imposed by quota code is only dqonoff_sem >
+ * dqptr_sem. But filesystem has to count with the fact that functions such as
+ * dquot_alloc_space() acquire dqptr_sem and they usually have to be called
+ * from inside a transaction to keep filesystem consistency after a crash. Also
+ * filesystems usually want to do some IO on dquot from ->mark_dirty which is
+ * called with dqptr_sem held.
* i_mutex on quota files is special (it's below dqio_mutex)
*/
static DEFINE_SPINLOCK(dq_list_lock);
+static DEFINE_SPINLOCK(dq_state_lock);
DEFINE_SPINLOCK(dq_data_lock);
static char *quotatypes[] = INITQFNAMES;
@@ -428,7 +433,7 @@ static inline void do_destroy_dquot(struct dquot *dquot)
* quota is disabled and pointers from inodes removed so there cannot be new
* quota users. There can still be some users of quotas due to inodes being
* just deleted or pruned by prune_icache() (those are not attached to any
- * list). We have to wait for such users.
+ * list) or parallel quotactl call. We have to wait for such users.
*/
static void invalidate_dquots(struct super_block *sb, int type)
{
@@ -600,7 +605,6 @@ static struct shrinker dqcache_shrinker = {
/*
* Put reference to dquot
* NOTE: If you change this function please check whether dqput_blocks() works right...
- * MUST be called with either dqptr_sem or dqonoff_mutex held
*/
void dqput(struct dquot *dquot)
{
@@ -697,36 +701,30 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type)
}
/*
- * Check whether dquot is in memory.
- * MUST be called with either dqptr_sem or dqonoff_mutex held
- */
-int dquot_is_cached(struct super_block *sb, unsigned int id, int type)
-{
- unsigned int hashent = hashfn(sb, id, type);
- int ret = 0;
-
- if (!sb_has_quota_active(sb, type))
- return 0;
- spin_lock(&dq_list_lock);
- if (find_dquot(hashent, sb, id, type) != NODQUOT)
- ret = 1;
- spin_unlock(&dq_list_lock);
- return ret;
-}
-
-/*
* Get reference to dquot
- * MUST be called with either dqptr_sem or dqonoff_mutex held
+ *
+ * Locking is slightly tricky here. We are guarded from parallel quotaoff()
+ * destroying our dquot by:
+ * a) checking for quota flags under dq_list_lock and
+ * b) getting a reference to dquot before we release dq_list_lock
*/
struct dquot *dqget(struct super_block *sb, unsigned int id, int type)
{
unsigned int hashent = hashfn(sb, id, type);
- struct dquot *dquot, *empty = NODQUOT;
+ struct dquot *dquot = NODQUOT, *empty = NODQUOT;
if (!sb_has_quota_active(sb, type))
return NODQUOT;
we_slept:
spin_lock(&dq_list_lock);
+ spin_lock(&dq_state_lock);
+ if (!sb_has_quota_active(sb, type)) {
+ spin_unlock(&dq_state_lock);
+ spin_unlock(&dq_list_lock);
+ goto out;
+ }
+ spin_unlock(&dq_state_lock);
+
if ((dquot = find_dquot(hashent, sb, id, type)) == NODQUOT) {
if (empty == NODQUOT) {
spin_unlock(&dq_list_lock);
@@ -735,6 +733,7 @@ we_slept:
goto we_slept;
}
dquot = empty;
+ empty = NODQUOT;
dquot->dq_id = id;
/* all dquots go on the inuse_list */
put_inuse(dquot);
@@ -749,8 +748,6 @@ we_slept:
dqstats.cache_hits++;
dqstats.lookups++;
spin_unlock(&dq_list_lock);
- if (empty)
- do_destroy_dquot(empty);
}
/* Wait for dq_lock - after this we know that either dquot_release() is already
* finished or it will be canceled due to dq_count > 1 test */
@@ -758,11 +755,15 @@ we_slept:
/* Read the dquot and instantiate it (everything done only if needed) */
if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && sb->dq_op->acquire_dquot(dquot) < 0) {
dqput(dquot);
- return NODQUOT;
+ dquot = NODQUOT;
+ goto out;
}
#ifdef __DQUOT_PARANOIA
BUG_ON(!dquot->dq_sb); /* Has somebody invalidated entry under us? */
#endif
+out:
+ if (empty)
+ do_destroy_dquot(empty);
return dquot;
}
@@ -1198,63 +1199,76 @@ static int info_bdq_free(struct dquot *dquot, qsize_t space)
}
/*
* Initialize quota pointers in inode
- * Transaction must be started at entry
+ * We do things in a bit complicated way but by that we avoid calling
+ * dqget() and thus filesystem callbacks under dqptr_sem.
*/
int dquot_initialize(struct inode *inode, int type)
{
unsigned int id = 0;
int cnt, ret = 0;
+ struct dquot *got[MAXQUOTAS] = { NODQUOT, NODQUOT };
+ struct super_block *sb = inode->i_sb;
/* First test before acquiring mutex - solves deadlocks when we
* re-enter the quota code and are already holding the mutex */
if (IS_NOQUOTA(inode))
return 0;
- down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
+
+ /* First get references to structures we might need. */
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ if (type != -1 && cnt != type)
+ continue;
+ switch (cnt) {
+ case USRQUOTA:
+ id = inode->i_uid;
+ break;
+ case GRPQUOTA:
+ id = inode->i_gid;
+ break;
+ }
+ got[cnt] = dqget(sb, id, cnt);
+ }
+
+ down_write(&sb_dqopt(sb)->dqptr_sem);
/* Having dqptr_sem we know NOQUOTA flags can't be altered... */
if (IS_NOQUOTA(inode))
goto out_err;
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (type != -1 && cnt != type)
continue;
+ /* Avoid races with quotaoff() */
+ if (!sb_has_quota_active(sb, cnt))
+ continue;
if (inode->i_dquot[cnt] == NODQUOT) {
- switch (cnt) {
- case USRQUOTA:
- id = inode->i_uid;
- break;
- case GRPQUOTA:
- id = inode->i_gid;
- break;
- }
- inode->i_dquot[cnt] = dqget(inode->i_sb, id, cnt);
+ inode->i_dquot[cnt] = got[cnt];
+ got[cnt] = NODQUOT;
}
}
out_err:
- up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ up_write(&sb_dqopt(sb)->dqptr_sem);
+ /* Drop unused references */
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ dqput(got[cnt]);
return ret;
}
/*
* Release all quotas referenced by inode
- * Transaction must be started at an entry
*/
-int dquot_drop_locked(struct inode *inode)
+int dquot_drop(struct inode *inode)
{
int cnt;
+ struct dquot *put[MAXQUOTAS];
+ down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (inode->i_dquot[cnt] != NODQUOT) {
- dqput(inode->i_dquot[cnt]);
- inode->i_dquot[cnt] = NODQUOT;
- }
+ put[cnt] = inode->i_dquot[cnt];
+ inode->i_dquot[cnt] = NODQUOT;
}
- return 0;
-}
-
-int dquot_drop(struct inode *inode)
-{
- down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
- dquot_drop_locked(inode);
up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
+
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ dqput(put[cnt]);
return 0;
}
@@ -1470,8 +1484,9 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
qsize_t space;
struct dquot *transfer_from[MAXQUOTAS];
struct dquot *transfer_to[MAXQUOTAS];
- int cnt, ret = NO_QUOTA, chuid = (iattr->ia_valid & ATTR_UID) && inode->i_uid != iattr->ia_uid,
- chgid = (iattr->ia_valid & ATTR_GID) && inode->i_gid != iattr->ia_gid;
+ int cnt, ret = QUOTA_OK;
+ int chuid = iattr->ia_valid & ATTR_UID && inode->i_uid != iattr->ia_uid,
+ chgid = iattr->ia_valid & ATTR_GID && inode->i_gid != iattr->ia_gid;
char warntype_to[MAXQUOTAS];
char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS];
@@ -1479,21 +1494,11 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
* re-enter the quota code and are already holding the mutex */
if (IS_NOQUOTA(inode))
return QUOTA_OK;
- /* Clear the arrays */
+ /* Initialize the arrays */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- transfer_to[cnt] = transfer_from[cnt] = NODQUOT;
+ transfer_from[cnt] = NODQUOT;
+ transfer_to[cnt] = NODQUOT;
warntype_to[cnt] = QUOTA_NL_NOWARN;
- }
- down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
- /* Now recheck reliably when holding dqptr_sem */
- if (IS_NOQUOTA(inode)) { /* File without quota accounting? */
- up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
- return QUOTA_OK;
- }
- /* First build the transfer_to list - here we can block on
- * reading/instantiating of dquots. We know that the transaction for
- * us was already started so we don't violate lock ranking here */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
switch (cnt) {
case USRQUOTA:
if (!chuid)
@@ -1507,6 +1512,13 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
break;
}
}
+
+ down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ /* Now recheck reliably when holding dqptr_sem */
+ if (IS_NOQUOTA(inode)) { /* File without quota accounting? */
+ up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ goto put_all;
+ }
spin_lock(&dq_data_lock);
space = inode_get_bytes(inode);
/* Build the transfer_from list and check the limits */
@@ -1517,7 +1529,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
if (check_idq(transfer_to[cnt], 1, warntype_to + cnt) ==
NO_QUOTA || check_bdq(transfer_to[cnt], space, 0,
warntype_to + cnt) == NO_QUOTA)
- goto warn_put_all;
+ goto over_quota;
}
/*
@@ -1545,28 +1557,37 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
inode->i_dquot[cnt] = transfer_to[cnt];
}
- ret = QUOTA_OK;
-warn_put_all:
spin_unlock(&dq_data_lock);
+ up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
+
/* Dirtify all the dquots - this can block when journalling */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (transfer_from[cnt])
mark_dquot_dirty(transfer_from[cnt]);
- if (transfer_to[cnt])
+ if (transfer_to[cnt]) {
mark_dquot_dirty(transfer_to[cnt]);
+ /* The reference we got is transferred to the inode */
+ transfer_to[cnt] = NODQUOT;
+ }
}
+warn_put_all:
flush_warnings(transfer_to, warntype_to);
flush_warnings(transfer_from, warntype_from_inodes);
flush_warnings(transfer_from, warntype_from_space);
-
+put_all:
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (ret == QUOTA_OK && transfer_from[cnt] != NODQUOT)
- dqput(transfer_from[cnt]);
- if (ret == NO_QUOTA && transfer_to[cnt] != NODQUOT)
- dqput(transfer_to[cnt]);
+ dqput(transfer_from[cnt]);
+ dqput(transfer_to[cnt]);
}
- up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
return ret;
+over_quota:
+ spin_unlock(&dq_data_lock);
+ up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ /* Clear dquot pointers we don't want to dqput() */
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ transfer_from[cnt] = NODQUOT;
+ ret = NO_QUOTA;
+ goto warn_put_all;
}
/* Wrapper for transferring ownership of an inode */
@@ -1651,19 +1672,24 @@ int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags)
continue;
if (flags & DQUOT_SUSPENDED) {
+ spin_lock(&dq_state_lock);
dqopt->flags |=
dquot_state_flag(DQUOT_SUSPENDED, cnt);
+ spin_unlock(&dq_state_lock);
} else {
+ spin_lock(&dq_state_lock);
dqopt->flags &= ~dquot_state_flag(flags, cnt);
/* Turning off suspended quotas? */
if (!sb_has_quota_loaded(sb, cnt) &&
sb_has_quota_suspended(sb, cnt)) {
dqopt->flags &= ~dquot_state_flag(
DQUOT_SUSPENDED, cnt);
+ spin_unlock(&dq_state_lock);
iput(dqopt->files[cnt]);
dqopt->files[cnt] = NULL;
continue;
}
+ spin_unlock(&dq_state_lock);
}
/* We still have to keep quota loaded? */
@@ -1830,7 +1856,9 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
}
mutex_unlock(&dqopt->dqio_mutex);
mutex_unlock(&inode->i_mutex);
+ spin_lock(&dq_state_lock);
dqopt->flags |= dquot_state_flag(flags, type);
+ spin_unlock(&dq_state_lock);
add_dquot_ref(sb, type);
mutex_unlock(&dqopt->dqonoff_mutex);
@@ -1872,9 +1900,11 @@ static int vfs_quota_on_remount(struct super_block *sb, int type)
}
inode = dqopt->files[type];
dqopt->files[type] = NULL;
+ spin_lock(&dq_state_lock);
flags = dqopt->flags & dquot_state_flag(DQUOT_USAGE_ENABLED |
DQUOT_LIMITS_ENABLED, type);
dqopt->flags &= ~dquot_state_flag(DQUOT_STATE_FLAGS, type);
+ spin_unlock(&dq_state_lock);
mutex_unlock(&dqopt->dqonoff_mutex);
flags = dquot_generic_flag(flags, type);
@@ -1952,7 +1982,9 @@ int vfs_quota_enable(struct inode *inode, int type, int format_id,
ret = -EBUSY;
goto out_lock;
}
+ spin_lock(&dq_state_lock);
sb_dqopt(sb)->flags |= dquot_state_flag(flags, type);
+ spin_unlock(&dq_state_lock);
out_lock:
mutex_unlock(&dqopt->dqonoff_mutex);
return ret;
@@ -2039,14 +2071,12 @@ int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *d
{
struct dquot *dquot;
- mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
- if (!(dquot = dqget(sb, id, type))) {
- mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
+ dquot = dqget(sb, id, type);
+ if (dquot == NODQUOT)
return -ESRCH;
- }
do_get_dqblk(dquot, di);
dqput(dquot);
- mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
+
return 0;
}
@@ -2130,7 +2160,6 @@ int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *d
struct dquot *dquot;
int rc;
- mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
dquot = dqget(sb, id, type);
if (!dquot) {
rc = -ESRCH;
@@ -2139,7 +2168,6 @@ int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *d
rc = do_set_dqblk(dquot, di);
dqput(dquot);
out:
- mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
return rc;
}
@@ -2370,11 +2398,9 @@ EXPORT_SYMBOL(dquot_release);
EXPORT_SYMBOL(dquot_mark_dquot_dirty);
EXPORT_SYMBOL(dquot_initialize);
EXPORT_SYMBOL(dquot_drop);
-EXPORT_SYMBOL(dquot_drop_locked);
EXPORT_SYMBOL(vfs_dq_drop);
EXPORT_SYMBOL(dqget);
EXPORT_SYMBOL(dqput);
-EXPORT_SYMBOL(dquot_is_cached);
EXPORT_SYMBOL(dquot_alloc_space);
EXPORT_SYMBOL(dquot_alloc_inode);
EXPORT_SYMBOL(dquot_free_space);
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 21b781a..0b35b3a 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -24,10 +24,8 @@ void sync_dquots(struct super_block *sb, int type);
int dquot_initialize(struct inode *inode, int type);
int dquot_drop(struct inode *inode);
-int dquot_drop_locked(struct inode *inode);
struct dquot *dqget(struct super_block *sb, unsigned int id, int type);
void dqput(struct dquot *dquot);
-int dquot_is_cached(struct super_block *sb, unsigned int id, int type);
int dquot_scan_active(struct super_block *sb,
int (*fn)(struct dquot *dquot, unsigned long priv),
unsigned long priv);
--
1.6.0.2
在 2009-01-16五的 19:08 +0100,Jan Kara写道:
> ext4_dquot_initialize() and ext4_dquot_drop() is no longer
> needed because of modified quota locking.
>
> Signed-off-by: Jan Kara <[email protected]>
Reviewed_by: Mingming Cao <[email protected]>
> ---
> fs/ext4/super.c | 44 ++------------------------------------------
> 1 files changed, 2 insertions(+), 42 deletions(-)
>
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index e5f06a5..f0785fd 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -926,8 +926,6 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page, gfp_
> #define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
> #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
>
> -static int ext4_dquot_initialize(struct inode *inode, int type);
> -static int ext4_dquot_drop(struct inode *inode);
> static int ext4_write_dquot(struct dquot *dquot);
> static int ext4_acquire_dquot(struct dquot *dquot);
> static int ext4_release_dquot(struct dquot *dquot);
> @@ -942,8 +940,8 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
> const char *data, size_t len, loff_t off);
>
> static struct dquot_operations ext4_quota_operations = {
> - .initialize = ext4_dquot_initialize,
> - .drop = ext4_dquot_drop,
> + .initialize = dquot_initialize,
> + .drop = dquot_drop,
> .alloc_space = dquot_alloc_space,
> .alloc_inode = dquot_alloc_inode,
> .free_space = dquot_free_space,
> @@ -3378,44 +3376,6 @@ static inline struct inode *dquot_to_inode(struct dquot *dquot)
> return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
> }
>
> -static int ext4_dquot_initialize(struct inode *inode, int type)
> -{
> - handle_t *handle;
> - int ret, err;
> -
> - /* We may create quota structure so we need to reserve enough blocks */
> - handle = ext4_journal_start(inode, 2*EXT4_QUOTA_INIT_BLOCKS(inode->i_sb));
> - if (IS_ERR(handle))
> - return PTR_ERR(handle);
> - ret = dquot_initialize(inode, type);
> - err = ext4_journal_stop(handle);
> - if (!ret)
> - ret = err;
> - return ret;
> -}
> -
> -static int ext4_dquot_drop(struct inode *inode)
> -{
> - handle_t *handle;
> - int ret, err;
> -
> - /* We may delete quota structure so we need to reserve enough blocks */
> - handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb));
> - if (IS_ERR(handle)) {
> - /*
> - * We call dquot_drop() anyway to at least release references
> - * to quota structures so that umount does not hang.
> - */
> - dquot_drop(inode);
> - return PTR_ERR(handle);
> - }
> - ret = dquot_drop(inode);
> - err = ext4_journal_stop(handle);
> - if (!ret)
> - ret = err;
> - return ret;
> -}
> -
> static int ext4_write_dquot(struct dquot *dquot)
> {
> int ret, err;
在 2009-01-16五的 19:08 +0100,Jan Kara写道:
> ext3_dquot_initialize() and ext3_dquot_drop() is no longer
> needed because of modified quota locking.
>
> Signed-off-by: Jan Kara <[email protected]>
Reviewed_by: Mingming Cao <[email protected]>
> ---
> fs/ext3/super.c | 44 ++------------------------------------------
> 1 files changed, 2 insertions(+), 42 deletions(-)
>
> diff --git a/fs/ext3/super.c b/fs/ext3/super.c
> index b70d90e..ec410a9 100644
> --- a/fs/ext3/super.c
> +++ b/fs/ext3/super.c
> @@ -707,8 +707,6 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
> #define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group")
> #define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
>
> -static int ext3_dquot_initialize(struct inode *inode, int type);
> -static int ext3_dquot_drop(struct inode *inode);
> static int ext3_write_dquot(struct dquot *dquot);
> static int ext3_acquire_dquot(struct dquot *dquot);
> static int ext3_release_dquot(struct dquot *dquot);
> @@ -723,8 +721,8 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
> const char *data, size_t len, loff_t off);
>
> static struct dquot_operations ext3_quota_operations = {
> - .initialize = ext3_dquot_initialize,
> - .drop = ext3_dquot_drop,
> + .initialize = dquot_initialize,
> + .drop = dquot_drop,
> .alloc_space = dquot_alloc_space,
> .alloc_inode = dquot_alloc_inode,
> .free_space = dquot_free_space,
> @@ -2713,44 +2711,6 @@ static inline struct inode *dquot_to_inode(struct dquot *dquot)
> return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
> }
>
> -static int ext3_dquot_initialize(struct inode *inode, int type)
> -{
> - handle_t *handle;
> - int ret, err;
> -
> - /* We may create quota structure so we need to reserve enough blocks */
> - handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS(inode->i_sb));
> - if (IS_ERR(handle))
> - return PTR_ERR(handle);
> - ret = dquot_initialize(inode, type);
> - err = ext3_journal_stop(handle);
> - if (!ret)
> - ret = err;
> - return ret;
> -}
> -
> -static int ext3_dquot_drop(struct inode *inode)
> -{
> - handle_t *handle;
> - int ret, err;
> -
> - /* We may delete quota structure so we need to reserve enough blocks */
> - handle = ext3_journal_start(inode, 2*EXT3_QUOTA_DEL_BLOCKS(inode->i_sb));
> - if (IS_ERR(handle)) {
> - /*
> - * We call dquot_drop() anyway to at least release references
> - * to quota structures so that umount does not hang.
> - */
> - dquot_drop(inode);
> - return PTR_ERR(handle);
> - }
> - ret = dquot_drop(inode);
> - err = ext3_journal_stop(handle);
> - if (!ret)
> - ret = err;
> - return ret;
> -}
> -
> static int ext3_write_dquot(struct dquot *dquot)
> {
> int ret, err;
On Fri, 16 Jan 2009 19:08:09 +0100 Jan Kara <[email protected]> wrote:
> static DEFINE_SPINLOCK(dq_list_lock);
> +static DEFINE_SPINLOCK(dq_state_lock);
> DEFINE_SPINLOCK(dq_data_lock);
The chances are very good that two or even three of these locks will
all get placed into the same cacheline in main memory. The effects
will be quite bad if different CPUs (or, worse, different nodes) are
taking these locks.
For single, kernel-wide locks like these I think we should almost
always pad out to a cacheline.
With __cacheline_aligned_in_smp, rather than __cacheline_aligned.
Because spinlocks do take space even in uniprocessor builds.
There are probably lots of existing locks which should be converted.
On Fri 23-01-09 23:49:12, Andrew Morton wrote:
> On Fri, 16 Jan 2009 19:08:09 +0100 Jan Kara <[email protected]> wrote:
>
> > static DEFINE_SPINLOCK(dq_list_lock);
> > +static DEFINE_SPINLOCK(dq_state_lock);
> > DEFINE_SPINLOCK(dq_data_lock);
>
> The chances are very good that two or even three of these locks will
> all get placed into the same cacheline in main memory. The effects
> will be quite bad if different CPUs (or, worse, different nodes) are
> taking these locks.
>
> For single, kernel-wide locks like these I think we should almost
> always pad out to a cacheline.
I never thought about this. Thanks for the idea.
> With __cacheline_aligned_in_smp, rather than __cacheline_aligned.
> Because spinlocks do take space even in uniprocessor builds.
I've added this to my list of quota cleanups.
Honza
--
Jan Kara <[email protected]>
SUSE Labs, CR