Hello,
this is the second version of my patches to fix the race in ext4 xattr handling
that led to the assertion failure in jbd2 that Ritesh reported. The series is
completely reworked. This time it survives beating with "stress-ng --xattr 16".
Also I'm somewhat happier about the current solution because, although it is
still not trivial to use mbcache correctly, it is at least harder to use it
in a racy way :). Please let me know what you think about this series.
Changes since v1:
* Reworked the series to fix all corner cases and make the API less error-prone.
Honza
Previous versions:
Link: http://lore.kernel.org/r/[email protected] # v1
Currently we remove the EA inode from the mbcache as soon as its xattr
refcount drops to zero. However there can be pending attempts to reuse
the inode and thus the refcount handling code has to handle the
situation when the refcount increases from zero anyway. So save some
work and just keep the EA inode in the mbcache until it is evicted. At
that moment we are sure a following iget() of the EA inode will fail
anyway (or wait for the eviction to finish and load things from the
disk again) and so removing the mbcache entry at that moment is fine
and simplifies the code a bit.
CC: [email protected]
Fixes: 82939d7999df ("ext4: convert to mbcache2")
Signed-off-by: Jan Kara <[email protected]>
---
fs/ext4/inode.c | 2 ++
fs/ext4/xattr.c | 24 ++++++++----------------
fs/ext4/xattr.h | 1 +
3 files changed, 11 insertions(+), 16 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3dce7d058985..7450ee734262 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -177,6 +177,8 @@ void ext4_evict_inode(struct inode *inode)
trace_ext4_evict_inode(inode);
+ if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)
+ ext4_evict_ea_inode(inode);
if (inode->i_nlink) {
/*
* When journalling data dirty buffers are tracked only in the
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 042325349098..7fc40fb1e6b3 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -436,6 +436,14 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
return err;
}
+/* Remove entry from mbcache when EA inode is getting evicted */
+void ext4_evict_ea_inode(struct inode *inode)
+{
+ if (EA_INODE_CACHE(inode))
+ mb_cache_entry_delete(EA_INODE_CACHE(inode),
+ ext4_xattr_inode_get_hash(inode), inode->i_ino);
+}
+
static int
ext4_xattr_inode_verify_hashes(struct inode *ea_inode,
struct ext4_xattr_entry *entry, void *buffer,
@@ -976,10 +984,8 @@ int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode,
static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
int ref_change)
{
- struct mb_cache *ea_inode_cache = EA_INODE_CACHE(ea_inode);
struct ext4_iloc iloc;
s64 ref_count;
- u32 hash;
int ret;
inode_lock(ea_inode);
@@ -1002,14 +1008,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
set_nlink(ea_inode, 1);
ext4_orphan_del(handle, ea_inode);
-
- if (ea_inode_cache) {
- hash = ext4_xattr_inode_get_hash(ea_inode);
- mb_cache_entry_create(ea_inode_cache,
- GFP_NOFS, hash,
- ea_inode->i_ino,
- true /* reusable */);
- }
}
} else {
WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld",
@@ -1022,12 +1020,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
clear_nlink(ea_inode);
ext4_orphan_add(handle, ea_inode);
-
- if (ea_inode_cache) {
- hash = ext4_xattr_inode_get_hash(ea_inode);
- mb_cache_entry_delete(ea_inode_cache, hash,
- ea_inode->i_ino);
- }
}
}
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 77efb9a627ad..060b7a2f485c 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -178,6 +178,7 @@ extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array);
extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle);
+extern void ext4_evict_ea_inode(struct inode *inode);
extern const struct xattr_handler *ext4_xattr_handlers[];
--
2.35.3
Add a function mb_cache_entry_try_delete() to delete an mbcache entry
if it is unused and also add a function to wait for an entry to become
unused - mb_cache_entry_wait_unused(). We do not share code between the
two deleting functions as one of them will go away soon.
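For illustration, the intended calling convention looks roughly like this
(a sketch of a hypothetical caller freeing a shared xattr block, not part of
this patch; ea_block_cache, hash and bh are assumed to exist in the caller):

	struct mb_cache_entry *ce;

	ce = mb_cache_entry_try_delete(ea_block_cache, hash, bh->b_blocknr);
	if (ce) {
		/*
		 * The entry still has users and was returned with an extra
		 * reference. Wait until we are the last user, then drop our
		 * reference.
		 */
		mb_cache_entry_wait_unused(ce);
		mb_cache_entry_put(ea_block_cache, ce);
	}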
CC: [email protected]
Fixes: 82939d7999df ("ext4: convert to mbcache2")
Signed-off-by: Jan Kara <[email protected]>
---
fs/mbcache.c | 63 ++++++++++++++++++++++++++++++++++++++++-
include/linux/mbcache.h | 10 ++++++-
2 files changed, 71 insertions(+), 2 deletions(-)
diff --git a/fs/mbcache.c b/fs/mbcache.c
index cfc28129fb6f..1ae66b2c75f4 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -125,6 +125,19 @@ void __mb_cache_entry_free(struct mb_cache_entry *entry)
}
EXPORT_SYMBOL(__mb_cache_entry_free);
+/*
+ * mb_cache_entry_wait_unused - wait to be the last user of the entry
+ *
+ * @entry - entry to work on
+ *
+ * Wait to be the last user of the entry.
+ */
+void mb_cache_entry_wait_unused(struct mb_cache_entry *entry)
+{
+ wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 3);
+}
+EXPORT_SYMBOL(mb_cache_entry_wait_unused);
+
static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
struct mb_cache_entry *entry,
u32 key)
@@ -217,7 +230,7 @@ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
}
EXPORT_SYMBOL(mb_cache_entry_get);
-/* mb_cache_entry_delete - remove a cache entry
+/* mb_cache_entry_delete - try to remove a cache entry
* @cache - cache we work with
* @key - key
* @value - value
@@ -254,6 +267,54 @@ void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value)
}
EXPORT_SYMBOL(mb_cache_entry_delete);
+/* mb_cache_entry_try_delete - try to remove a cache entry
+ * @cache - cache we work with
+ * @key - key
+ * @value - value
+ *
+ * Remove entry from cache @cache with key @key and value @value. The removal
+ * happens only if the entry is unused. The function returns NULL in case the
+ * entry was successfully removed or there's no entry in cache. Otherwise the
+ * function returns the entry that we failed to delete because it has users.
+ */
+struct mb_cache_entry *mb_cache_entry_try_delete(struct mb_cache *cache,
+ u32 key, u64 value)
+{
+ struct hlist_bl_node *node;
+ struct hlist_bl_head *head;
+ struct mb_cache_entry *entry;
+
+ head = mb_cache_entry_head(cache, key);
+ hlist_bl_lock(head);
+ hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
+ if (entry->e_key == key && entry->e_value == value) {
+ if (atomic_read(&entry->e_refcnt) > 2) {
+ atomic_inc(&entry->e_refcnt);
+ hlist_bl_unlock(head);
+ return entry;
+ }
+ /* We keep hash list reference to keep entry alive */
+ hlist_bl_del_init(&entry->e_hash_list);
+ hlist_bl_unlock(head);
+ spin_lock(&cache->c_list_lock);
+ if (!list_empty(&entry->e_list)) {
+ list_del_init(&entry->e_list);
+ if (!WARN_ONCE(cache->c_entry_count == 0,
+ "mbcache: attempt to decrement c_entry_count past zero"))
+ cache->c_entry_count--;
+ atomic_dec(&entry->e_refcnt);
+ }
+ spin_unlock(&cache->c_list_lock);
+ mb_cache_entry_put(cache, entry);
+ return NULL;
+ }
+ }
+ hlist_bl_unlock(head);
+
+ return NULL;
+}
+EXPORT_SYMBOL(mb_cache_entry_try_delete);
+
/* mb_cache_entry_touch - cache entry got used
* @cache - cache the entry belongs to
* @entry - entry that got used
diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h
index 20f1e3ff6013..1176fdfb8d53 100644
--- a/include/linux/mbcache.h
+++ b/include/linux/mbcache.h
@@ -30,15 +30,23 @@ void mb_cache_destroy(struct mb_cache *cache);
int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
u64 value, bool reusable);
void __mb_cache_entry_free(struct mb_cache_entry *entry);
+void mb_cache_entry_wait_unused(struct mb_cache_entry *entry);
static inline int mb_cache_entry_put(struct mb_cache *cache,
struct mb_cache_entry *entry)
{
- if (!atomic_dec_and_test(&entry->e_refcnt))
+ unsigned int cnt = atomic_dec_return(&entry->e_refcnt);
+
+ if (cnt > 0) {
+ if (cnt <= 3)
+ wake_up_var(&entry->e_refcnt);
return 0;
+ }
__mb_cache_entry_free(entry);
return 1;
}
+struct mb_cache_entry *mb_cache_entry_try_delete(struct mb_cache *cache,
+ u32 key, u64 value);
void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value);
struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
u64 value);
--
2.35.3
Use the fact that entries with an elevated refcount are not removed from
the hash and just move removal of the entry from the hash to entry
freeing time. When doing this we also change the generic code to hold
one reference to the cache entry, not two of them, which makes the code
somewhat more obvious.
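To summarize the resulting reference counting rules (an illustrative sketch
only, LRU list handling omitted; see the diff below for the real code):

	/*
	 * mb_cache_entry_create() sets e_refcnt to 1 - the hash reference.
	 * Lookups take additional references only via atomic_inc_not_zero(),
	 * so an entry whose count already dropped to zero cannot be revived.
	 */

	/* Shrinker: drop the hash reference only if nobody else uses the entry */
	if (atomic_cmpxchg(&entry->e_refcnt, 1, 0) == 1)
		__mb_cache_entry_free(cache, entry);

	/*
	 * mb_cache_entry_try_delete(): we hold one reference from
	 * mb_cache_entry_get(), so "unused" means a count of exactly 2
	 * (ours plus the hash reference).
	 */
	if (atomic_cmpxchg(&entry->e_refcnt, 2, 0) == 2)
		__mb_cache_entry_free(cache, entry);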
Signed-off-by: Jan Kara <[email protected]>
---
fs/mbcache.c | 108 +++++++++++++++-------------------------
include/linux/mbcache.h | 25 ++++++----
2 files changed, 55 insertions(+), 78 deletions(-)
diff --git a/fs/mbcache.c b/fs/mbcache.c
index c7b28a4e96da..b854ad93d6c9 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -90,7 +90,7 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
return -ENOMEM;
INIT_LIST_HEAD(&entry->e_list);
- /* One ref for hash, one ref returned */
+ /* Initial hash reference */
atomic_set(&entry->e_refcnt, 1);
entry->e_key = key;
entry->e_value = value;
@@ -106,21 +106,28 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
}
}
hlist_bl_add_head(&entry->e_hash_list, head);
- hlist_bl_unlock(head);
-
+ /*
+ * Add entry to LRU list before it can be found by
+ * mb_cache_entry_delete() to avoid races
+ */
spin_lock(&cache->c_list_lock);
list_add_tail(&entry->e_list, &cache->c_list);
- /* Grab ref for LRU list */
- atomic_inc(&entry->e_refcnt);
cache->c_entry_count++;
spin_unlock(&cache->c_list_lock);
+ hlist_bl_unlock(head);
return 0;
}
EXPORT_SYMBOL(mb_cache_entry_create);
-void __mb_cache_entry_free(struct mb_cache_entry *entry)
+void __mb_cache_entry_free(struct mb_cache *cache, struct mb_cache_entry *entry)
{
+ struct hlist_bl_head *head;
+
+ head = mb_cache_entry_head(cache, entry->e_key);
+ hlist_bl_lock(head);
+ hlist_bl_del(&entry->e_hash_list);
+ hlist_bl_unlock(head);
kmem_cache_free(mb_entry_cache, entry);
}
EXPORT_SYMBOL(__mb_cache_entry_free);
@@ -134,7 +141,7 @@ EXPORT_SYMBOL(__mb_cache_entry_free);
*/
void mb_cache_entry_wait_unused(struct mb_cache_entry *entry)
{
- wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 3);
+ wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 2);
}
EXPORT_SYMBOL(mb_cache_entry_wait_unused);
@@ -155,10 +162,9 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
while (node) {
entry = hlist_bl_entry(node, struct mb_cache_entry,
e_hash_list);
- if (entry->e_key == key && entry->e_reusable) {
- atomic_inc(&entry->e_refcnt);
+ if (entry->e_key == key && entry->e_reusable &&
+ atomic_inc_not_zero(&entry->e_refcnt))
goto out;
- }
node = node->next;
}
entry = NULL;
@@ -218,10 +224,9 @@ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
head = mb_cache_entry_head(cache, key);
hlist_bl_lock(head);
hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
- if (entry->e_key == key && entry->e_value == value) {
- atomic_inc(&entry->e_refcnt);
+ if (entry->e_key == key && entry->e_value == value &&
+ atomic_inc_not_zero(&entry->e_refcnt))
goto out;
- }
}
entry = NULL;
out:
@@ -243,37 +248,25 @@ EXPORT_SYMBOL(mb_cache_entry_get);
struct mb_cache_entry *mb_cache_entry_try_delete(struct mb_cache *cache,
u32 key, u64 value)
{
- struct hlist_bl_node *node;
- struct hlist_bl_head *head;
struct mb_cache_entry *entry;
- head = mb_cache_entry_head(cache, key);
- hlist_bl_lock(head);
- hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
- if (entry->e_key == key && entry->e_value == value) {
- if (atomic_read(&entry->e_refcnt) > 2) {
- atomic_inc(&entry->e_refcnt);
- hlist_bl_unlock(head);
- return entry;
- }
- /* We keep hash list reference to keep entry alive */
- hlist_bl_del_init(&entry->e_hash_list);
- hlist_bl_unlock(head);
- spin_lock(&cache->c_list_lock);
- if (!list_empty(&entry->e_list)) {
- list_del_init(&entry->e_list);
- if (!WARN_ONCE(cache->c_entry_count == 0,
- "mbcache: attempt to decrement c_entry_count past zero"))
- cache->c_entry_count--;
- atomic_dec(&entry->e_refcnt);
- }
- spin_unlock(&cache->c_list_lock);
- mb_cache_entry_put(cache, entry);
- return NULL;
- }
- }
- hlist_bl_unlock(head);
+ entry = mb_cache_entry_get(cache, key, value);
+ if (!entry)
+ return NULL;
+
+ /*
+ * Drop the ref we got from mb_cache_entry_get() and the initial hash
+ * ref if we are the last user
+ */
+ if (atomic_cmpxchg(&entry->e_refcnt, 2, 0) != 2)
+ return entry;
+ spin_lock(&cache->c_list_lock);
+ if (!list_empty(&entry->e_list))
+ list_del_init(&entry->e_list);
+ cache->c_entry_count--;
+ spin_unlock(&cache->c_list_lock);
+ __mb_cache_entry_free(cache, entry);
return NULL;
}
EXPORT_SYMBOL(mb_cache_entry_try_delete);
@@ -305,42 +298,24 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache,
unsigned long nr_to_scan)
{
struct mb_cache_entry *entry;
- struct hlist_bl_head *head;
unsigned long shrunk = 0;
spin_lock(&cache->c_list_lock);
while (nr_to_scan-- && !list_empty(&cache->c_list)) {
entry = list_first_entry(&cache->c_list,
struct mb_cache_entry, e_list);
- if (entry->e_referenced || atomic_read(&entry->e_refcnt) > 2) {
+ /* Drop initial hash reference if there is no user */
+ if (entry->e_referenced ||
+ atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) {
entry->e_referenced = 0;
list_move_tail(&entry->e_list, &cache->c_list);
continue;
}
list_del_init(&entry->e_list);
cache->c_entry_count--;
- /*
- * We keep LRU list reference so that entry doesn't go away
- * from under us.
- */
spin_unlock(&cache->c_list_lock);
- head = mb_cache_entry_head(cache, entry->e_key);
- hlist_bl_lock(head);
- /* Now a reliable check if the entry didn't get used... */
- if (atomic_read(&entry->e_refcnt) > 2) {
- hlist_bl_unlock(head);
- spin_lock(&cache->c_list_lock);
- list_add_tail(&entry->e_list, &cache->c_list);
- cache->c_entry_count++;
- continue;
- }
- if (!hlist_bl_unhashed(&entry->e_hash_list)) {
- hlist_bl_del_init(&entry->e_hash_list);
- atomic_dec(&entry->e_refcnt);
- }
- hlist_bl_unlock(head);
- if (mb_cache_entry_put(cache, entry))
- shrunk++;
+ __mb_cache_entry_free(cache, entry);
+ shrunk++;
cond_resched();
spin_lock(&cache->c_list_lock);
}
@@ -432,11 +407,6 @@ void mb_cache_destroy(struct mb_cache *cache)
* point.
*/
list_for_each_entry_safe(entry, next, &cache->c_list, e_list) {
- if (!hlist_bl_unhashed(&entry->e_hash_list)) {
- hlist_bl_del_init(&entry->e_hash_list);
- atomic_dec(&entry->e_refcnt);
- } else
- WARN_ON(1);
list_del(&entry->e_list);
WARN_ON(atomic_read(&entry->e_refcnt) != 1);
mb_cache_entry_put(cache, entry);
diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h
index 3b25c3004ea9..87155712310c 100644
--- a/include/linux/mbcache.h
+++ b/include/linux/mbcache.h
@@ -13,8 +13,16 @@ struct mb_cache;
struct mb_cache_entry {
/* List of entries in cache - protected by cache->c_list_lock */
struct list_head e_list;
- /* Hash table list - protected by hash chain bitlock */
+ /*
+ * Hash table list - protected by hash chain bitlock. The entry is
+ * guaranteed to be hashed while e_refcnt > 0.
+ */
struct hlist_bl_node e_hash_list;
+ /*
+ * Entry refcount. Once it reaches zero, entry is unhashed and freed.
+ * While refcount > 0, the entry is guaranteed to stay in the hash and
+ * e.g. mb_cache_entry_try_delete() will fail.
+ */
atomic_t e_refcnt;
/* Key in hash - stable during lifetime of the entry */
u32 e_key;
@@ -29,22 +37,21 @@ void mb_cache_destroy(struct mb_cache *cache);
int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
u64 value, bool reusable);
-void __mb_cache_entry_free(struct mb_cache_entry *entry);
+void __mb_cache_entry_free(struct mb_cache *cache,
+ struct mb_cache_entry *entry);
void mb_cache_entry_wait_unused(struct mb_cache_entry *entry);
-static inline int mb_cache_entry_put(struct mb_cache *cache,
- struct mb_cache_entry *entry)
+static inline void mb_cache_entry_put(struct mb_cache *cache,
+ struct mb_cache_entry *entry)
{
unsigned int cnt = atomic_dec_return(&entry->e_refcnt);
if (cnt > 0) {
- if (cnt <= 3)
+ if (cnt <= 2)
wake_up_var(&entry->e_refcnt);
- return 0;
+ return;
}
- __mb_cache_entry_free(entry);
- return 1;
+ __mb_cache_entry_free(cache, entry);
}
-
struct mb_cache_entry *mb_cache_entry_try_delete(struct mb_cache *cache,
u32 key, u64 value);
struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
--
2.35.3
Freeing of an xattr block reference is open-coded in two places. Factor
it out into a separate function and use it in both places.
Signed-off-by: Jan Kara <[email protected]>
---
fs/ext2/xattr.c | 90 +++++++++++++++++++++----------------------------
1 file changed, 38 insertions(+), 52 deletions(-)
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 841fa6d9d744..9885294993ef 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -651,6 +651,42 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name,
return error;
}
+static void ext2_xattr_release_block(struct inode *inode,
+ struct buffer_head *bh)
+{
+ struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
+
+ lock_buffer(bh);
+ if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
+ __u32 hash = le32_to_cpu(HDR(bh)->h_hash);
+
+ /*
+ * This must happen under buffer lock for
+ * ext2_xattr_set2() to reliably detect freed block
+ */
+ mb_cache_entry_delete(ea_block_cache, hash,
+ bh->b_blocknr);
+ /* Free the old block. */
+ ea_bdebug(bh, "freeing");
+ ext2_free_blocks(inode, bh->b_blocknr, 1);
+ /* We let our caller release bh, so we
+ * need to duplicate the buffer before. */
+ get_bh(bh);
+ bforget(bh);
+ unlock_buffer(bh);
+ } else {
+ /* Decrement the refcount only. */
+ le32_add_cpu(&HDR(bh)->h_refcount, -1);
+ dquot_free_block(inode, 1);
+ mark_buffer_dirty(bh);
+ unlock_buffer(bh);
+ ea_bdebug(bh, "refcount now=%d",
+ le32_to_cpu(HDR(bh)->h_refcount));
+ if (IS_SYNC(inode))
+ sync_dirty_buffer(bh);
+ }
+}
+
/*
* Second half of ext2_xattr_set(): Update the file system.
*/
@@ -747,34 +783,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
* If there was an old block and we are no longer using it,
* release the old block.
*/
- lock_buffer(old_bh);
- if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
- __u32 hash = le32_to_cpu(HDR(old_bh)->h_hash);
-
- /*
- * This must happen under buffer lock for
- * ext2_xattr_set2() to reliably detect freed block
- */
- mb_cache_entry_delete(ea_block_cache, hash,
- old_bh->b_blocknr);
- /* Free the old block. */
- ea_bdebug(old_bh, "freeing");
- ext2_free_blocks(inode, old_bh->b_blocknr, 1);
- mark_inode_dirty(inode);
- /* We let our caller release old_bh, so we
- * need to duplicate the buffer before. */
- get_bh(old_bh);
- bforget(old_bh);
- } else {
- /* Decrement the refcount only. */
- le32_add_cpu(&HDR(old_bh)->h_refcount, -1);
- dquot_free_block_nodirty(inode, 1);
- mark_inode_dirty(inode);
- mark_buffer_dirty(old_bh);
- ea_bdebug(old_bh, "refcount now=%d",
- le32_to_cpu(HDR(old_bh)->h_refcount));
- }
- unlock_buffer(old_bh);
+ ext2_xattr_release_block(inode, old_bh);
}
cleanup:
@@ -828,30 +837,7 @@ ext2_xattr_delete_inode(struct inode *inode)
EXT2_I(inode)->i_file_acl);
goto cleanup;
}
- lock_buffer(bh);
- if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
- __u32 hash = le32_to_cpu(HDR(bh)->h_hash);
-
- /*
- * This must happen under buffer lock for ext2_xattr_set2() to
- * reliably detect freed block
- */
- mb_cache_entry_delete(EA_BLOCK_CACHE(inode), hash,
- bh->b_blocknr);
- ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
- get_bh(bh);
- bforget(bh);
- unlock_buffer(bh);
- } else {
- le32_add_cpu(&HDR(bh)->h_refcount, -1);
- ea_bdebug(bh, "refcount now=%d",
- le32_to_cpu(HDR(bh)->h_refcount));
- unlock_buffer(bh);
- mark_buffer_dirty(bh);
- if (IS_SYNC(inode))
- sync_dirty_buffer(bh);
- dquot_free_block_nodirty(inode, 1);
- }
+ ext2_xattr_release_block(inode, bh);
EXT2_I(inode)->i_file_acl = 0;
cleanup:
--
2.35.3
Nobody uses mb_cache_entry_delete() anymore. Remove it.
Signed-off-by: Jan Kara <[email protected]>
---
fs/mbcache.c | 41 ++---------------------------------------
include/linux/mbcache.h | 1 -
2 files changed, 2 insertions(+), 40 deletions(-)
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 1ae66b2c75f4..c7b28a4e96da 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -11,7 +11,7 @@
/*
* Mbcache is a simple key-value store. Keys need not be unique, however
* key-value pairs are expected to be unique (we use this fact in
- * mb_cache_entry_delete()).
+ * mb_cache_entry_try_delete()).
*
* Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
* Ext4 also uses it for deduplication of xattr values stored in inodes.
@@ -230,44 +230,7 @@ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
}
EXPORT_SYMBOL(mb_cache_entry_get);
-/* mb_cache_entry_delete - try to remove a cache entry
- * @cache - cache we work with
- * @key - key
- * @value - value
- *
- * Remove entry from cache @cache with key @key and value @value.
- */
-void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value)
-{
- struct hlist_bl_node *node;
- struct hlist_bl_head *head;
- struct mb_cache_entry *entry;
-
- head = mb_cache_entry_head(cache, key);
- hlist_bl_lock(head);
- hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
- if (entry->e_key == key && entry->e_value == value) {
- /* We keep hash list reference to keep entry alive */
- hlist_bl_del_init(&entry->e_hash_list);
- hlist_bl_unlock(head);
- spin_lock(&cache->c_list_lock);
- if (!list_empty(&entry->e_list)) {
- list_del_init(&entry->e_list);
- if (!WARN_ONCE(cache->c_entry_count == 0,
- "mbcache: attempt to decrement c_entry_count past zero"))
- cache->c_entry_count--;
- atomic_dec(&entry->e_refcnt);
- }
- spin_unlock(&cache->c_list_lock);
- mb_cache_entry_put(cache, entry);
- return;
- }
- }
- hlist_bl_unlock(head);
-}
-EXPORT_SYMBOL(mb_cache_entry_delete);
-
-/* mb_cache_entry_try_delete - try to remove a cache entry
+/* mb_cache_entry_try_delete - remove a cache entry
* @cache - cache we work with
* @key - key
* @value - value
diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h
index 1176fdfb8d53..3b25c3004ea9 100644
--- a/include/linux/mbcache.h
+++ b/include/linux/mbcache.h
@@ -47,7 +47,6 @@ static inline int mb_cache_entry_put(struct mb_cache *cache,
struct mb_cache_entry *mb_cache_entry_try_delete(struct mb_cache *cache,
u32 key, u64 value);
-void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value);
struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
u64 value);
struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache,
--
2.35.3
On 22/06/14 06:05PM, Jan Kara wrote:
> Hello,
>
> this is the second version of my patches to fix the race in ext4 xattr handling
> that led to the assertion failure in jbd2 that Ritesh reported. The series is
> completely reworked. This time it survives beating with "stress-ng --xattr 16".
> Also I'm somewhat happier about the current solution because, although it is
> still not trivial to use mbcache correctly, it is at least harder to use it
> in a racy way :). Please let me know what you think about this series.
I have tested this on my setup where I was able to reproduce the problem with
stress-ng. It ran for several hours and also passed fstests (quick).
So feel free to -
Tested-by: Ritesh Harjani (IBM) <[email protected]>
-ritesh
>
> Changes since v1:
> * Reworked the series to fix all corner cases and make the API less error-prone.
>
> Honza
>
> Previous versions:
> Link: http://lore.kernel.org/r/[email protected] # v1
On Thu 16-06-22 17:24:40, Ritesh Harjani wrote:
> On 22/06/14 06:05PM, Jan Kara wrote:
> > Hello,
> >
> > this is the second version of my patches to fix the race in ext4 xattr handling
> > that led to the assertion failure in jbd2 that Ritesh reported. The series is
> > completely reworked. This time it survives beating with "stress-ng --xattr 16".
> > Also I'm somewhat happier about the current solution because, although it is
> > still not trivial to use mbcache correctly, it is at least harder to use it
> > in a racy way :). Please let me know what you think about this series.
>
> I have tested this on my setup where I was able to reproduce the problem with
> stress-ng. It ran for several hours and also passed fstests (quick).
>
> So feel free to -
> Tested-by: Ritesh Harjani (IBM) <[email protected]>
Thanks for testing!
Honza
--
Jan Kara <[email protected]>
SUSE Labs, CR
On 22/06/14 06:05PM, Jan Kara wrote:
> Add a function mb_cache_entry_try_delete() to delete an mbcache entry
> if it is unused and also add a function to wait for an entry to become
> unused - mb_cache_entry_wait_unused(). We do not share code between the
> two deleting functions as one of them will go away soon.
>
> CC: [email protected]
> Fixes: 82939d7999df ("ext4: convert to mbcache2")
> Signed-off-by: Jan Kara <[email protected]>
> ---
> fs/mbcache.c | 63 ++++++++++++++++++++++++++++++++++++++++-
> include/linux/mbcache.h | 10 ++++++-
> 2 files changed, 71 insertions(+), 2 deletions(-)
>
> diff --git a/fs/mbcache.c b/fs/mbcache.c
> index cfc28129fb6f..1ae66b2c75f4 100644
> --- a/fs/mbcache.c
> +++ b/fs/mbcache.c
> @@ -125,6 +125,19 @@ void __mb_cache_entry_free(struct mb_cache_entry *entry)
> }
> EXPORT_SYMBOL(__mb_cache_entry_free);
>
> +/*
> + * mb_cache_entry_wait_unused - wait to be the last user of the entry
> + *
> + * @entry - entry to work on
> + *
> + * Wait to be the last user of the entry.
> + */
> +void mb_cache_entry_wait_unused(struct mb_cache_entry *entry)
> +{
> + wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 3);
> +}
> +EXPORT_SYMBOL(mb_cache_entry_wait_unused);
> +
> static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
> struct mb_cache_entry *entry,
> u32 key)
> @@ -217,7 +230,7 @@ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
> }
> EXPORT_SYMBOL(mb_cache_entry_get);
>
> -/* mb_cache_entry_delete - remove a cache entry
> +/* mb_cache_entry_delete - try to remove a cache entry
> * @cache - cache we work with
> * @key - key
> * @value - value
> @@ -254,6 +267,54 @@ void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value)
> }
> EXPORT_SYMBOL(mb_cache_entry_delete);
>
> +/* mb_cache_entry_try_delete - try to remove a cache entry
> + * @cache - cache we work with
> + * @key - key
> + * @value - value
> + *
> + * Remove entry from cache @cache with key @key and value @value. The removal
> + * happens only if the entry is unused. The function returns NULL in case the
> + * entry was successfully removed or there's no entry in cache. Otherwise the
> > + * function returns the entry that we failed to delete because it has users.
"...Also increment its refcnt in case the entry is returned. So the caller
is responsible for calling mb_cache_entry_put() later."
Do you think such a comment should be added too? And perhaps the API should be
named mb_cache_entry_try_delete_or_get().
Looks like the e_refcnt increment is done quietly in case the entry is found,
whereas the API name just says mb_cache_entry_try_delete().
Other than that, all other code logic looks right to me.
-ritesh
> + */
> +struct mb_cache_entry *mb_cache_entry_try_delete(struct mb_cache *cache,
> + u32 key, u64 value)
> +{
> + struct hlist_bl_node *node;
> + struct hlist_bl_head *head;
> + struct mb_cache_entry *entry;
> +
> + head = mb_cache_entry_head(cache, key);
> + hlist_bl_lock(head);
> + hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
> + if (entry->e_key == key && entry->e_value == value) {
> + if (atomic_read(&entry->e_refcnt) > 2) {
> + atomic_inc(&entry->e_refcnt);
> + hlist_bl_unlock(head);
> + return entry;
> + }
> + /* We keep hash list reference to keep entry alive */
> + hlist_bl_del_init(&entry->e_hash_list);
> + hlist_bl_unlock(head);
> + spin_lock(&cache->c_list_lock);
> + if (!list_empty(&entry->e_list)) {
> + list_del_init(&entry->e_list);
> + if (!WARN_ONCE(cache->c_entry_count == 0,
> + "mbcache: attempt to decrement c_entry_count past zero"))
> + cache->c_entry_count--;
> + atomic_dec(&entry->e_refcnt);
> + }
> + spin_unlock(&cache->c_list_lock);
> + mb_cache_entry_put(cache, entry);
> + return NULL;
> + }
> + }
> + hlist_bl_unlock(head);
> +
> + return NULL;
> +}
> +EXPORT_SYMBOL(mb_cache_entry_try_delete);
> +
> /* mb_cache_entry_touch - cache entry got used
> * @cache - cache the entry belongs to
> * @entry - entry that got used
> diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h
> index 20f1e3ff6013..1176fdfb8d53 100644
> --- a/include/linux/mbcache.h
> +++ b/include/linux/mbcache.h
> @@ -30,15 +30,23 @@ void mb_cache_destroy(struct mb_cache *cache);
> int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
> u64 value, bool reusable);
> void __mb_cache_entry_free(struct mb_cache_entry *entry);
> +void mb_cache_entry_wait_unused(struct mb_cache_entry *entry);
> static inline int mb_cache_entry_put(struct mb_cache *cache,
> struct mb_cache_entry *entry)
> {
> - if (!atomic_dec_and_test(&entry->e_refcnt))
> + unsigned int cnt = atomic_dec_return(&entry->e_refcnt);
> +
> + if (cnt > 0) {
> + if (cnt <= 3)
> + wake_up_var(&entry->e_refcnt);
> return 0;
> + }
> __mb_cache_entry_free(entry);
> return 1;
> }
>
> +struct mb_cache_entry *mb_cache_entry_try_delete(struct mb_cache *cache,
> + u32 key, u64 value);
> void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value);
> struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
> u64 value);
> --
> 2.35.3
>
On 22/06/14 06:05PM, Jan Kara wrote:
> Currently we remove the EA inode from the mbcache as soon as its xattr
> refcount drops to zero. However there can be pending attempts to reuse
> the inode and thus the refcount handling code has to handle the
> situation when the refcount increases from zero anyway. So save some
> work and just keep the EA inode in the mbcache until it is evicted. At
> that moment we are sure a following iget() of the EA inode will fail
> anyway (or wait for the eviction to finish and load things from the
> disk again) and so removing the mbcache entry at that moment is fine
> and simplifies the code a bit.
>
> CC: [email protected]
> Fixes: 82939d7999df ("ext4: convert to mbcache2")
> Signed-off-by: Jan Kara <[email protected]>
> ---
> fs/ext4/inode.c | 2 ++
> fs/ext4/xattr.c | 24 ++++++++----------------
> fs/ext4/xattr.h | 1 +
> 3 files changed, 11 insertions(+), 16 deletions(-)
>
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 3dce7d058985..7450ee734262 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -177,6 +177,8 @@ void ext4_evict_inode(struct inode *inode)
>
> trace_ext4_evict_inode(inode);
>
> + if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)
> + ext4_evict_ea_inode(inode);
> if (inode->i_nlink) {
> /*
> * When journalling data dirty buffers are tracked only in the
> diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
> index 042325349098..7fc40fb1e6b3 100644
> --- a/fs/ext4/xattr.c
> +++ b/fs/ext4/xattr.c
> @@ -436,6 +436,14 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
> return err;
> }
>
> +/* Remove entry from mbcache when EA inode is getting evicted */
> +void ext4_evict_ea_inode(struct inode *inode)
> +{
> + if (EA_INODE_CACHE(inode))
> + mb_cache_entry_delete(EA_INODE_CACHE(inode),
> + ext4_xattr_inode_get_hash(inode), inode->i_ino);
> +}
> +
> static int
> ext4_xattr_inode_verify_hashes(struct inode *ea_inode,
> struct ext4_xattr_entry *entry, void *buffer,
> @@ -976,10 +984,8 @@ int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode,
> static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
> int ref_change)
> {
> - struct mb_cache *ea_inode_cache = EA_INODE_CACHE(ea_inode);
> struct ext4_iloc iloc;
> s64 ref_count;
> - u32 hash;
> int ret;
>
> inode_lock(ea_inode);
> @@ -1002,14 +1008,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
>
> set_nlink(ea_inode, 1);
> ext4_orphan_del(handle, ea_inode);
> -
> - if (ea_inode_cache) {
> - hash = ext4_xattr_inode_get_hash(ea_inode);
> - mb_cache_entry_create(ea_inode_cache,
> - GFP_NOFS, hash,
> - ea_inode->i_ino,
> - true /* reusable */);
> - }
Ok, so if I understand this correctly, since we are no longer immediately
removing the ea_inode_cache entry when the refcount reaches 0, we need not
create the mb_cache entry when the refcount goes from 0 back to 1, is that right?
Is this since the entry never got deleted in the first place?
But what happens when the entry is created the very first time?
I might need to study the xattr code to understand how this condition is taken care of.
-ritesh
> }
> } else {
> WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld",
> @@ -1022,12 +1020,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
>
> clear_nlink(ea_inode);
> ext4_orphan_add(handle, ea_inode);
> -
> - if (ea_inode_cache) {
> - hash = ext4_xattr_inode_get_hash(ea_inode);
> - mb_cache_entry_delete(ea_inode_cache, hash,
> - ea_inode->i_ino);
> - }
> }
> }
>
> diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
> index 77efb9a627ad..060b7a2f485c 100644
> --- a/fs/ext4/xattr.h
> +++ b/fs/ext4/xattr.h
> @@ -178,6 +178,7 @@ extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array);
>
> extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
> struct ext4_inode *raw_inode, handle_t *handle);
> +extern void ext4_evict_ea_inode(struct inode *inode);
>
> extern const struct xattr_handler *ext4_xattr_handlers[];
>
> --
> 2.35.3
>
On Thu 16-06-22 20:17:22, Ritesh Harjani wrote:
> On 22/06/14 06:05PM, Jan Kara wrote:
> > Add a function mb_cache_entry_try_delete() to delete an mbcache entry
> > if it is unused and also add a function to wait for an entry to become
> > unused - mb_cache_entry_wait_unused(). We do not share code between the
> > two deleting functions as one of them will go away soon.
> >
> > CC: [email protected]
> > Fixes: 82939d7999df ("ext4: convert to mbcache2")
> > Signed-off-by: Jan Kara <[email protected]>
...
> > +/* mb_cache_entry_try_delete - try to remove a cache entry
> > + * @cache - cache we work with
> > + * @key - key
> > + * @value - value
> > + *
> > + * Remove entry from cache @cache with key @key and value @value. The removal
> > + * happens only if the entry is unused. The function returns NULL in case the
> > + * entry was successfully removed or there's no entry in cache. Otherwise the
> > + * function returns the entry that we failed to delete because it has users.
>
> "...Also increment its refcnt in case the entry is returned. So the
> caller is responsible for calling mb_cache_entry_put() later."
Definitely, I'll expand the comment.
> Do you think such a comment should be added too? And perhaps the API should
> be named mb_cache_entry_try_delete_or_get().
>
> Looks like the e_refcnt increment is done quietly in case the entry is found,
> whereas the API name just says mb_cache_entry_try_delete().
It's a bit of a long name but I agree it describes the function better. OK,
let's rename.
> Other than that, all other code logic looks right to me.
Thanks for the review!
Honza
> > + */
> > +struct mb_cache_entry *mb_cache_entry_try_delete(struct mb_cache *cache,
> > + u32 key, u64 value)
> > +{
> > + struct hlist_bl_node *node;
> > + struct hlist_bl_head *head;
> > + struct mb_cache_entry *entry;
> > +
> > + head = mb_cache_entry_head(cache, key);
> > + hlist_bl_lock(head);
> > + hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
> > + if (entry->e_key == key && entry->e_value == value) {
> > + if (atomic_read(&entry->e_refcnt) > 2) {
> > + atomic_inc(&entry->e_refcnt);
> > + hlist_bl_unlock(head);
> > + return entry;
> > + }
> > + /* We keep hash list reference to keep entry alive */
> > + hlist_bl_del_init(&entry->e_hash_list);
> > + hlist_bl_unlock(head);
> > + spin_lock(&cache->c_list_lock);
> > + if (!list_empty(&entry->e_list)) {
> > + list_del_init(&entry->e_list);
> > + if (!WARN_ONCE(cache->c_entry_count == 0,
> > + "mbcache: attempt to decrement c_entry_count past zero"))
> > + cache->c_entry_count--;
> > + atomic_dec(&entry->e_refcnt);
> > + }
> > + spin_unlock(&cache->c_list_lock);
> > + mb_cache_entry_put(cache, entry);
> > + return NULL;
> > + }
> > + }
> > + hlist_bl_unlock(head);
> > +
> > + return NULL;
> > +}
> > +EXPORT_SYMBOL(mb_cache_entry_try_delete);
> > +
> > /* mb_cache_entry_touch - cache entry got used
> > * @cache - cache the entry belongs to
> > * @entry - entry that got used
> > diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h
> > index 20f1e3ff6013..1176fdfb8d53 100644
> > --- a/include/linux/mbcache.h
> > +++ b/include/linux/mbcache.h
> > @@ -30,15 +30,23 @@ void mb_cache_destroy(struct mb_cache *cache);
> > int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
> > u64 value, bool reusable);
> > void __mb_cache_entry_free(struct mb_cache_entry *entry);
> > +void mb_cache_entry_wait_unused(struct mb_cache_entry *entry);
> > static inline int mb_cache_entry_put(struct mb_cache *cache,
> > struct mb_cache_entry *entry)
> > {
> > - if (!atomic_dec_and_test(&entry->e_refcnt))
> > + unsigned int cnt = atomic_dec_return(&entry->e_refcnt);
> > +
> > + if (cnt > 0) {
> > + if (cnt <= 3)
> > + wake_up_var(&entry->e_refcnt);
> > return 0;
> > + }
> > __mb_cache_entry_free(entry);
> > return 1;
> > }
> >
> > +struct mb_cache_entry *mb_cache_entry_try_delete(struct mb_cache *cache,
> > + u32 key, u64 value);
> > void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value);
> > struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
> > u64 value);
> > --
> > 2.35.3
> >
--
Jan Kara <[email protected]>
SUSE Labs, CR
On Thu 16-06-22 20:31:18, Ritesh Harjani wrote:
> On 22/06/14 06:05PM, Jan Kara wrote:
> > Currently we remove the EA inode from the mbcache as soon as its xattr
> > refcount drops to zero. However there can be pending attempts to reuse
> > the inode and thus the refcount handling code has to handle the
> > situation when the refcount increases from zero anyway. So save some
> > work and just keep the EA inode in the mbcache until it is evicted. At
> > that moment we are sure a following iget() of the EA inode will fail
> > anyway (or wait for the eviction to finish and load things from the
> > disk again) and so removing the mbcache entry at that moment is fine
> > and simplifies the code a bit.
> >
> > CC: [email protected]
> > Fixes: 82939d7999df ("ext4: convert to mbcache2")
> > Signed-off-by: Jan Kara <[email protected]>
> > ---
> > fs/ext4/inode.c | 2 ++
> > fs/ext4/xattr.c | 24 ++++++++----------------
> > fs/ext4/xattr.h | 1 +
> > 3 files changed, 11 insertions(+), 16 deletions(-)
> >
> > diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> > index 3dce7d058985..7450ee734262 100644
> > --- a/fs/ext4/inode.c
> > +++ b/fs/ext4/inode.c
> > @@ -177,6 +177,8 @@ void ext4_evict_inode(struct inode *inode)
> >
> > trace_ext4_evict_inode(inode);
> >
> > + if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)
> > + ext4_evict_ea_inode(inode);
> > if (inode->i_nlink) {
> > /*
> > * When journalling data dirty buffers are tracked only in the
> > diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
> > index 042325349098..7fc40fb1e6b3 100644
> > --- a/fs/ext4/xattr.c
> > +++ b/fs/ext4/xattr.c
> > @@ -436,6 +436,14 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
> > return err;
> > }
> >
> > +/* Remove entry from mbcache when EA inode is getting evicted */
> > +void ext4_evict_ea_inode(struct inode *inode)
> > +{
> > + if (EA_INODE_CACHE(inode))
> > + mb_cache_entry_delete(EA_INODE_CACHE(inode),
> > + ext4_xattr_inode_get_hash(inode), inode->i_ino);
> > +}
> > +
> > static int
> > ext4_xattr_inode_verify_hashes(struct inode *ea_inode,
> > struct ext4_xattr_entry *entry, void *buffer,
> > @@ -976,10 +984,8 @@ int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode,
> > static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
> > int ref_change)
> > {
> > - struct mb_cache *ea_inode_cache = EA_INODE_CACHE(ea_inode);
> > struct ext4_iloc iloc;
> > s64 ref_count;
> > - u32 hash;
> > int ret;
> >
> > inode_lock(ea_inode);
> > @@ -1002,14 +1008,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
> >
> > set_nlink(ea_inode, 1);
> > ext4_orphan_del(handle, ea_inode);
> > -
> > - if (ea_inode_cache) {
> > - hash = ext4_xattr_inode_get_hash(ea_inode);
> > - mb_cache_entry_create(ea_inode_cache,
> > - GFP_NOFS, hash,
> > - ea_inode->i_ino,
> > - true /* reusable */);
> > - }
>
> Ok, so if I understand this correctly, since we are no longer immediately
> removing the ea_inode_cache entry when the refcount reaches 0, we need not
> create the mb_cache entry when the refcount goes from 0 back to 1, is that right?
> Is this since the entry never got deleted in the first place?
Correct.
> But what happens when the entry is created the very first time?
> I might need to study the xattr code to understand how this condition is
> taken care of.
There are other places that take care of creating the entry in that case.
E.g. ext4_xattr_inode_get() or ext4_xattr_inode_lookup_create().
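Roughly, the entry creation there looks the same as the call this patch
removes from ext4_xattr_inode_update_ref(), something like (sketch only):

	if (ea_inode_cache)
		mb_cache_entry_create(ea_inode_cache, GFP_NOFS,
				      ext4_xattr_inode_get_hash(ea_inode),
				      ea_inode->i_ino, true /* reusable */);

so the mbcache entry is (re)created whenever the EA inode is successfully
looked up.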
Honza
--
Jan Kara <[email protected]>
SUSE Labs, CR