From: Baokun Li <[email protected]>
Hi all!
There are some fixes for some cachefiles generic processes. We found these
issues when testing the on-demand mode, but the non-on-demand mode is also
involved. The following is a brief overview of the patches, see the patches
for more details.
Patch 1-2: Add fscache_try_get_volume() helper function to avoid
fscache_volume use-after-free on cache withdrawal.
Patch 3: Fix cachefiles_lookup_cookie() and cachefiles_withdraw_cache()
concurrency causing cachefiles_volume use-after-free.
Patch 4-5: Propagate error codes returned by vfs_getxattr() to avoid
endless loops.
Comments and questions are, as always, welcome.
Thanks,
Baokun
Baokun Li (5):
netfs, fscache: export fscache_put_volume() and add
fscache_try_get_volume()
cachefiles: fix slab-use-after-free in fscache_withdraw_volume()
cachefiles: fix slab-use-after-free in cachefiles_withdraw_cookie()
cachefiles: correct the return value of
cachefiles_check_volume_xattr()
cachefiles: correct the return value of cachefiles_check_auxdata()
fs/cachefiles/cache.c | 45 +++++++++++++++++++++++++++++++++-
fs/cachefiles/volume.c | 1 -
fs/cachefiles/xattr.c | 5 +++-
fs/netfs/fscache_volume.c | 14 +++++++++++
fs/netfs/internal.h | 2 --
include/linux/fscache-cache.h | 6 +++++
include/trace/events/fscache.h | 4 +++
7 files changed, 72 insertions(+), 5 deletions(-)
--
2.39.2
From: Baokun Li <[email protected]>
We got the following issue in our fault injection stress test:
==================================================================
BUG: KASAN: slab-use-after-free in fscache_withdraw_volume+0x2e1/0x370
Read of size 4 at addr ffff88810680be08 by task ondemand-04-dae/5798
CPU: 0 PID: 5798 Comm: ondemand-04-dae Not tainted 6.8.0-dirty #565
Call Trace:
kasan_check_range+0xf6/0x1b0
fscache_withdraw_volume+0x2e1/0x370
cachefiles_withdraw_volume+0x31/0x50
cachefiles_withdraw_cache+0x3ad/0x900
cachefiles_put_unbind_pincount+0x1f6/0x250
cachefiles_daemon_release+0x13b/0x290
__fput+0x204/0xa00
task_work_run+0x139/0x230
Allocated by task 5820:
__kmalloc+0x1df/0x4b0
fscache_alloc_volume+0x70/0x600
__fscache_acquire_volume+0x1c/0x610
erofs_fscache_register_volume+0x96/0x1a0
erofs_fscache_register_fs+0x49a/0x690
erofs_fc_fill_super+0x6c0/0xcc0
vfs_get_super+0xa9/0x140
vfs_get_tree+0x8e/0x300
do_new_mount+0x28c/0x580
[...]
Freed by task 5820:
kfree+0xf1/0x2c0
fscache_put_volume.part.0+0x5cb/0x9e0
erofs_fscache_unregister_fs+0x157/0x1b0
erofs_kill_sb+0xd9/0x1c0
deactivate_locked_super+0xa3/0x100
vfs_get_super+0x105/0x140
vfs_get_tree+0x8e/0x300
do_new_mount+0x28c/0x580
[...]
==================================================================
Following is the process that triggers the issue:
mount failed | daemon exit
------------------------------------------------------------
deactivate_locked_super cachefiles_daemon_release
erofs_kill_sb
erofs_fscache_unregister_fs
fscache_relinquish_volume
__fscache_relinquish_volume
fscache_put_volume(fscache_volume, fscache_volume_put_relinquish)
zero = __refcount_dec_and_test(&fscache_volume->ref, &ref);
cachefiles_put_unbind_pincount
cachefiles_daemon_unbind
cachefiles_withdraw_cache
cachefiles_withdraw_volumes
list_del_init(&volume->cache_link)
fscache_free_volume(fscache_volume)
cache->ops->free_volume
cachefiles_free_volume
list_del_init(&cachefiles_volume->cache_link);
kfree(fscache_volume)
cachefiles_withdraw_volume
fscache_withdraw_volume
fscache_volume->n_accesses
// fscache_volume UAF !!!
The fscache_volume in cache->volumes must not have been freed yet, but its
reference count may be 0. So use the new fscache_try_get_volume() helper
function try to get its reference count.
If the reference count of fscache_volume is 0, fscache_put_volume() is
freeing it, so wait for it to be removed from cache->volumes.
If its reference count is not 0, call cachefiles_withdraw_volume() with
reference count protection to avoid the above issue.
Fixes: fe2140e2f57f ("cachefiles: Implement volume support")
Signed-off-by: Baokun Li <[email protected]>
---
fs/cachefiles/cache.c | 10 ++++++++++
include/trace/events/fscache.h | 4 ++++
2 files changed, 14 insertions(+)
diff --git a/fs/cachefiles/cache.c b/fs/cachefiles/cache.c
index f449f7340aad..56ef519a36a0 100644
--- a/fs/cachefiles/cache.c
+++ b/fs/cachefiles/cache.c
@@ -8,6 +8,7 @@
#include <linux/slab.h>
#include <linux/statfs.h>
#include <linux/namei.h>
+#include <trace/events/fscache.h>
#include "internal.h"
/*
@@ -319,12 +320,20 @@ static void cachefiles_withdraw_volumes(struct cachefiles_cache *cache)
_enter("");
for (;;) {
+ struct fscache_volume *vcookie = NULL;
struct cachefiles_volume *volume = NULL;
spin_lock(&cache->object_list_lock);
if (!list_empty(&cache->volumes)) {
volume = list_first_entry(&cache->volumes,
struct cachefiles_volume, cache_link);
+ vcookie = fscache_try_get_volume(volume->vcookie,
+ fscache_volume_get_withdraw);
+ if (!vcookie) {
+ spin_unlock(&cache->object_list_lock);
+ cpu_relax();
+ continue;
+ }
list_del_init(&volume->cache_link);
}
spin_unlock(&cache->object_list_lock);
@@ -332,6 +341,7 @@ static void cachefiles_withdraw_volumes(struct cachefiles_cache *cache)
break;
cachefiles_withdraw_volume(volume);
+ fscache_put_volume(vcookie, fscache_volume_put_withdraw);
}
_leave("");
diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h
index a6190aa1b406..f1a73aa83fbb 100644
--- a/include/trace/events/fscache.h
+++ b/include/trace/events/fscache.h
@@ -35,12 +35,14 @@ enum fscache_volume_trace {
fscache_volume_get_cookie,
fscache_volume_get_create_work,
fscache_volume_get_hash_collision,
+ fscache_volume_get_withdraw,
fscache_volume_free,
fscache_volume_new_acquire,
fscache_volume_put_cookie,
fscache_volume_put_create_work,
fscache_volume_put_hash_collision,
fscache_volume_put_relinquish,
+ fscache_volume_put_withdraw,
fscache_volume_see_create_work,
fscache_volume_see_hash_wake,
fscache_volume_wait_create_work,
@@ -120,12 +122,14 @@ enum fscache_access_trace {
EM(fscache_volume_get_cookie, "GET cook ") \
EM(fscache_volume_get_create_work, "GET creat") \
EM(fscache_volume_get_hash_collision, "GET hcoll") \
+ EM(fscache_volume_get_withdraw, "GET withd") \
EM(fscache_volume_free, "FREE ") \
EM(fscache_volume_new_acquire, "NEW acq ") \
EM(fscache_volume_put_cookie, "PUT cook ") \
EM(fscache_volume_put_create_work, "PUT creat") \
EM(fscache_volume_put_hash_collision, "PUT hcoll") \
EM(fscache_volume_put_relinquish, "PUT relnq") \
+ EM(fscache_volume_put_withdraw, "PUT withd") \
EM(fscache_volume_see_create_work, "SEE creat") \
EM(fscache_volume_see_hash_wake, "SEE hwake") \
E_(fscache_volume_wait_create_work, "WAIT crea")
--
2.39.2
From: Baokun Li <[email protected]>
We got the following issue in our fault injection stress test:
==================================================================
BUG: KASAN: slab-use-after-free in cachefiles_withdraw_cookie+0x4d9/0x600
Read of size 8 at addr ffff888118efc000 by task kworker/u78:0/109
CPU: 13 PID: 109 Comm: kworker/u78:0 Not tainted 6.8.0-dirty #566
Call Trace:
<TASK>
kasan_report+0x93/0xc0
cachefiles_withdraw_cookie+0x4d9/0x600
fscache_cookie_state_machine+0x5c8/0x1230
fscache_cookie_worker+0x91/0x1c0
process_one_work+0x7fa/0x1800
[...]
Allocated by task 117:
kmalloc_trace+0x1b3/0x3c0
cachefiles_acquire_volume+0xf3/0x9c0
fscache_create_volume_work+0x97/0x150
process_one_work+0x7fa/0x1800
[...]
Freed by task 120301:
kfree+0xf1/0x2c0
cachefiles_withdraw_cache+0x3fa/0x920
cachefiles_put_unbind_pincount+0x1f6/0x250
cachefiles_daemon_release+0x13b/0x290
__fput+0x204/0xa00
task_work_run+0x139/0x230
do_exit+0x87a/0x29b0
[...]
==================================================================
Following is the process that triggers the issue:
p1 | p2
------------------------------------------------------------
fscache_begin_lookup
fscache_begin_volume_access
fscache_cache_is_live(fscache_cache)
cachefiles_daemon_release
cachefiles_put_unbind_pincount
cachefiles_daemon_unbind
cachefiles_withdraw_cache
fscache_withdraw_cache
fscache_set_cache_state(cache, FSCACHE_CACHE_IS_WITHDRAWN);
cachefiles_withdraw_objects(cache)
fscache_wait_for_objects(fscache)
atomic_read(&fscache_cache->object_count) == 0
fscache_perform_lookup
cachefiles_lookup_cookie
cachefiles_alloc_object
refcount_set(&object->ref, 1);
object->volume = volume
fscache_count_object(vcookie->cache);
atomic_inc(&fscache_cache->object_count)
cachefiles_withdraw_volumes
cachefiles_withdraw_volume
fscache_withdraw_volume
__cachefiles_free_volume
kfree(cachefiles_volume)
fscache_cookie_state_machine
cachefiles_withdraw_cookie
cache = object->volume->cache;
// cachefiles_volume UAF !!!
After setting FSCACHE_CACHE_IS_WITHDRAWN, wait for all the cookie lookups
to complete first, and then wait for fscache_cache->object_count == 0 to
avoid the cookie exiting after the volume has been freed and triggering
the above issue. Therefore call fscache_withdraw_volume() before calling
cachefiles_withdraw_objects().
This way, after setting FSCACHE_CACHE_IS_WITHDRAWN, only the following two
cases will occur:
1) fscache_begin_lookup fails in fscache_begin_volume_access().
2) fscache_withdraw_volume() will ensure that fscache_count_object() has
been executed before calling fscache_wait_for_objects().
Fixes: fe2140e2f57f ("cachefiles: Implement volume support")
Suggested-by: Hou Tao <[email protected]>
Signed-off-by: Baokun Li <[email protected]>
---
fs/cachefiles/cache.c | 35 ++++++++++++++++++++++++++++++++++-
fs/cachefiles/volume.c | 1 -
2 files changed, 34 insertions(+), 2 deletions(-)
diff --git a/fs/cachefiles/cache.c b/fs/cachefiles/cache.c
index 56ef519a36a0..9fb06dc16520 100644
--- a/fs/cachefiles/cache.c
+++ b/fs/cachefiles/cache.c
@@ -313,7 +313,39 @@ static void cachefiles_withdraw_objects(struct cachefiles_cache *cache)
}
/*
- * Withdraw volumes.
+ * Withdraw fscache volumes.
+ */
+static void cachefiles_withdraw_fscache_volumes(struct cachefiles_cache *cache)
+{
+ struct list_head *cur;
+ struct cachefiles_volume *volume;
+ struct fscache_volume *vcookie;
+
+ _enter("");
+retry:
+ spin_lock(&cache->object_list_lock);
+ list_for_each(cur, &cache->volumes) {
+ volume = list_entry(cur, struct cachefiles_volume, cache_link);
+
+ if (atomic_read(&volume->vcookie->n_accesses) == 0)
+ continue;
+
+ vcookie = fscache_try_get_volume(volume->vcookie,
+ fscache_volume_get_withdraw);
+ if (vcookie) {
+ spin_unlock(&cache->object_list_lock);
+ fscache_withdraw_volume(vcookie);
+ fscache_put_volume(vcookie, fscache_volume_put_withdraw);
+ goto retry;
+ }
+ }
+ spin_unlock(&cache->object_list_lock);
+
+ _leave("");
+}
+
+/*
+ * Withdraw cachefiles volumes.
*/
static void cachefiles_withdraw_volumes(struct cachefiles_cache *cache)
{
@@ -381,6 +413,7 @@ void cachefiles_withdraw_cache(struct cachefiles_cache *cache)
pr_info("File cache on %s unregistering\n", fscache->name);
fscache_withdraw_cache(fscache);
+ cachefiles_withdraw_fscache_volumes(cache);
/* we now have to destroy all the active objects pertaining to this
* cache - which we do by passing them off to thread pool to be
diff --git a/fs/cachefiles/volume.c b/fs/cachefiles/volume.c
index 89df0ba8ba5e..781aac4ef274 100644
--- a/fs/cachefiles/volume.c
+++ b/fs/cachefiles/volume.c
@@ -133,7 +133,6 @@ void cachefiles_free_volume(struct fscache_volume *vcookie)
void cachefiles_withdraw_volume(struct cachefiles_volume *volume)
{
- fscache_withdraw_volume(volume->vcookie);
cachefiles_set_volume_xattr(volume);
__cachefiles_free_volume(volume);
}
--
2.39.2
From: Baokun Li <[email protected]>
In cachefiles_check_volume_xattr(), the error returned by vfs_getxattr is
not passed to ret, so it ends up returning -ESTALE, which leads to an
endless loop as follows:
cachefiles_acquire_volume
retry:
cachefiles_check_volume_xattr
// return -ESTALE
cachefiles_bury_object
// EIO causes rename failure
goto retry;
So pass the error code to ret when xlen < 0 to avoid the above problem.
Fixes: 32e150037dce ("fscache, cachefiles: Store the volume coherency data")
Signed-off-by: Baokun Li <[email protected]>
---
fs/cachefiles/xattr.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
index bcb6173943ee..20e4a4391090 100644
--- a/fs/cachefiles/xattr.c
+++ b/fs/cachefiles/xattr.c
@@ -252,6 +252,7 @@ int cachefiles_check_volume_xattr(struct cachefiles_volume *volume)
xlen = vfs_getxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache, buf, len);
if (xlen != len) {
if (xlen < 0) {
+ ret = xlen;
trace_cachefiles_vfs_error(NULL, d_inode(dentry), xlen,
cachefiles_trace_getxattr_error);
if (xlen == -EIO)
--
2.39.2
From: Baokun Li <[email protected]>
Pass the error code to ret when xlen < 0 to avoid misleading the caller.
Fixes: 72b957856b0c ("cachefiles: Implement metadata/coherency data storage in xattrs")
Signed-off-by: Baokun Li <[email protected]>
---
fs/cachefiles/xattr.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
index 20e4a4391090..4dd8a993c60a 100644
--- a/fs/cachefiles/xattr.c
+++ b/fs/cachefiles/xattr.c
@@ -110,9 +110,11 @@ int cachefiles_check_auxdata(struct cachefiles_object *object, struct file *file
if (xlen == 0)
xlen = vfs_getxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache, buf, tlen);
if (xlen != tlen) {
- if (xlen < 0)
+ if (xlen < 0) {
+ ret = xlen;
trace_cachefiles_vfs_error(object, file_inode(file), xlen,
cachefiles_trace_getxattr_error);
+ }
if (xlen == -EIO)
cachefiles_io_error_obj(
object,
--
2.39.2