When an afs_volume struct is put, its refcount is reduced to 0 before the
cell->volume_lock is taken and the volume removed from the cell->volumes
tree. Unfortunately, this means that the lookup code can race and see a
volume with a zero ref in the tree, resulting in a use-after-free:
refcount_t: addition on 0; use-after-free.
WARNING: CPU: 3 PID: 130782 at lib/refcount.c:25 refcount_warn_saturate+0x7a/0xda
...
RIP: 0010:refcount_warn_saturate+0x7a/0xda
...
Call Trace:
<TASK>
? __warn+0x8b/0xf5
? report_bug+0xbf/0x11b
? refcount_warn_saturate+0x7a/0xda
? handle_bug+0x3c/0x5b
? exc_invalid_op+0x13/0x59
? asm_exc_invalid_op+0x16/0x20
? refcount_warn_saturate+0x7a/0xda
? refcount_warn_saturate+0x7a/0xda
afs_get_volume+0x3d/0x55
afs_create_volume+0x126/0x1de
afs_validate_fc+0xfe/0x130
afs_get_tree+0x20/0x2e5
vfs_get_tree+0x1d/0xc9
do_new_mount+0x13b/0x22e
do_mount+0x5d/0x8a
__do_sys_mount+0x100/0x12a
do_syscall_64+0x3a/0x94
entry_SYSCALL_64_after_hwframe+0x62/0x6a
Fix this by:
(1) When putting, use a flag to indicate if the volume has been removed
from the tree and skip the rb_erase if it has.
(2) When looking up, use a conditional ref increment and if it fails
because the refcount is 0, replace the node in the tree and set the
removal flag.
Fixes: 20325960f875 ("afs: Reorganise volume and server trees to be rooted on the cell")
Signed-off-by: David Howells <[email protected]>
cc: Marc Dionne <[email protected]>
cc: [email protected]
---
fs/afs/internal.h | 2 ++
fs/afs/volume.c | 26 +++++++++++++++++++++++---
2 files changed, 25 insertions(+), 3 deletions(-)
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index a812952be1c9..7385d62c8cf5 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -586,6 +586,7 @@ struct afs_volume {
#define AFS_VOLUME_OFFLINE 4 /* - T if volume offline notice given */
#define AFS_VOLUME_BUSY 5 /* - T if volume busy notice given */
#define AFS_VOLUME_MAYBE_NO_IBULK 6 /* - T if some servers don't have InlineBulkStatus */
+#define AFS_VOLUME_RM_TREE 7 /* - Set if volume removed from cell->volumes */
#ifdef CONFIG_AFS_FSCACHE
struct fscache_volume *cache; /* Caching cookie */
#endif
@@ -1513,6 +1514,7 @@ extern struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *,
extern struct afs_volume *afs_create_volume(struct afs_fs_context *);
extern int afs_activate_volume(struct afs_volume *);
extern void afs_deactivate_volume(struct afs_volume *);
+bool afs_try_get_volume(struct afs_volume *volume, enum afs_volume_trace reason);
extern struct afs_volume *afs_get_volume(struct afs_volume *, enum afs_volume_trace);
extern void afs_put_volume(struct afs_net *, struct afs_volume *, enum afs_volume_trace);
extern int afs_check_volume_status(struct afs_volume *, struct afs_operation *);
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 29d483c80281..115c081a8e2c 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -32,8 +32,13 @@ static struct afs_volume *afs_insert_volume_into_cell(struct afs_cell *cell,
} else if (p->vid > volume->vid) {
pp = &(*pp)->rb_right;
} else {
- volume = afs_get_volume(p, afs_volume_trace_get_cell_insert);
- goto found;
+ if (afs_try_get_volume(p, afs_volume_trace_get_cell_insert)) {
+ volume = p;
+ goto found;
+ }
+
+ set_bit(AFS_VOLUME_RM_TREE, &volume->flags);
+ rb_replace_node_rcu(&p->cell_node, &volume->cell_node, &cell->volumes);
}
}
@@ -56,7 +61,8 @@ static void afs_remove_volume_from_cell(struct afs_volume *volume)
afs_volume_trace_remove);
write_seqlock(&cell->volume_lock);
hlist_del_rcu(&volume->proc_link);
- rb_erase(&volume->cell_node, &cell->volumes);
+ if (!test_and_set_bit(AFS_VOLUME_RM_TREE, &volume->flags))
+ rb_erase(&volume->cell_node, &cell->volumes);
write_sequnlock(&cell->volume_lock);
}
}
@@ -231,6 +237,20 @@ static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume)
_leave(" [destroyed]");
}
+/*
+ * Try to get a reference on a volume record.
+ */
+bool afs_try_get_volume(struct afs_volume *volume, enum afs_volume_trace reason)
+{
+ int r;
+
+ if (__refcount_inc_not_zero(&volume->ref, &r)) {
+ trace_afs_volume(volume->vid, r + 1, reason);
+ return true;
+ }
+ return false;
+}
+
/*
* Get a reference on a volume record.
*/
On 12/13/2023 5:25 AM, David Howells wrote:
> When an afs_volume struct is put, its refcount is reduced to 0 before the
> cell->volume_lock is taken and the volume removed from the cell->volumes
> tree. Unfortunately, this means that the lookup code can race and see a
> volume with a zero ref in the tree, resulting in a use-after-free:
>
> refcount_t: addition on 0; use-after-free.
> WARNING: CPU: 3 PID: 130782 at lib/refcount.c:25 refcount_warn_saturate+0x7a/0xda
> ...
> RIP: 0010:refcount_warn_saturate+0x7a/0xda
> ...
> Call Trace:
> <TASK>
> ? __warn+0x8b/0xf5
> ? report_bug+0xbf/0x11b
> ? refcount_warn_saturate+0x7a/0xda
> ? handle_bug+0x3c/0x5b
> ? exc_invalid_op+0x13/0x59
> ? asm_exc_invalid_op+0x16/0x20
> ? refcount_warn_saturate+0x7a/0xda
> ? refcount_warn_saturate+0x7a/0xda
> afs_get_volume+0x3d/0x55
> afs_create_volume+0x126/0x1de
> afs_validate_fc+0xfe/0x130
> afs_get_tree+0x20/0x2e5
> vfs_get_tree+0x1d/0xc9
> do_new_mount+0x13b/0x22e
> do_mount+0x5d/0x8a
> __do_sys_mount+0x100/0x12a
> do_syscall_64+0x3a/0x94
> entry_SYSCALL_64_after_hwframe+0x62/0x6a
>
> Fix this by:
>
> (1) When putting, use a flag to indicate if the volume has been removed
> from the tree and skip the rb_erase if it has.
>
> (2) When looking up, use a conditional ref increment and if it fails
> because the refcount is 0, replace the node in the tree and set the
> removal flag.
>
> Fixes: 20325960f875 ("afs: Reorganise volume and server trees to be rooted on the cell")
> Signed-off-by: David Howells <[email protected]>
> cc: Marc Dionne <[email protected]>
> cc: [email protected]
> ---
> fs/afs/internal.h | 2 ++
> fs/afs/volume.c | 26 +++++++++++++++++++++++---
> 2 files changed, 25 insertions(+), 3 deletions(-)
>
> diff --git a/fs/afs/internal.h b/fs/afs/internal.h
> index a812952be1c9..7385d62c8cf5 100644
> --- a/fs/afs/internal.h
> +++ b/fs/afs/internal.h
> @@ -586,6 +586,7 @@ struct afs_volume {
> #define AFS_VOLUME_OFFLINE 4 /* - T if volume offline notice given */
> #define AFS_VOLUME_BUSY 5 /* - T if volume busy notice given */
> #define AFS_VOLUME_MAYBE_NO_IBULK 6 /* - T if some servers don't have InlineBulkStatus */
> +#define AFS_VOLUME_RM_TREE 7 /* - Set if volume removed from cell->volumes */
> #ifdef CONFIG_AFS_FSCACHE
> struct fscache_volume *cache; /* Caching cookie */
> #endif
> @@ -1513,6 +1514,7 @@ extern struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *,
> extern struct afs_volume *afs_create_volume(struct afs_fs_context *);
> extern int afs_activate_volume(struct afs_volume *);
> extern void afs_deactivate_volume(struct afs_volume *);
> +bool afs_try_get_volume(struct afs_volume *volume, enum afs_volume_trace reason);
> extern struct afs_volume *afs_get_volume(struct afs_volume *, enum afs_volume_trace);
> extern void afs_put_volume(struct afs_net *, struct afs_volume *, enum afs_volume_trace);
> extern int afs_check_volume_status(struct afs_volume *, struct afs_operation *);
> diff --git a/fs/afs/volume.c b/fs/afs/volume.c
> index 29d483c80281..115c081a8e2c 100644
> --- a/fs/afs/volume.c
> +++ b/fs/afs/volume.c
> @@ -32,8 +32,13 @@ static struct afs_volume *afs_insert_volume_into_cell(struct afs_cell *cell,
> } else if (p->vid > volume->vid) {
> pp = &(*pp)->rb_right;
> } else {
> - volume = afs_get_volume(p, afs_volume_trace_get_cell_insert);
> - goto found;
> + if (afs_try_get_volume(p, afs_volume_trace_get_cell_insert)) {
> + volume = p;
> + goto found;
> + }
> +
> + set_bit(AFS_VOLUME_RM_TREE, &volume->flags);
> + rb_replace_node_rcu(&p->cell_node, &volume->cell_node, &cell->volumes);
> }
> }
>
> @@ -56,7 +61,8 @@ static void afs_remove_volume_from_cell(struct afs_volume *volume)
> afs_volume_trace_remove);
> write_seqlock(&cell->volume_lock);
> hlist_del_rcu(&volume->proc_link);
> - rb_erase(&volume->cell_node, &cell->volumes);
> + if (!test_and_set_bit(AFS_VOLUME_RM_TREE, &volume->flags))
> + rb_erase(&volume->cell_node, &cell->volumes);
> write_sequnlock(&cell->volume_lock);
> }
> }
> @@ -231,6 +237,20 @@ static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume)
> _leave(" [destroyed]");
> }
>
> +/*
> + * Try to get a reference on a volume record.
> + */
> +bool afs_try_get_volume(struct afs_volume *volume, enum afs_volume_trace reason)
> +{
> + int r;
> +
> + if (__refcount_inc_not_zero(&volume->ref, &r)) {
> + trace_afs_volume(volume->vid, r + 1, reason);
> + return true;
> + }
> + return false;
> +}
> +
> /*
> * Get a reference on a volume record.
> */
>
Reviewed-by: Jeffrey Altman <[email protected]>