2022-11-18 02:16:05

by Xiubo Li

[permalink] [raw]
Subject: [PATCH 0/2 v3] ceph: fix the use-after-free bug for file_lock

From: Xiubo Li <[email protected]>

Changed in V3:
- switched to vfs_inode_has_locks() helper to fix another ceph file lock
bug, thanks Jeff!
- this patch series is based on Jeff's previous VFS lock patch:
https://patchwork.kernel.org/project/ceph-devel/list/?series=695950

Changed in V2:
- switch to file_lock.fl_u to fix the race bug
- and most of the code will be in the ceph layer


Xiubo Li (2):
ceph: switch to vfs_inode_has_locks() to fix file lock bug
ceph: add ceph_lock_info support for file_lock

fs/ceph/caps.c | 2 +-
fs/ceph/locks.c | 24 ++++++++++++++++++------
fs/ceph/super.h | 1 -
include/linux/ceph/ceph_fs_fl.h | 17 +++++++++++++++++
include/linux/fs.h | 2 ++
5 files changed, 38 insertions(+), 8 deletions(-)
create mode 100644 include/linux/ceph/ceph_fs_fl.h

--
2.31.1



2022-11-18 02:16:28

by Xiubo Li

[permalink] [raw]
Subject: [PATCH 1/2 v3] ceph: switch to vfs_inode_has_locks() to fix file lock bug

From: Xiubo Li <[email protected]>

POSIX locks all use the same owner, which is the thread id, and
multiple POSIX locks can be merged into a single one, so checking
whether the 'file' has locks may fail.

A file where some openers use locking and others don't is a
really odd usage pattern, though. Locks are like stoplights -- they
only work if everyone pays attention to them.

Just switch ceph_get_caps() to check whether any locks are set on
the inode. If there are POSIX/OFD/FLOCK locks on the file at the
time, we should set CHECK_FILELOCK, regardless of what fd was used
to set the lock.

Cc: [email protected]
Cc: Jeff Layton <[email protected]>
Fixes: ff5d913dfc71 ("ceph: return -EIO if read/write against filp that lost file locks")
URL: https://tracker.ceph.com/issues/57986
Signed-off-by: Xiubo Li <[email protected]>
---
fs/ceph/caps.c | 2 +-
fs/ceph/locks.c | 4 ----
fs/ceph/super.h | 1 -
3 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 065e9311b607..948136f81fc8 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2964,7 +2964,7 @@ int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got

while (true) {
flags &= CEPH_FILE_MODE_MASK;
- if (atomic_read(&fi->num_locks))
+ if (vfs_inode_has_locks(inode))
flags |= CHECK_FILELOCK;
_got = 0;
ret = try_get_cap_refs(inode, need, want, endoff,
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 3e2843e86e27..b191426bf880 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -32,18 +32,14 @@ void __init ceph_flock_init(void)

static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
{
- struct ceph_file_info *fi = dst->fl_file->private_data;
struct inode *inode = file_inode(dst->fl_file);
atomic_inc(&ceph_inode(inode)->i_filelock_ref);
- atomic_inc(&fi->num_locks);
}

static void ceph_fl_release_lock(struct file_lock *fl)
{
- struct ceph_file_info *fi = fl->fl_file->private_data;
struct inode *inode = file_inode(fl->fl_file);
struct ceph_inode_info *ci = ceph_inode(inode);
- atomic_dec(&fi->num_locks);
if (atomic_dec_and_test(&ci->i_filelock_ref)) {
/* clear error when all locks are released */
spin_lock(&ci->i_ceph_lock);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 7b75a84ba48d..87dc55c866e9 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -803,7 +803,6 @@ struct ceph_file_info {
struct list_head rw_contexts;

u32 filp_gen;
- atomic_t num_locks;
};

struct ceph_dir_file_info {
--
2.31.1


2022-11-18 02:23:11

by Xiubo Li

[permalink] [raw]
Subject: [PATCH 2/2 v3] ceph: add ceph_lock_info support for file_lock

From: Xiubo Li <[email protected]>

When ceph releases the file_lock it tries to get the inode pointer
from fl->fl_file, whose memory could already have been released by
another thread in filp_close(), because in the VFS layer fl->fl_file
doesn't increase the file's reference counter.

Switch to using ceph-dedicated lock info to track the inode.

In ceph_fl_release_lock() we should skip all the operations if
fl->fl_u.ceph_fl.fl_inode is not set, which means the file_lock is a
request file_lock. We set fl->fl_u.ceph_fl.fl_inode when inserting
the lock into the inode lock list, which is when the lock is copied.

Cc: [email protected]
Cc: Jeff Layton <[email protected]>
URL: https://tracker.ceph.com/issues/57986
Signed-off-by: Xiubo Li <[email protected]>
---
fs/ceph/locks.c | 20 ++++++++++++++++++--
include/linux/ceph/ceph_fs_fl.h | 17 +++++++++++++++++
include/linux/fs.h | 2 ++
3 files changed, 37 insertions(+), 2 deletions(-)
create mode 100644 include/linux/ceph/ceph_fs_fl.h

diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index b191426bf880..621f38f10a88 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -34,18 +34,34 @@ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
{
struct inode *inode = file_inode(dst->fl_file);
atomic_inc(&ceph_inode(inode)->i_filelock_ref);
+ dst->fl_u.ceph_fl.fl_inode = igrab(inode);
}

+/*
+ * Do not use the 'fl->fl_file' in release function, which
+ * is possibly already released by another thread.
+ */
static void ceph_fl_release_lock(struct file_lock *fl)
{
- struct inode *inode = file_inode(fl->fl_file);
- struct ceph_inode_info *ci = ceph_inode(inode);
+ struct inode *inode = fl->fl_u.ceph_fl.fl_inode;
+ struct ceph_inode_info *ci;
+
+ /*
+ * If inode is NULL it should be a request file_lock,
+ * nothing we can do.
+ */
+ if (!inode)
+ return;
+
+ ci = ceph_inode(inode);
if (atomic_dec_and_test(&ci->i_filelock_ref)) {
/* clear error when all locks are released */
spin_lock(&ci->i_ceph_lock);
ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK;
spin_unlock(&ci->i_ceph_lock);
}
+ fl->fl_u.ceph_fl.fl_inode = NULL;
+ iput(inode);
}

static const struct file_lock_operations ceph_fl_lock_ops = {
diff --git a/include/linux/ceph/ceph_fs_fl.h b/include/linux/ceph/ceph_fs_fl.h
new file mode 100644
index 000000000000..ad1cf96329f9
--- /dev/null
+++ b/include/linux/ceph/ceph_fs_fl.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ceph_fs_fl.h - Ceph lock info
+ *
+ * LGPL2
+ */
+
+#ifndef CEPH_FS_FL_H
+#define CEPH_FS_FL_H
+
+#include <linux/fs.h>
+
+struct ceph_lock_info {
+ struct inode *fl_inode;
+};
+
+#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d6cb42b7e91c..2b03d5e375d7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1066,6 +1066,7 @@ bool opens_in_grace(struct net *);

/* that will die - we need it for nfs_lock_info */
#include <linux/nfs_fs_i.h>
+#include <linux/ceph/ceph_fs_fl.h>

/*
* struct file_lock represents a generic "file lock". It's used to represent
@@ -1119,6 +1120,7 @@ struct file_lock {
int state; /* state of grant or error if -ve */
unsigned int debug_id;
} afs;
+ struct ceph_lock_info ceph_fl;
} fl_u;
} __randomize_layout;

--
2.31.1


2022-12-12 18:18:30

by Ilya Dryomov

[permalink] [raw]
Subject: Re: [PATCH 2/2 v3] ceph: add ceph_lock_info support for file_lock

On Fri, Nov 18, 2022 at 3:07 AM <[email protected]> wrote:
>
> From: Xiubo Li <[email protected]>
>
> When ceph releasing the file_lock it will try to get the inode pointer
> from the fl->fl_file, which the memory could already be released by
> another thread in filp_close(). Because in VFS layer the fl->fl_file
> doesn't increase the file's reference counter.
>
> Will switch to use ceph dedicate lock info to track the inode.
>
> And in ceph_fl_release_lock() we should skip all the operations if
> the fl->fl_u.ceph_fl.fl_inode is not set, which should come from
> the request file_lock. And we will set fl->fl_u.ceph_fl.fl_inode when
> inserting it to the inode lock list, which is when copying the lock.
>
> Cc: [email protected]
> Cc: Jeff Layton <[email protected]>
> URL: https://tracker.ceph.com/issues/57986
> Signed-off-by: Xiubo Li <[email protected]>
> ---
> fs/ceph/locks.c | 20 ++++++++++++++++++--
> include/linux/ceph/ceph_fs_fl.h | 17 +++++++++++++++++
> include/linux/fs.h | 2 ++
> 3 files changed, 37 insertions(+), 2 deletions(-)
> create mode 100644 include/linux/ceph/ceph_fs_fl.h
>
> diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
> index b191426bf880..621f38f10a88 100644
> --- a/fs/ceph/locks.c
> +++ b/fs/ceph/locks.c
> @@ -34,18 +34,34 @@ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
> {
> struct inode *inode = file_inode(dst->fl_file);
> atomic_inc(&ceph_inode(inode)->i_filelock_ref);
> + dst->fl_u.ceph_fl.fl_inode = igrab(inode);
> }
>
> +/*
> + * Do not use the 'fl->fl_file' in release function, which
> + * is possibly already released by another thread.
> + */
> static void ceph_fl_release_lock(struct file_lock *fl)
> {
> - struct inode *inode = file_inode(fl->fl_file);
> - struct ceph_inode_info *ci = ceph_inode(inode);
> + struct inode *inode = fl->fl_u.ceph_fl.fl_inode;
> + struct ceph_inode_info *ci;
> +
> + /*
> + * If inode is NULL it should be a request file_lock,
> + * nothing we can do.
> + */
> + if (!inode)
> + return;
> +
> + ci = ceph_inode(inode);
> if (atomic_dec_and_test(&ci->i_filelock_ref)) {
> /* clear error when all locks are released */
> spin_lock(&ci->i_ceph_lock);
> ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK;
> spin_unlock(&ci->i_ceph_lock);
> }
> + fl->fl_u.ceph_fl.fl_inode = NULL;
> + iput(inode);
> }
>
> static const struct file_lock_operations ceph_fl_lock_ops = {
> diff --git a/include/linux/ceph/ceph_fs_fl.h b/include/linux/ceph/ceph_fs_fl.h
> new file mode 100644
> index 000000000000..ad1cf96329f9
> --- /dev/null
> +++ b/include/linux/ceph/ceph_fs_fl.h
> @@ -0,0 +1,17 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * ceph_fs_fl.h - Ceph lock info
> + *
> + * LGPL2
> + */
> +
> +#ifndef CEPH_FS_FL_H
> +#define CEPH_FS_FL_H
> +
> +#include <linux/fs.h>
> +
> +struct ceph_lock_info {
> + struct inode *fl_inode;
> +};
> +
> +#endif
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index d6cb42b7e91c..2b03d5e375d7 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1066,6 +1066,7 @@ bool opens_in_grace(struct net *);
>
> /* that will die - we need it for nfs_lock_info */
> #include <linux/nfs_fs_i.h>
> +#include <linux/ceph/ceph_fs_fl.h>
>
> /*
> * struct file_lock represents a generic "file lock". It's used to represent
> @@ -1119,6 +1120,7 @@ struct file_lock {
> int state; /* state of grant or error if -ve */
> unsigned int debug_id;
> } afs;
> + struct ceph_lock_info ceph_fl;

Hi Xiubo and Jeff,

Xiubo, instead of defining struct ceph_lock_info and including
a CephFS-specific header file in linux/fs.h, I think we should repeat
what was done for AFS -- particularly given that ceph_lock_info ends up
being a dummy type that isn't mentioned anywhere else.

Jeff, could you please ack this with your file locking hat on?

Thanks,

Ilya

2022-12-12 18:21:46

by Jeffrey Layton

[permalink] [raw]
Subject: Re: [PATCH 2/2 v3] ceph: add ceph_lock_info support for file_lock

On Mon, 2022-12-12 at 18:56 +0100, Ilya Dryomov wrote:
> On Fri, Nov 18, 2022 at 3:07 AM <[email protected]> wrote:
> >
> > From: Xiubo Li <[email protected]>
> >
> > When ceph releasing the file_lock it will try to get the inode pointer
> > from the fl->fl_file, which the memory could already be released by
> > another thread in filp_close(). Because in VFS layer the fl->fl_file
> > doesn't increase the file's reference counter.
> >
> > Will switch to use ceph dedicate lock info to track the inode.
> >
> > And in ceph_fl_release_lock() we should skip all the operations if
> > the fl->fl_u.ceph_fl.fl_inode is not set, which should come from
> > the request file_lock. And we will set fl->fl_u.ceph_fl.fl_inode when
> > inserting it to the inode lock list, which is when copying the lock.
> >
> > Cc: [email protected]
> > Cc: Jeff Layton <[email protected]>
> > URL: https://tracker.ceph.com/issues/57986
> > Signed-off-by: Xiubo Li <[email protected]>
> > ---
> > fs/ceph/locks.c | 20 ++++++++++++++++++--
> > include/linux/ceph/ceph_fs_fl.h | 17 +++++++++++++++++
> > include/linux/fs.h | 2 ++
> > 3 files changed, 37 insertions(+), 2 deletions(-)
> > create mode 100644 include/linux/ceph/ceph_fs_fl.h
> >
> > diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
> > index b191426bf880..621f38f10a88 100644
> > --- a/fs/ceph/locks.c
> > +++ b/fs/ceph/locks.c
> > @@ -34,18 +34,34 @@ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
> > {
> > struct inode *inode = file_inode(dst->fl_file);
> > atomic_inc(&ceph_inode(inode)->i_filelock_ref);
> > + dst->fl_u.ceph_fl.fl_inode = igrab(inode);
> > }
> >
> > +/*
> > + * Do not use the 'fl->fl_file' in release function, which
> > + * is possibly already released by another thread.
> > + */
> > static void ceph_fl_release_lock(struct file_lock *fl)
> > {
> > - struct inode *inode = file_inode(fl->fl_file);
> > - struct ceph_inode_info *ci = ceph_inode(inode);
> > + struct inode *inode = fl->fl_u.ceph_fl.fl_inode;
> > + struct ceph_inode_info *ci;
> > +
> > + /*
> > + * If inode is NULL it should be a request file_lock,
> > + * nothing we can do.
> > + */
> > + if (!inode)
> > + return;
> > +
> > + ci = ceph_inode(inode);
> > if (atomic_dec_and_test(&ci->i_filelock_ref)) {
> > /* clear error when all locks are released */
> > spin_lock(&ci->i_ceph_lock);
> > ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK;
> > spin_unlock(&ci->i_ceph_lock);
> > }
> > + fl->fl_u.ceph_fl.fl_inode = NULL;
> > + iput(inode);
> > }
> >
> > static const struct file_lock_operations ceph_fl_lock_ops = {
> > diff --git a/include/linux/ceph/ceph_fs_fl.h b/include/linux/ceph/ceph_fs_fl.h
> > new file mode 100644
> > index 000000000000..ad1cf96329f9
> > --- /dev/null
> > +++ b/include/linux/ceph/ceph_fs_fl.h
> > @@ -0,0 +1,17 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +/*
> > + * ceph_fs_fl.h - Ceph lock info
> > + *
> > + * LGPL2
> > + */
> > +
> > +#ifndef CEPH_FS_FL_H
> > +#define CEPH_FS_FL_H
> > +
> > +#include <linux/fs.h>
> > +
> > +struct ceph_lock_info {
> > + struct inode *fl_inode;
> > +};
> > +
> > +#endif
> > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > index d6cb42b7e91c..2b03d5e375d7 100644
> > --- a/include/linux/fs.h
> > +++ b/include/linux/fs.h
> > @@ -1066,6 +1066,7 @@ bool opens_in_grace(struct net *);
> >
> > /* that will die - we need it for nfs_lock_info */
> > #include <linux/nfs_fs_i.h>
> > +#include <linux/ceph/ceph_fs_fl.h>
> >
> > /*
> > * struct file_lock represents a generic "file lock". It's used to represent
> > @@ -1119,6 +1120,7 @@ struct file_lock {
> > int state; /* state of grant or error if -ve */
> > unsigned int debug_id;
> > } afs;
> > + struct ceph_lock_info ceph_fl;
>
> Hi Xiubo and Jeff,
>
> Xiubo, instead of defining struct ceph_lock_info and including
> a CephFS-specific header file in linux/fs.h, I think we should repeat
> what was done for AFS -- particularly given that ceph_lock_info ends up
> being a dummy type that isn't mentioned anywhere else.
>
> Jeff, could you please ack this with your file locking hat on?
>

ACK. I think that would be cleaner.

Thanks
--
Jeff Layton <[email protected]>

2022-12-13 01:36:14

by Xiubo Li

[permalink] [raw]
Subject: Re: [PATCH 2/2 v3] ceph: add ceph_lock_info support for file_lock


On 13/12/2022 02:02, Jeff Layton wrote:
> On Mon, 2022-12-12 at 18:56 +0100, Ilya Dryomov wrote:
>> On Fri, Nov 18, 2022 at 3:07 AM <[email protected]> wrote:
>>> From: Xiubo Li <[email protected]>
>>>
>>> When ceph releasing the file_lock it will try to get the inode pointer
>>> from the fl->fl_file, which the memory could already be released by
>>> another thread in filp_close(). Because in VFS layer the fl->fl_file
>>> doesn't increase the file's reference counter.
>>>
>>> Will switch to use ceph dedicate lock info to track the inode.
>>>
>>> And in ceph_fl_release_lock() we should skip all the operations if
>>> the fl->fl_u.ceph_fl.fl_inode is not set, which should come from
>>> the request file_lock. And we will set fl->fl_u.ceph_fl.fl_inode when
>>> inserting it to the inode lock list, which is when copying the lock.
>>>
>>> Cc: [email protected]
>>> Cc: Jeff Layton <[email protected]>
>>> URL: https://tracker.ceph.com/issues/57986
>>> Signed-off-by: Xiubo Li <[email protected]>
>>> ---
>>> fs/ceph/locks.c | 20 ++++++++++++++++++--
>>> include/linux/ceph/ceph_fs_fl.h | 17 +++++++++++++++++
>>> include/linux/fs.h | 2 ++
>>> 3 files changed, 37 insertions(+), 2 deletions(-)
>>> create mode 100644 include/linux/ceph/ceph_fs_fl.h
>>>
>>> diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
>>> index b191426bf880..621f38f10a88 100644
>>> --- a/fs/ceph/locks.c
>>> +++ b/fs/ceph/locks.c
>>> @@ -34,18 +34,34 @@ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
>>> {
>>> struct inode *inode = file_inode(dst->fl_file);
>>> atomic_inc(&ceph_inode(inode)->i_filelock_ref);
>>> + dst->fl_u.ceph_fl.fl_inode = igrab(inode);
>>> }
>>>
>>> +/*
>>> + * Do not use the 'fl->fl_file' in release function, which
>>> + * is possibly already released by another thread.
>>> + */
>>> static void ceph_fl_release_lock(struct file_lock *fl)
>>> {
>>> - struct inode *inode = file_inode(fl->fl_file);
>>> - struct ceph_inode_info *ci = ceph_inode(inode);
>>> + struct inode *inode = fl->fl_u.ceph_fl.fl_inode;
>>> + struct ceph_inode_info *ci;
>>> +
>>> + /*
>>> + * If inode is NULL it should be a request file_lock,
>>> + * nothing we can do.
>>> + */
>>> + if (!inode)
>>> + return;
>>> +
>>> + ci = ceph_inode(inode);
>>> if (atomic_dec_and_test(&ci->i_filelock_ref)) {
>>> /* clear error when all locks are released */
>>> spin_lock(&ci->i_ceph_lock);
>>> ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK;
>>> spin_unlock(&ci->i_ceph_lock);
>>> }
>>> + fl->fl_u.ceph_fl.fl_inode = NULL;
>>> + iput(inode);
>>> }
>>>
>>> static const struct file_lock_operations ceph_fl_lock_ops = {
>>> diff --git a/include/linux/ceph/ceph_fs_fl.h b/include/linux/ceph/ceph_fs_fl.h
>>> new file mode 100644
>>> index 000000000000..ad1cf96329f9
>>> --- /dev/null
>>> +++ b/include/linux/ceph/ceph_fs_fl.h
>>> @@ -0,0 +1,17 @@
>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>> +/*
>>> + * ceph_fs_fl.h - Ceph lock info
>>> + *
>>> + * LGPL2
>>> + */
>>> +
>>> +#ifndef CEPH_FS_FL_H
>>> +#define CEPH_FS_FL_H
>>> +
>>> +#include <linux/fs.h>
>>> +
>>> +struct ceph_lock_info {
>>> + struct inode *fl_inode;
>>> +};
>>> +
>>> +#endif
>>> diff --git a/include/linux/fs.h b/include/linux/fs.h
>>> index d6cb42b7e91c..2b03d5e375d7 100644
>>> --- a/include/linux/fs.h
>>> +++ b/include/linux/fs.h
>>> @@ -1066,6 +1066,7 @@ bool opens_in_grace(struct net *);
>>>
>>> /* that will die - we need it for nfs_lock_info */
>>> #include <linux/nfs_fs_i.h>
>>> +#include <linux/ceph/ceph_fs_fl.h>
>>>
>>> /*
>>> * struct file_lock represents a generic "file lock". It's used to represent
>>> @@ -1119,6 +1120,7 @@ struct file_lock {
>>> int state; /* state of grant or error if -ve */
>>> unsigned int debug_id;
>>> } afs;
>>> + struct ceph_lock_info ceph_fl;
>> Hi Xiubo and Jeff,
>>
>> Xiubo, instead of defining struct ceph_lock_info and including
>> a CephFS-specific header file in linux/fs.h, I think we should repeat
>> what was done for AFS -- particularly given that ceph_lock_info ends up
>> being a dummy type that isn't mentioned anywhere else.
>>
>> Jeff, could you please ack this with your file locking hat on?
>>
> ACK. I think that would be cleaner.

Sure, will fix this.

Thanks,

- Xiubo


> Thanks