When configfs_lookup() is executing list_for_each_entry(),
it is possible that configfs_dir_lseek() is calling list_del().
Some unfortunate interleavings of them can cause a kernel NULL
pointer dereference error
Thread 1 Thread 2
//configfs_dir_lseek() //configfs_lookup()
list_del(&cursor->s_sibling);
list_for_each_entry(sd, ...)
Fix this bug by using list_for_each_entry_safe() instead.
Reported-by: Sishuai Gong <[email protected]>
Signed-off-by: sishuaigong <[email protected]>
---
fs/configfs/dir.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index ac5e0c0e9181..8f5d0309fb4a 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -452,7 +452,7 @@ static struct dentry * configfs_lookup(struct inode *dir,
unsigned int flags)
{
struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
- struct configfs_dirent * sd;
+ struct configfs_dirent *sd, *tmp;
int found = 0;
int err;
@@ -468,7 +468,7 @@ static struct dentry * configfs_lookup(struct inode *dir,
if (!configfs_dirent_is_ready(parent_sd))
goto out;
- list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
+ list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) {
if (sd->s_type & CONFIGFS_NOT_PINNED) {
const unsigned char * name = configfs_get_name(sd);
--
2.17.1
On Fri, Aug 20, 2021 at 05:44:58PM -0400, sishuaigong wrote:
> When configfs_lookup() is executing list_for_each_entry(),
> it is possible that configfs_dir_lseek() is calling list_del().
> Some unfortunate interleavings of them can cause a kernel NULL
> pointer dereference error
>
> Thread 1 Thread 2
> //configfs_dir_lseek() //configfs_lookup()
> list_del(&cursor->s_sibling);
> list_for_each_entry(sd, ...)
>
> Fix this bug by using list_for_each_entry_safe() instead.
I don't see how list_for_each_entry_safe would save you there.
You need a lock to sychronize the two, list_for_each_entry_safe
only ensures the next entry is looked up before iterating over
the current one.
On Mon, Aug 23, 2021 at 04:12:10PM +0000, Gong, Sishuai wrote:
> On Aug 23, 2021, at 3:46 AM, Christoph Hellwig <[email protected]<mailto:[email protected]>> wrote:
>
> On Fri, Aug 20, 2021 at 05:44:58PM -0400, sishuaigong wrote:
> When configfs_lookup() is executing list_for_each_entry(),
> it is possible that configfs_dir_lseek() is calling list_del().
> Some unfortunate interleavings of them can cause a kernel NULL
> pointer dereference error
>
> Thread 1 Thread 2
> //configfs_dir_lseek() //configfs_lookup()
> list_del(&cursor->s_sibling);
> list_for_each_entry(sd, ...)
>
> Fix this bug by using list_for_each_entry_safe() instead.
>
> I don't see how list_for_each_entry_safe would save you there.
> You need a lock to sychronize the two, list_for_each_entry_safe
> only ensures the next entry is looked up before iterating over
> the current one.
> Thanks for pointing that out!
>
> It looks like config_lookup() should hold configfs_dirent_lock
> when doing list_for_each_entry(), but configfs_attach_attr()
> also needs to be changed since it might be called by
> config_lookup() and then wait for configfs_dirent_lock,
> which will cause a deadlock.
>
> Do you think a future patch like this makes sense?
We can't hold a spinlock over inode allocation. So it would have to be
something like this:
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index ac5e0c0e9181..48022e27664d 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -417,44 +417,13 @@ static void configfs_remove_dir(struct config_item * item)
dput(dentry);
}
-
-/* attaches attribute's configfs_dirent to the dentry corresponding to the
- * attribute file
- */
-static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * dentry)
-{
- struct configfs_attribute * attr = sd->s_element;
- struct inode *inode;
-
- spin_lock(&configfs_dirent_lock);
- dentry->d_fsdata = configfs_get(sd);
- sd->s_dentry = dentry;
- spin_unlock(&configfs_dirent_lock);
-
- inode = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG);
- if (IS_ERR(inode)) {
- configfs_put(sd);
- return PTR_ERR(inode);
- }
- if (sd->s_type & CONFIGFS_ITEM_BIN_ATTR) {
- inode->i_size = 0;
- inode->i_fop = &configfs_bin_file_operations;
- } else {
- inode->i_size = PAGE_SIZE;
- inode->i_fop = &configfs_file_operations;
- }
- d_add(dentry, inode);
- return 0;
-}
-
static struct dentry * configfs_lookup(struct inode *dir,
struct dentry *dentry,
unsigned int flags)
{
- struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
- struct configfs_dirent * sd;
- int found = 0;
- int err;
+ struct configfs_dirent *parent_sd = dentry->d_parent->d_fsdata;
+ struct configfs_dirent *sd;
+ struct inode *inode = NULL;
/*
* Fake invisibility if dir belongs to a group/default groups hierarchy
@@ -464,36 +433,46 @@ static struct dentry * configfs_lookup(struct inode *dir,
* not complete their initialization, since the dentries of the
* attributes won't be instantiated.
*/
- err = -ENOENT;
if (!configfs_dirent_is_ready(parent_sd))
- goto out;
+ return ERR_PTR(-ENOENT);
+ spin_lock(&configfs_dirent_lock);
list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
- if (sd->s_type & CONFIGFS_NOT_PINNED) {
- const unsigned char * name = configfs_get_name(sd);
+ if ((sd->s_type & CONFIGFS_NOT_PINNED) &&
+ !strcmp(configfs_get_name(sd), dentry->d_name.name)) {
+ struct configfs_attribute *attr = sd->s_element;
+ umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG;
- if (strcmp(name, dentry->d_name.name))
- continue;
+ dentry->d_fsdata = configfs_get(sd);
+ sd->s_dentry = dentry;
+ spin_unlock(&configfs_dirent_lock);
- found = 1;
- err = configfs_attach_attr(sd, dentry);
- break;
+ inode = configfs_create(dentry, mode);
+ if (IS_ERR(inode)) {
+ configfs_put(sd);
+ return ERR_CAST(inode);
+ }
+ if (sd->s_type & CONFIGFS_ITEM_BIN_ATTR) {
+ inode->i_size = 0;
+ inode->i_fop = &configfs_bin_file_operations;
+ } else {
+ inode->i_size = PAGE_SIZE;
+ inode->i_fop = &configfs_file_operations;
+ }
+ goto done;
}
}
+ spin_unlock(&configfs_dirent_lock);
- if (!found) {
- /*
- * If it doesn't exist and it isn't a NOT_PINNED item,
- * it must be negative.
- */
- if (dentry->d_name.len > NAME_MAX)
- return ERR_PTR(-ENAMETOOLONG);
- d_add(dentry, NULL);
- return NULL;
- }
-
-out:
- return ERR_PTR(err);
+ /*
+ * If it doesn't exist and it isn't a NOT_PINNED item, it must be
+ * negative.
+ */
+ if (dentry->d_name.len > NAME_MAX)
+ return ERR_PTR(-ENAMETOOLONG);
+done:
+ d_add(dentry, inode);
+ return NULL;
}
/*
On Aug 23, 2021, at 1:08 PM, Christoph Hellwig <[email protected]> wrote:
>
> On Mon, Aug 23, 2021 at 04:12:10PM +0000, Gong, Sishuai wrote:
>> On Aug 23, 2021, at 3:46 AM, Christoph Hellwig <[email protected]<mailto:[email protected]>> wrote:
>>
>> On Fri, Aug 20, 2021 at 05:44:58PM -0400, sishuaigong wrote:
>> When configfs_lookup() is executing list_for_each_entry(),
>> it is possible that configfs_dir_lseek() is calling list_del().
>> Some unfortunate interleavings of them can cause a kernel NULL
>> pointer dereference error
>>
>> Thread 1 Thread 2
>> //configfs_dir_lseek() //configfs_lookup()
>> list_del(&cursor->s_sibling);
>> list_for_each_entry(sd, ...)
>>
>> Fix this bug by using list_for_each_entry_safe() instead.
>>
>> I don't see how list_for_each_entry_safe would save you there.
>> You need a lock to sychronize the two, list_for_each_entry_safe
>> only ensures the next entry is looked up before iterating over
>> the current one.
>> Thanks for pointing that out!
>>
>> It looks like config_lookup() should hold configfs_dirent_lock
>> when doing list_for_each_entry(), but configfs_attach_attr()
>> also needs to be changed since it might be called by
>> config_lookup() and then wait for configfs_dirent_lock,
>> which will cause a deadlock.
>>
>> Do you think a future patch like this makes sense?
>
> We can't hold a spinlock over inode allocation. So it would have to be
> something like this:
>
Thanks for your suggestion! I will come up with a second patch soon.
> diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
> index ac5e0c0e9181..48022e27664d 100644
> --- a/fs/configfs/dir.c
> +++ b/fs/configfs/dir.c
> @@ -417,44 +417,13 @@ static void configfs_remove_dir(struct config_item * item)
> dput(dentry);
> }
>
> -
> -/* attaches attribute's configfs_dirent to the dentry corresponding to the
> - * attribute file
> - */
> -static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * dentry)
> -{
> - struct configfs_attribute * attr = sd->s_element;
> - struct inode *inode;
> -
> - spin_lock(&configfs_dirent_lock);
> - dentry->d_fsdata = configfs_get(sd);
> - sd->s_dentry = dentry;
> - spin_unlock(&configfs_dirent_lock);
> -
> - inode = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG);
> - if (IS_ERR(inode)) {
> - configfs_put(sd);
> - return PTR_ERR(inode);
> - }
> - if (sd->s_type & CONFIGFS_ITEM_BIN_ATTR) {
> - inode->i_size = 0;
> - inode->i_fop = &configfs_bin_file_operations;
> - } else {
> - inode->i_size = PAGE_SIZE;
> - inode->i_fop = &configfs_file_operations;
> - }
> - d_add(dentry, inode);
> - return 0;
> -}
> -
> static struct dentry * configfs_lookup(struct inode *dir,
> struct dentry *dentry,
> unsigned int flags)
> {
> - struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
> - struct configfs_dirent * sd;
> - int found = 0;
> - int err;
> + struct configfs_dirent *parent_sd = dentry->d_parent->d_fsdata;
> + struct configfs_dirent *sd;
> + struct inode *inode = NULL;
>
> /*
> * Fake invisibility if dir belongs to a group/default groups hierarchy
> @@ -464,36 +433,46 @@ static struct dentry * configfs_lookup(struct inode *dir,
> * not complete their initialization, since the dentries of the
> * attributes won't be instantiated.
> */
> - err = -ENOENT;
> if (!configfs_dirent_is_ready(parent_sd))
> - goto out;
> + return ERR_PTR(-ENOENT);
>
> + spin_lock(&configfs_dirent_lock);
> list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
> - if (sd->s_type & CONFIGFS_NOT_PINNED) {
> - const unsigned char * name = configfs_get_name(sd);
> + if ((sd->s_type & CONFIGFS_NOT_PINNED) &&
> + !strcmp(configfs_get_name(sd), dentry->d_name.name)) {
> + struct configfs_attribute *attr = sd->s_element;
> + umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG;
>
> - if (strcmp(name, dentry->d_name.name))
> - continue;
> + dentry->d_fsdata = configfs_get(sd);
> + sd->s_dentry = dentry;
> + spin_unlock(&configfs_dirent_lock);
>
> - found = 1;
> - err = configfs_attach_attr(sd, dentry);
> - break;
> + inode = configfs_create(dentry, mode);
> + if (IS_ERR(inode)) {
> + configfs_put(sd);
> + return ERR_CAST(inode);
This return statement from the original configfs_attach_attr() needs to be handled accordingly.
> + }
> + if (sd->s_type & CONFIGFS_ITEM_BIN_ATTR) {
> + inode->i_size = 0;
> + inode->i_fop = &configfs_bin_file_operations;
> + } else {
> + inode->i_size = PAGE_SIZE;
> + inode->i_fop = &configfs_file_operations;
> + }
> + goto done;
> }
> }
> + spin_unlock(&configfs_dirent_lock);
>
> - if (!found) {
> - /*
> - * If it doesn't exist and it isn't a NOT_PINNED item,
> - * it must be negative.
> - */
> - if (dentry->d_name.len > NAME_MAX)
> - return ERR_PTR(-ENAMETOOLONG);
> - d_add(dentry, NULL);
> - return NULL;
> - }
> -
> -out:
> - return ERR_PTR(err);
> + /*
> + * If it doesn't exist and it isn't a NOT_PINNED item, it must be
> + * negative.
> + */
> + if (dentry->d_name.len > NAME_MAX)
> + return ERR_PTR(-ENAMETOOLONG);
> +done:
> + d_add(dentry, inode);
> + return NULL;
> }
>
> /*
On Mon, Aug 23, 2021 at 07:08:47PM +0200, Christoph Hellwig wrote:
> We can't hold a spinlock over inode allocation. So it would have to be
> something like this:
Check for -ENAMETOOLONG first; easier for analysis that way.
> + dentry->d_fsdata = configfs_get(sd);
> + sd->s_dentry = dentry;
> + spin_unlock(&configfs_dirent_lock);
>
> - found = 1;
> - err = configfs_attach_attr(sd, dentry);
> - break;
> + inode = configfs_create(dentry, mode);
> + if (IS_ERR(inode)) {
> + configfs_put(sd);
> + return ERR_CAST(inode);
Er... Won't that leave dentry with dangling ->d_fsdata?
On Wed, Aug 25, 2021 at 05:19:04AM +0000, Al Viro wrote:
> On Mon, Aug 23, 2021 at 07:08:47PM +0200, Christoph Hellwig wrote:
>
> > We can't hold a spinlock over inode allocation. So it would have to be
> > something like this:
>
> Check for -ENAMETOOLONG first; easier for analysis that way.
Indeed.
> > + dentry->d_fsdata = configfs_get(sd);
> > + sd->s_dentry = dentry;
> > + spin_unlock(&configfs_dirent_lock);
> >
> > - found = 1;
> > - err = configfs_attach_attr(sd, dentry);
> > - break;
> > + inode = configfs_create(dentry, mode);
> > + if (IS_ERR(inode)) {
> > + configfs_put(sd);
> > + return ERR_CAST(inode);
>
> Er... Won't that leave dentry with dangling ->d_fsdata?
Yes. Existing problem, though.
Sorry, this email was delayed by several months due to some network
issues on my machine.
Please simply ignore it, since the bug mentioned is fixed already by
the commit c42dd069be8dfc9b2239a5c89e73bbd08ab35de0.
> On Dec 28, 2021, at 8:35 PM, Sishuai Gong <[email protected]> wrote:
>
> From: sishuaigong <[email protected]>
>
> When configfs_lookup() is executing list_for_each_entry(),
> it is possible that configfs_dir_lseek() is calling list_del().
> Some unfortunate interleavings of them can cause a kernel NULL
> pointer dereference error
>
> Thread 1 Thread 2
> //configfs_dir_lseek() //configfs_lookup()
> list_del(&cursor->s_sibling);
> list_for_each_entry(sd, ...)
>
> Fix this bug by using list_for_each_entry_safe() instead.
>
> Reported-by: Sishuai Gong <[email protected]>
> Signed-off-by: sishuaigong <[email protected]>
> ---
> fs/configfs/dir.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
> index ac5e0c0e9181..8f5d0309fb4a 100644
> --- a/fs/configfs/dir.c
> +++ b/fs/configfs/dir.c
> @@ -452,7 +452,7 @@ static struct dentry * configfs_lookup(struct inode *dir,
> unsigned int flags)
> {
> struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
> - struct configfs_dirent * sd;
> + struct configfs_dirent *sd, *tmp;
> int found = 0;
> int err;
>
> @@ -468,7 +468,7 @@ static struct dentry * configfs_lookup(struct inode *dir,
> if (!configfs_dirent_is_ready(parent_sd))
> goto out;
>
> - list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
> + list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) {
> if (sd->s_type & CONFIGFS_NOT_PINNED) {
> const unsigned char * name = configfs_get_name(sd);
>
> --
> 2.17.1
>