2016-03-28 08:27:24

by Ming Lei

[permalink] [raw]
Subject: [PATCH] driver core: fix race between creating/querying glue dir and its cleanup

The global mutex of 'gdp_mutex' is used to serialize creating/querying
glue dir and its cleanup. Turns out it isn't a perfect way because
part(kobj_kset_leave()) of the actual cleanup action() is done inside
the release handler of the glue dir kobject. That means gdp_mutex has
to be held before releasing the last reference count of the glue dir
kobject.

This patch moves glue dir's cleanup after kobject_del() in device_del()
for avoiding the race.

Cc: Yijing Wang <[email protected]>
Reported-by: Chandra Sekhar Lingutla <[email protected]>
Signed-off-by: Ming Lei <[email protected]>
---
drivers/base/core.c | 41 +++++++++++++++++++++++++++++++----------
1 file changed, 31 insertions(+), 10 deletions(-)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 0a8bdad..3f2e1f8 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -836,11 +836,31 @@ static struct kobject *get_device_parent(struct device *dev,
return NULL;
}

+static inline bool live_in_glue_dir(struct kobject *kobj,
+ struct device *dev)
+{
+ if (!kobj || !dev->class ||
+ kobj->kset != &dev->class->p->glue_dirs)
+ return true;
+ return false;
+}
+
+static inline struct kobject *get_glue_dir(struct device *dev)
+{
+ if (live_in_glue_dir(&dev->kobj, dev))
+ return dev->kobj.parent;
+ return NULL;
+}
+
+/*
+ * make sure cleaning up dir as the last step, we need to make
+ * sure .release handler of kobject is run with holding the
+ * global lock
+ */
static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir)
{
/* see if we live in a "glue" directory */
- if (!glue_dir || !dev->class ||
- glue_dir->kset != &dev->class->p->glue_dirs)
+ if (!live_in_glue_dir(glue_dir, dev))
return;

mutex_lock(&gdp_mutex);
@@ -848,11 +868,6 @@ static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir)
mutex_unlock(&gdp_mutex);
}

-static void cleanup_device_parent(struct device *dev)
-{
- cleanup_glue_dir(dev, dev->kobj.parent);
-}
-
static int device_add_class_symlinks(struct device *dev)
{
struct device_node *of_node = dev_of_node(dev);
@@ -1028,6 +1043,7 @@ int device_add(struct device *dev)
struct kobject *kobj;
struct class_interface *class_intf;
int error = -EINVAL;
+ struct kobject *glue_dir = NULL;

dev = get_device(dev);
if (!dev)
@@ -1072,8 +1088,10 @@ int device_add(struct device *dev)
/* first, register with generic layer. */
/* we require the name to be set before, and pass NULL */
error = kobject_add(&dev->kobj, dev->kobj.parent, NULL);
- if (error)
+ if (error) {
+ glue_dir = get_glue_dir(dev);
goto Error;
+ }

/* notify platform of device entry */
if (platform_notify)
@@ -1154,9 +1172,10 @@ done:
device_remove_file(dev, &dev_attr_uevent);
attrError:
kobject_uevent(&dev->kobj, KOBJ_REMOVE);
+ glue_dir = get_glue_dir(dev);
kobject_del(&dev->kobj);
Error:
- cleanup_device_parent(dev);
+ cleanup_glue_dir(dev, glue_dir);
put_device(parent);
name_error:
kfree(dev->p);
@@ -1232,6 +1251,7 @@ EXPORT_SYMBOL_GPL(put_device);
void device_del(struct device *dev)
{
struct device *parent = dev->parent;
+ struct kobject *glue_dir = NULL;
struct class_interface *class_intf;

/* Notify clients of device removal. This call must come
@@ -1276,8 +1296,9 @@ void device_del(struct device *dev)
blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
BUS_NOTIFY_REMOVED_DEVICE, dev);
kobject_uevent(&dev->kobj, KOBJ_REMOVE);
- cleanup_device_parent(dev);
+ glue_dir = get_glue_dir(dev);
kobject_del(&dev->kobj);
+ cleanup_glue_dir(dev, glue_dir);
put_device(parent);
}
EXPORT_SYMBOL_GPL(device_del);
--
1.9.1


2016-04-26 17:27:36

by Lingutla Chandrasekhar

[permalink] [raw]
Subject: Re: [PATCH] driver core: fix race between creating/querying glue dir and its cleanup

Hi Ming,

On 03/28/2016 01:57 PM, Ming Lei wrote:
> The global mutex of 'gdp_mutex' is used to serialize creating/querying
> glue dir and its cleanup. Turns out it isn't a perfect way because
> part(kobj_kset_leave()) of the actual cleanup action() is done inside
> the release handler of the glue dir kobject. That means gdp_mutex has
> to be held before releasing the last reference count of the glue dir
> kobject.
>
> This patch moves glue dir's cleanup after kobject_del() in device_del()
> for avoiding the race.
>
> Cc: Yijing Wang <[email protected]>
> Reported-by: Chandra Sekhar Lingutla <[email protected]>
> Signed-off-by: Ming Lei <[email protected]>
> ---
> drivers/base/core.c | 41 +++++++++++++++++++++++++++++++----------
> 1 file changed, 31 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/base/core.c b/drivers/base/core.c
> index 0a8bdad..3f2e1f8 100644
> --- a/drivers/base/core.c
> +++ b/drivers/base/core.c
> @@ -836,11 +836,31 @@ static struct kobject *get_device_parent(struct device *dev,
> return NULL;
> }
>
> +static inline bool live_in_glue_dir(struct kobject *kobj,
> + struct device *dev)
> +{
> + if (!kobj || !dev->class ||
> + kobj->kset != &dev->class->p->glue_dirs)
> + return true;
> + return false;
> +}
I think we should return false if kobj->kset != &dev->class->p->glue_dirs.
If kboj->kset points to dev->class->p->glue_dirs, then we live in glue dir.
So logic should be:
if (!kobj || !dev->class ||
kobj->kset != &dev->class->p->glue_dirs)
return false;
return true;

> +
> +static inline struct kobject *get_glue_dir(struct device *dev)
> +{
> + if (live_in_glue_dir(&dev->kobj, dev))
> + return dev->kobj.parent;
> + return NULL;
> +}
> +
> +/*
> + * make sure cleaning up dir as the last step, we need to make
> + * sure .release handler of kobject is run with holding the
> + * global lock
> + */
> static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir)
> {
> /* see if we live in a "glue" directory */
> - if (!glue_dir || !dev->class ||
> - glue_dir->kset != &dev->class->p->glue_dirs)
> + if (!live_in_glue_dir(glue_dir, dev))
> return;
>
> mutex_lock(&gdp_mutex);
> @@ -848,11 +868,6 @@ static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir)
> mutex_unlock(&gdp_mutex);
> }
>
> -static void cleanup_device_parent(struct device *dev)
> -{
> - cleanup_glue_dir(dev, dev->kobj.parent);
> -}
> -
> static int device_add_class_symlinks(struct device *dev)
> {
> struct device_node *of_node = dev_of_node(dev);
> @@ -1028,6 +1043,7 @@ int device_add(struct device *dev)
> struct kobject *kobj;
> struct class_interface *class_intf;
> int error = -EINVAL;
> + struct kobject *glue_dir = NULL;
>
> dev = get_device(dev);
> if (!dev)
> @@ -1072,8 +1088,10 @@ int device_add(struct device *dev)
> /* first, register with generic layer. */
> /* we require the name to be set before, and pass NULL */
> error = kobject_add(&dev->kobj, dev->kobj.parent, NULL);
> - if (error)
> + if (error) {
> + glue_dir = get_glue_dir(dev);
> goto Error;
> + }
>
> /* notify platform of device entry */
> if (platform_notify)
> @@ -1154,9 +1172,10 @@ done:
> device_remove_file(dev, &dev_attr_uevent);
> attrError:
> kobject_uevent(&dev->kobj, KOBJ_REMOVE);
> + glue_dir = get_glue_dir(dev);
> kobject_del(&dev->kobj);
> Error:
> - cleanup_device_parent(dev);
> + cleanup_glue_dir(dev, glue_dir);
> put_device(parent);
> name_error:
> kfree(dev->p);
> @@ -1232,6 +1251,7 @@ EXPORT_SYMBOL_GPL(put_device);
> void device_del(struct device *dev)
> {
> struct device *parent = dev->parent;
> + struct kobject *glue_dir = NULL;
> struct class_interface *class_intf;
>
> /* Notify clients of device removal. This call must come
> @@ -1276,8 +1296,9 @@ void device_del(struct device *dev)
> blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
> BUS_NOTIFY_REMOVED_DEVICE, dev);
> kobject_uevent(&dev->kobj, KOBJ_REMOVE);
> - cleanup_device_parent(dev);
> + glue_dir = get_glue_dir(dev);
> kobject_del(&dev->kobj);
> + cleanup_glue_dir(dev, glue_dir);
> put_device(parent);
> }
> EXPORT_SYMBOL_GPL(device_del);
>