There is a race condition as below when calling request_firmware():
CPU1 CPU2
write 0 > loading
mutex_lock(&fw_lock);
...
set_bit FW_STATUS_DONE class_timeout is coming
set_bit FW_STATUS_ABORT
complete_all &completion
...
mutex_unlock(&fw_lock)
In this time, the bit FW_STATUS_DONE and FW_STATUS_ABORT are set,
and request_firmware() will return failure due to condition in
_request_firmware_load():
if (!buf->size || test_bit(FW_STATUS_ABORT, &buf->status))
retval = -ENOENT;
But from the above scenerio, it should be a succcessful requesting.
So we need judge if the FW_STATUS_DONE is set before calling abort
in timeout function firmware_class_timeout().
Signed-off-by: liu chuansheng <[email protected]>
---
drivers/base/firmware_class.c | 7 +++++++
1 files changed, 7 insertions(+), 0 deletions(-)
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 8945f4e..35fffd8 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -671,6 +671,13 @@ static void firmware_class_timeout(u_long data)
{
struct firmware_priv *fw_priv = (struct firmware_priv *) data;
+ mutex_lock(&fw_lock);
+ if (test_bit(FW_STATUS_DONE, &(fw_priv->buf->status))) {
+ mutex_unlock(&fw_lock);
+ return;
+ }
+ mutex_unlock(&fw_lock);
+
fw_load_abort(fw_priv);
}
--
1.7.0.4
On Thu, Nov 8, 2012 at 12:45 AM, Chuansheng Liu
<[email protected]> wrote:
>
> There is a race condition as below when calling request_firmware():
>
> CPU1 CPU2
> write 0 > loading
> mutex_lock(&fw_lock);
> ...
> set_bit FW_STATUS_DONE class_timeout is coming
> set_bit FW_STATUS_ABORT
> complete_all &completion
> ...
> mutex_unlock(&fw_lock)
Good catch, but your fix isn't correct.
>
> In this time, the bit FW_STATUS_DONE and FW_STATUS_ABORT are set,
> and request_firmware() will return failure due to condition in
> _request_firmware_load():
>
> if (!buf->size || test_bit(FW_STATUS_ABORT, &buf->status))
> retval = -ENOENT;
>
> But from the above scenerio, it should be a succcessful requesting.
> So we need judge if the FW_STATUS_DONE is set before calling abort
> in timeout function firmware_class_timeout().
>
> Signed-off-by: liu chuansheng <[email protected]>
> ---
> drivers/base/firmware_class.c | 7 +++++++
> 1 files changed, 7 insertions(+), 0 deletions(-)
>
> diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
> index 8945f4e..35fffd8 100644
> --- a/drivers/base/firmware_class.c
> +++ b/drivers/base/firmware_class.c
> @@ -671,6 +671,13 @@ static void firmware_class_timeout(u_long data)
> {
> struct firmware_priv *fw_priv = (struct firmware_priv *) data;
>
> + mutex_lock(&fw_lock);
mutex can't be used in interrupt context, so one candidate fix is
to convert the timeout timer into delayed work, and hold the
'fw_lock' during the work function.
> + if (test_bit(FW_STATUS_DONE, &(fw_priv->buf->status))) {
> + mutex_unlock(&fw_lock);
> + return;
> + }
> + mutex_unlock(&fw_lock);
> +
> fw_load_abort(fw_priv);
> }
Also the lock of 'fw_lock' should be held when setting 'FW_STATUS_DONE'
in _request_firmware_load().
Could you send out a new patch to fix the race?
Thanks,
--
Ming Lei
There is a race as below when calling request_firmware():
CPU1 CPU2
write 0 > loading
mutex_lock(&fw_lock)
...
set_bit FW_STATUS_DONE class_timeout is coming
set_bit FW_STATUS_ABORT
complete_all &completion
...
mutex_unlock(&fw_lock)
In this time, the bit FW_STATUS_DONE and FW_STATUS_ABORT are set,
and request_firmware() will return failure due to condition in
_request_firmware_load():
if (!buf->size || test_bit(FW_STATUS_ABORT, &buf->status))
retval = -ENOENT;
But from the above scenerio, it should be a successful requesting.
So we need judge if the bit FW_STATUS_DONE is already set before
calling fw_load_abort() in timeout function.
As Ming's proposal, we need change the timer into sched_work to
benefit from using &fw_lock mutex also.
Signed-off-by: liu chuansheng <[email protected]>
---
drivers/base/firmware_class.c | 25 +++++++++++++++++--------
1 files changed, 17 insertions(+), 8 deletions(-)
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 8945f4e..f2caa0a 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -143,7 +143,7 @@ struct fw_cache_entry {
};
struct firmware_priv {
- struct timer_list timeout;
+ struct delayed_work timeout_work;
bool nowait;
struct device dev;
struct firmware_buf *buf;
@@ -667,10 +667,17 @@ static struct bin_attribute firmware_attr_data = {
.write = firmware_data_write,
};
-static void firmware_class_timeout(u_long data)
+static void firmware_class_timeout_work(struct work_struct *work)
{
- struct firmware_priv *fw_priv = (struct firmware_priv *) data;
+ struct firmware_priv *fw_priv = container_of(work,
+ struct firmware_priv, timeout_work.work);
+ mutex_lock(&fw_lock);
+ if (test_bit(FW_STATUS_DONE, &(fw_priv->buf->status))) {
+ mutex_unlock(&fw_lock);
+ return;
+ }
+ mutex_unlock(&fw_lock);
fw_load_abort(fw_priv);
}
@@ -690,8 +697,8 @@ fw_create_instance(struct firmware *firmware, const char *fw_name,
fw_priv->nowait = nowait;
fw_priv->fw = firmware;
- setup_timer(&fw_priv->timeout,
- firmware_class_timeout, (u_long) fw_priv);
+ INIT_DELAYED_WORK(&fw_priv->timeout_work,
+ firmware_class_timeout_work);
f_dev = &fw_priv->dev;
@@ -858,7 +865,9 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, bool uevent,
dev_dbg(f_dev->parent, "firmware: direct-loading"
" firmware %s\n", buf->fw_id);
+ mutex_lock(&fw_lock);
set_bit(FW_STATUS_DONE, &buf->status);
+ mutex_unlock(&fw_lock);
complete_all(&buf->completion);
direct_load = 1;
goto handle_fw;
@@ -894,15 +903,15 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, bool uevent,
dev_set_uevent_suppress(f_dev, false);
dev_dbg(f_dev, "firmware: requesting %s\n", buf->fw_id);
if (timeout != MAX_SCHEDULE_TIMEOUT)
- mod_timer(&fw_priv->timeout,
- round_jiffies_up(jiffies + timeout));
+ schedule_delayed_work(&fw_priv->timeout_work,
+ timeout * HZ);
kobject_uevent(&fw_priv->dev.kobj, KOBJ_ADD);
}
wait_for_completion(&buf->completion);
- del_timer_sync(&fw_priv->timeout);
+ cancel_delayed_work_sync(&fw_priv->timeout_work);
handle_fw:
mutex_lock(&fw_lock);
--
1.7.0.4
> -----Original Message-----
> From: Ming Lei [mailto:[email protected]]
> Sent: Wednesday, November 07, 2012 6:40 PM
> To: Liu, Chuansheng
> Cc: [email protected]; [email protected]
> Subject: Re: [PATCH] firmware loader: Fix the race FW_STATUS_DONE is
> followed by class_timeout
>
> On Thu, Nov 8, 2012 at 12:45 AM, Chuansheng Liu
> <[email protected]> wrote:
> >
> > There is a race condition as below when calling request_firmware():
> >
> > CPU1 CPU2
> > write 0 > loading
> > mutex_lock(&fw_lock);
> > ...
> > set_bit FW_STATUS_DONE class_timeout is coming
> > set_bit
> FW_STATUS_ABORT
> > complete_all &completion
> > ...
> > mutex_unlock(&fw_lock)
>
> Good catch, but your fix isn't correct.
>
> >
> > In this time, the bit FW_STATUS_DONE and FW_STATUS_ABORT are set,
> > and request_firmware() will return failure due to condition in
> > _request_firmware_load():
> >
> > if (!buf->size || test_bit(FW_STATUS_ABORT, &buf->status))
> > retval = -ENOENT;
> >
> > But from the above scenerio, it should be a succcessful requesting.
> > So we need judge if the FW_STATUS_DONE is set before calling abort
> > in timeout function firmware_class_timeout().
> >
> > Signed-off-by: liu chuansheng <[email protected]>
> > ---
> > drivers/base/firmware_class.c | 7 +++++++
> > 1 files changed, 7 insertions(+), 0 deletions(-)
> >
> > diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
> > index 8945f4e..35fffd8 100644
> > --- a/drivers/base/firmware_class.c
> > +++ b/drivers/base/firmware_class.c
> > @@ -671,6 +671,13 @@ static void firmware_class_timeout(u_long data)
> > {
> > struct firmware_priv *fw_priv = (struct firmware_priv *) data;
> >
> > + mutex_lock(&fw_lock);
>
> mutex can't be used in interrupt context, so one candidate fix is
> to convert the timeout timer into delayed work, and hold the
> 'fw_lock' during the work function.
>
> > + if (test_bit(FW_STATUS_DONE, &(fw_priv->buf->status))) {
> > + mutex_unlock(&fw_lock);
> > + return;
> > + }
> > + mutex_unlock(&fw_lock);
> > +
> > fw_load_abort(fw_priv);
> > }
>
> Also the lock of 'fw_lock' should be held when setting 'FW_STATUS_DONE'
> in _request_firmware_load().
>
> Could you send out a new patch to fix the race?
Thanks your review and proposal, has sent the patch V2 to be reviewed.
>
> Thanks,
> --
> Ming Lei
On Thu, Nov 8, 2012 at 6:29 PM, Chuansheng Liu <[email protected]> wrote:
>
> There is a race as below when calling request_firmware():
> CPU1 CPU2
> write 0 > loading
> mutex_lock(&fw_lock)
> ...
> set_bit FW_STATUS_DONE class_timeout is coming
> set_bit FW_STATUS_ABORT
> complete_all &completion
> ...
> mutex_unlock(&fw_lock)
>
> In this time, the bit FW_STATUS_DONE and FW_STATUS_ABORT are set,
> and request_firmware() will return failure due to condition in
> _request_firmware_load():
> if (!buf->size || test_bit(FW_STATUS_ABORT, &buf->status))
> retval = -ENOENT;
>
> But from the above scenerio, it should be a successful requesting.
> So we need judge if the bit FW_STATUS_DONE is already set before
> calling fw_load_abort() in timeout function.
>
> As Ming's proposal, we need change the timer into sched_work to
> benefit from using &fw_lock mutex also.
>
> Signed-off-by: liu chuansheng <[email protected]>
> ---
> drivers/base/firmware_class.c | 25 +++++++++++++++++--------
> 1 files changed, 17 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
> index 8945f4e..f2caa0a 100644
> --- a/drivers/base/firmware_class.c
> +++ b/drivers/base/firmware_class.c
> @@ -143,7 +143,7 @@ struct fw_cache_entry {
> };
>
> struct firmware_priv {
> - struct timer_list timeout;
> + struct delayed_work timeout_work;
> bool nowait;
> struct device dev;
> struct firmware_buf *buf;
> @@ -667,10 +667,17 @@ static struct bin_attribute firmware_attr_data = {
> .write = firmware_data_write,
> };
>
> -static void firmware_class_timeout(u_long data)
> +static void firmware_class_timeout_work(struct work_struct *work)
> {
> - struct firmware_priv *fw_priv = (struct firmware_priv *) data;
> + struct firmware_priv *fw_priv = container_of(work,
> + struct firmware_priv, timeout_work.work);
>
> + mutex_lock(&fw_lock);
> + if (test_bit(FW_STATUS_DONE, &(fw_priv->buf->status))) {
> + mutex_unlock(&fw_lock);
> + return;
> + }
> + mutex_unlock(&fw_lock);
> fw_load_abort(fw_priv);
fw_lock should be held when fw_load_abort(fw_priv) is running.
> }
>
> @@ -690,8 +697,8 @@ fw_create_instance(struct firmware *firmware, const char *fw_name,
>
> fw_priv->nowait = nowait;
> fw_priv->fw = firmware;
> - setup_timer(&fw_priv->timeout,
> - firmware_class_timeout, (u_long) fw_priv);
> + INIT_DELAYED_WORK(&fw_priv->timeout_work,
> + firmware_class_timeout_work);
>
> f_dev = &fw_priv->dev;
>
> @@ -858,7 +865,9 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, bool uevent,
> dev_dbg(f_dev->parent, "firmware: direct-loading"
> " firmware %s\n", buf->fw_id);
>
> + mutex_lock(&fw_lock);
> set_bit(FW_STATUS_DONE, &buf->status);
> + mutex_unlock(&fw_lock);
> complete_all(&buf->completion);
> direct_load = 1;
> goto handle_fw;
> @@ -894,15 +903,15 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, bool uevent,
> dev_set_uevent_suppress(f_dev, false);
> dev_dbg(f_dev, "firmware: requesting %s\n", buf->fw_id);
> if (timeout != MAX_SCHEDULE_TIMEOUT)
> - mod_timer(&fw_priv->timeout,
> - round_jiffies_up(jiffies + timeout));
> + schedule_delayed_work(&fw_priv->timeout_work,
> + timeout * HZ);
timeout is in unit of jiffies already, so multiplying by 'HZ' isn't needed.
>
> kobject_uevent(&fw_priv->dev.kobj, KOBJ_ADD);
> }
>
> wait_for_completion(&buf->completion);
>
> - del_timer_sync(&fw_priv->timeout);
> + cancel_delayed_work_sync(&fw_priv->timeout_work);
>
> handle_fw:
> mutex_lock(&fw_lock);
> --
> 1.7.0.4
>
>
>
Thanks,
--
Ming Lei
> -----Original Message-----
> From: Ming Lei [mailto:[email protected]]
> Sent: Thursday, November 08, 2012 10:02 AM
> To: Liu, Chuansheng
> Cc: [email protected]; [email protected]
> Subject: Re: [PATCH V2] firmware loader: Fix the race FW_STATUS_DONE is
> followed by class_timeout
>
> On Thu, Nov 8, 2012 at 6:29 PM, Chuansheng Liu <[email protected]>
> wrote:
> >
> > There is a race as below when calling request_firmware():
> > CPU1 CPU2
> > write 0 > loading
> > mutex_lock(&fw_lock)
> > ...
> > set_bit FW_STATUS_DONE class_timeout is coming
> > set_bit FW_STATUS_ABORT
> > complete_all &completion
> > ...
> > mutex_unlock(&fw_lock)
> >
> > In this time, the bit FW_STATUS_DONE and FW_STATUS_ABORT are set,
> > and request_firmware() will return failure due to condition in
> > _request_firmware_load():
> > if (!buf->size || test_bit(FW_STATUS_ABORT, &buf->status))
> > retval = -ENOENT;
> >
> > But from the above scenerio, it should be a successful requesting.
> > So we need judge if the bit FW_STATUS_DONE is already set before
> > calling fw_load_abort() in timeout function.
> >
> > As Ming's proposal, we need change the timer into sched_work to
> > benefit from using &fw_lock mutex also.
> >
> > Signed-off-by: liu chuansheng <[email protected]>
> > ---
> > drivers/base/firmware_class.c | 25 +++++++++++++++++--------
> > 1 files changed, 17 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
> > index 8945f4e..f2caa0a 100644
> > --- a/drivers/base/firmware_class.c
> > +++ b/drivers/base/firmware_class.c
> > @@ -143,7 +143,7 @@ struct fw_cache_entry {
> > };
> >
> > struct firmware_priv {
> > - struct timer_list timeout;
> > + struct delayed_work timeout_work;
> > bool nowait;
> > struct device dev;
> > struct firmware_buf *buf;
> > @@ -667,10 +667,17 @@ static struct bin_attribute firmware_attr_data = {
> > .write = firmware_data_write,
> > };
> >
> > -static void firmware_class_timeout(u_long data)
> > +static void firmware_class_timeout_work(struct work_struct *work)
> > {
> > - struct firmware_priv *fw_priv = (struct firmware_priv *) data;
> > + struct firmware_priv *fw_priv = container_of(work,
> > + struct firmware_priv, timeout_work.work);
> >
> > + mutex_lock(&fw_lock);
> > + if (test_bit(FW_STATUS_DONE, &(fw_priv->buf->status))) {
> > + mutex_unlock(&fw_lock);
> > + return;
> > + }
> > + mutex_unlock(&fw_lock);
> > fw_load_abort(fw_priv);
>
> fw_lock should be held when fw_load_abort(fw_priv) is running.
OK.
>
> > }
> >
> > @@ -690,8 +697,8 @@ fw_create_instance(struct firmware *firmware,
> const char *fw_name,
> >
> > fw_priv->nowait = nowait;
> > fw_priv->fw = firmware;
> > - setup_timer(&fw_priv->timeout,
> > - firmware_class_timeout, (u_long) fw_priv);
> > + INIT_DELAYED_WORK(&fw_priv->timeout_work,
> > + firmware_class_timeout_work);
> >
> > f_dev = &fw_priv->dev;
> >
> > @@ -858,7 +865,9 @@ static int _request_firmware_load(struct
> firmware_priv *fw_priv, bool uevent,
> > dev_dbg(f_dev->parent, "firmware: direct-loading"
> > " firmware %s\n", buf->fw_id);
> >
> > + mutex_lock(&fw_lock);
> > set_bit(FW_STATUS_DONE, &buf->status);
> > + mutex_unlock(&fw_lock);
> > complete_all(&buf->completion);
> > direct_load = 1;
> > goto handle_fw;
> > @@ -894,15 +903,15 @@ static int _request_firmware_load(struct
> firmware_priv *fw_priv, bool uevent,
> > dev_set_uevent_suppress(f_dev, false);
> > dev_dbg(f_dev, "firmware: requesting %s\n",
> buf->fw_id);
> > if (timeout != MAX_SCHEDULE_TIMEOUT)
> > - mod_timer(&fw_priv->timeout,
> > - round_jiffies_up(jiffies + timeout));
> > +
> schedule_delayed_work(&fw_priv->timeout_work,
> > + timeout * HZ);
>
> timeout is in unit of jiffies already, so multiplying by 'HZ' isn't needed.
Oh, sorry for that, I just tested it in one old version.
Will update the patch soon.
>
> >
> > kobject_uevent(&fw_priv->dev.kobj, KOBJ_ADD);
> > }
> >
> > wait_for_completion(&buf->completion);
> >
> > - del_timer_sync(&fw_priv->timeout);
> > + cancel_delayed_work_sync(&fw_priv->timeout_work);
> >
> > handle_fw:
> > mutex_lock(&fw_lock);
> > --
> > 1.7.0.4
> >
> >
> >
>
> Thanks,
> --
> Ming Lei
There is a race as below when calling request_firmware():
CPU1 CPU2
write 0 > loading
mutex_lock(&fw_lock)
...
set_bit FW_STATUS_DONE class_timeout is coming
set_bit FW_STATUS_ABORT
complete_all &completion
...
mutex_unlock(&fw_lock)
In this time, the bit FW_STATUS_DONE and FW_STATUS_ABORT are set,
and request_firmware() will return failure due to condition in
_request_firmware_load():
if (!buf->size || test_bit(FW_STATUS_ABORT, &buf->status))
retval = -ENOENT;
But from the above scenerio, it should be a successful requesting.
So we need judge if the bit FW_STATUS_DONE is already set before
calling fw_load_abort() in timeout function.
As Ming's proposal, we need change the timer into sched_work to
benefit from using &fw_lock mutex also.
Signed-off-by: liu chuansheng <[email protected]>
---
drivers/base/firmware_class.c | 24 ++++++++++++++++--------
1 files changed, 16 insertions(+), 8 deletions(-)
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 8945f4e..b44ed35 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -143,7 +143,7 @@ struct fw_cache_entry {
};
struct firmware_priv {
- struct timer_list timeout;
+ struct delayed_work timeout_work;
bool nowait;
struct device dev;
struct firmware_buf *buf;
@@ -667,11 +667,18 @@ static struct bin_attribute firmware_attr_data = {
.write = firmware_data_write,
};
-static void firmware_class_timeout(u_long data)
+static void firmware_class_timeout_work(struct work_struct *work)
{
- struct firmware_priv *fw_priv = (struct firmware_priv *) data;
+ struct firmware_priv *fw_priv = container_of(work,
+ struct firmware_priv, timeout_work.work);
+ mutex_lock(&fw_lock);
+ if (test_bit(FW_STATUS_DONE, &(fw_priv->buf->status))) {
+ mutex_unlock(&fw_lock);
+ return;
+ }
fw_load_abort(fw_priv);
+ mutex_unlock(&fw_lock);
}
static struct firmware_priv *
@@ -690,8 +697,8 @@ fw_create_instance(struct firmware *firmware, const char *fw_name,
fw_priv->nowait = nowait;
fw_priv->fw = firmware;
- setup_timer(&fw_priv->timeout,
- firmware_class_timeout, (u_long) fw_priv);
+ INIT_DELAYED_WORK(&fw_priv->timeout_work,
+ firmware_class_timeout_work);
f_dev = &fw_priv->dev;
@@ -858,7 +865,9 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, bool uevent,
dev_dbg(f_dev->parent, "firmware: direct-loading"
" firmware %s\n", buf->fw_id);
+ mutex_lock(&fw_lock);
set_bit(FW_STATUS_DONE, &buf->status);
+ mutex_unlock(&fw_lock);
complete_all(&buf->completion);
direct_load = 1;
goto handle_fw;
@@ -894,15 +903,14 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, bool uevent,
dev_set_uevent_suppress(f_dev, false);
dev_dbg(f_dev, "firmware: requesting %s\n", buf->fw_id);
if (timeout != MAX_SCHEDULE_TIMEOUT)
- mod_timer(&fw_priv->timeout,
- round_jiffies_up(jiffies + timeout));
+ schedule_delayed_work(&fw_priv->timeout_work, timeout);
kobject_uevent(&fw_priv->dev.kobj, KOBJ_ADD);
}
wait_for_completion(&buf->completion);
- del_timer_sync(&fw_priv->timeout);
+ cancel_delayed_work_sync(&fw_priv->timeout_work);
handle_fw:
mutex_lock(&fw_lock);
--
1.7.0.4
On Thu, Nov 8, 2012 at 7:14 PM, Chuansheng Liu <[email protected]> wrote:
>
> There is a race as below when calling request_firmware():
> CPU1 CPU2
> write 0 > loading
> mutex_lock(&fw_lock)
> ...
> set_bit FW_STATUS_DONE class_timeout is coming
> set_bit FW_STATUS_ABORT
> complete_all &completion
> ...
> mutex_unlock(&fw_lock)
>
> In this time, the bit FW_STATUS_DONE and FW_STATUS_ABORT are set,
> and request_firmware() will return failure due to condition in
> _request_firmware_load():
> if (!buf->size || test_bit(FW_STATUS_ABORT, &buf->status))
> retval = -ENOENT;
>
> But from the above scenerio, it should be a successful requesting.
> So we need judge if the bit FW_STATUS_DONE is already set before
> calling fw_load_abort() in timeout function.
>
> As Ming's proposal, we need change the timer into sched_work to
> benefit from using &fw_lock mutex also.
>
> Signed-off-by: liu chuansheng <[email protected]>
Acked-by: Ming Lei <[email protected]>
Thanks,
--
Ming Lei