Simplify regulator locking by removing locking around locking. rdev->ref
is now accessed only when the lock is taken. The code still smells fishy,
but now its obvious why.
Fixes: f8702f9e4aa7 ("regulator: core: Use ww_mutex for regulators locking")
Signed-off-by: Michał Mirosław <[email protected]>
---
drivers/regulator/core.c | 37 ++++++--------------------------
include/linux/regulator/driver.h | 1 -
2 files changed, 6 insertions(+), 32 deletions(-)
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 9e18997777d3..b0662927487c 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -45,7 +45,6 @@
pr_debug("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__)
static DEFINE_WW_CLASS(regulator_ww_class);
-static DEFINE_MUTEX(regulator_nesting_mutex);
static DEFINE_MUTEX(regulator_list_mutex);
static LIST_HEAD(regulator_map_list);
static LIST_HEAD(regulator_ena_gpio_list);
@@ -150,32 +149,13 @@ static bool regulator_ops_is_valid(struct regulator_dev *rdev, int ops)
static inline int regulator_lock_nested(struct regulator_dev *rdev,
struct ww_acquire_ctx *ww_ctx)
{
- bool lock = false;
int ret = 0;
- mutex_lock(®ulator_nesting_mutex);
+ if (ww_ctx || !mutex_trylock_recursive(&rdev->mutex.base))
+ ret = ww_mutex_lock(&rdev->mutex, ww_ctx);
- if (ww_ctx || !ww_mutex_trylock(&rdev->mutex)) {
- if (rdev->mutex_owner == current)
- rdev->ref_cnt++;
- else
- lock = true;
-
- if (lock) {
- mutex_unlock(®ulator_nesting_mutex);
- ret = ww_mutex_lock(&rdev->mutex, ww_ctx);
- mutex_lock(®ulator_nesting_mutex);
- }
- } else {
- lock = true;
- }
-
- if (lock && ret != -EDEADLK) {
+ if (ret != -EDEADLK)
rdev->ref_cnt++;
- rdev->mutex_owner = current;
- }
-
- mutex_unlock(®ulator_nesting_mutex);
return ret;
}
@@ -205,16 +185,11 @@ EXPORT_SYMBOL_GPL(regulator_lock);
*/
void regulator_unlock(struct regulator_dev *rdev)
{
- mutex_lock(®ulator_nesting_mutex);
+ if (WARN_ON_ONCE(rdev->ref_cnt <= 0))
+ return;
- if (--rdev->ref_cnt == 0) {
- rdev->mutex_owner = NULL;
+ if (--rdev->ref_cnt == 0)
ww_mutex_unlock(&rdev->mutex);
- }
-
- WARN_ON_ONCE(rdev->ref_cnt < 0);
-
- mutex_unlock(®ulator_nesting_mutex);
}
EXPORT_SYMBOL_GPL(regulator_unlock);
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 8539f34ae42b..2fe5b1bcbe2f 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -448,7 +448,6 @@ struct regulator_dev {
struct blocking_notifier_head notifier;
struct ww_mutex mutex; /* consumer lock */
- struct task_struct *mutex_owner;
int ref_cnt;
struct module *owner;
struct device dev;
--
2.20.1
10.08.2020 00:16, Michał Mirosław пишет:
> Simplify regulator locking by removing locking around locking. rdev->ref
> is now accessed only when the lock is taken. The code still smells fishy,
> but now its obvious why.
>
> Fixes: f8702f9e4aa7 ("regulator: core: Use ww_mutex for regulators locking")
> Signed-off-by: Michał Mirosław <[email protected]>
> ---
> drivers/regulator/core.c | 37 ++++++--------------------------
> include/linux/regulator/driver.h | 1 -
> 2 files changed, 6 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
> index 9e18997777d3..b0662927487c 100644
> --- a/drivers/regulator/core.c
> +++ b/drivers/regulator/core.c
> @@ -45,7 +45,6 @@
> pr_debug("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__)
>
> static DEFINE_WW_CLASS(regulator_ww_class);
> -static DEFINE_MUTEX(regulator_nesting_mutex);
> static DEFINE_MUTEX(regulator_list_mutex);
> static LIST_HEAD(regulator_map_list);
> static LIST_HEAD(regulator_ena_gpio_list);
> @@ -150,32 +149,13 @@ static bool regulator_ops_is_valid(struct regulator_dev *rdev, int ops)
> static inline int regulator_lock_nested(struct regulator_dev *rdev,
> struct ww_acquire_ctx *ww_ctx)
> {
> - bool lock = false;
> int ret = 0;
>
> - mutex_lock(®ulator_nesting_mutex);
> + if (ww_ctx || !mutex_trylock_recursive(&rdev->mutex.base))
Have you seen comment to the mutex_trylock_recursive()?
https://elixir.bootlin.com/linux/v5.8/source/include/linux/mutex.h#L205
* This function should not be used, _ever_. It is purely for hysterical GEM
* raisins, and once those are gone this will be removed.
I knew about this function and I don't think it's okay to use it, hence
this is why there is that "nesting_mutex" and "owner" checking.
If you disagree, then perhaps you should make another patch to remove
the stale comment to trylock_recursive().
On Mon, Aug 10, 2020 at 12:40:04AM +0300, Dmitry Osipenko wrote:
> 10.08.2020 00:16, Michał Mirosław пишет:
> > Simplify regulator locking by removing locking around locking. rdev->ref
> > is now accessed only when the lock is taken. The code still smells fishy,
> > but now its obvious why.
> >
> > Fixes: f8702f9e4aa7 ("regulator: core: Use ww_mutex for regulators locking")
> > Signed-off-by: Michał Mirosław <[email protected]>
> > ---
> > drivers/regulator/core.c | 37 ++++++--------------------------
> > include/linux/regulator/driver.h | 1 -
> > 2 files changed, 6 insertions(+), 32 deletions(-)
> >
> > diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
> > index 9e18997777d3..b0662927487c 100644
> > --- a/drivers/regulator/core.c
> > +++ b/drivers/regulator/core.c
> > @@ -45,7 +45,6 @@
> > pr_debug("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__)
> >
> > static DEFINE_WW_CLASS(regulator_ww_class);
> > -static DEFINE_MUTEX(regulator_nesting_mutex);
> > static DEFINE_MUTEX(regulator_list_mutex);
> > static LIST_HEAD(regulator_map_list);
> > static LIST_HEAD(regulator_ena_gpio_list);
> > @@ -150,32 +149,13 @@ static bool regulator_ops_is_valid(struct regulator_dev *rdev, int ops)
> > static inline int regulator_lock_nested(struct regulator_dev *rdev,
> > struct ww_acquire_ctx *ww_ctx)
> > {
> > - bool lock = false;
> > int ret = 0;
> >
> > - mutex_lock(®ulator_nesting_mutex);
> > + if (ww_ctx || !mutex_trylock_recursive(&rdev->mutex.base))
>
> Have you seen comment to the mutex_trylock_recursive()?
>
> https://elixir.bootlin.com/linux/v5.8/source/include/linux/mutex.h#L205
>
> * This function should not be used, _ever_. It is purely for hysterical GEM
> * raisins, and once those are gone this will be removed.
>
> I knew about this function and I don't think it's okay to use it, hence
> this is why there is that "nesting_mutex" and "owner" checking.
>
> If you disagree, then perhaps you should make another patch to remove
> the stale comment to trylock_recursive().
I think that reimplementing the function just to not use it is not the
right solution. The whole locking protocol is problematic and this patch
just uncovers one side of it.
Best Regards,
Michał Mirosław
10.08.2020 01:30, Michał Mirosław пишет:
> On Mon, Aug 10, 2020 at 12:40:04AM +0300, Dmitry Osipenko wrote:
>> 10.08.2020 00:16, Michał Mirosław пишет:
>>> Simplify regulator locking by removing locking around locking. rdev->ref
>>> is now accessed only when the lock is taken. The code still smells fishy,
>>> but now its obvious why.
>>>
>>> Fixes: f8702f9e4aa7 ("regulator: core: Use ww_mutex for regulators locking")
>>> Signed-off-by: Michał Mirosław <[email protected]>
>>> ---
>>> drivers/regulator/core.c | 37 ++++++--------------------------
>>> include/linux/regulator/driver.h | 1 -
>>> 2 files changed, 6 insertions(+), 32 deletions(-)
>>>
>>> diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
>>> index 9e18997777d3..b0662927487c 100644
>>> --- a/drivers/regulator/core.c
>>> +++ b/drivers/regulator/core.c
>>> @@ -45,7 +45,6 @@
>>> pr_debug("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__)
>>>
>>> static DEFINE_WW_CLASS(regulator_ww_class);
>>> -static DEFINE_MUTEX(regulator_nesting_mutex);
>>> static DEFINE_MUTEX(regulator_list_mutex);
>>> static LIST_HEAD(regulator_map_list);
>>> static LIST_HEAD(regulator_ena_gpio_list);
>>> @@ -150,32 +149,13 @@ static bool regulator_ops_is_valid(struct regulator_dev *rdev, int ops)
>>> static inline int regulator_lock_nested(struct regulator_dev *rdev,
>>> struct ww_acquire_ctx *ww_ctx)
>>> {
>>> - bool lock = false;
>>> int ret = 0;
>>>
>>> - mutex_lock(®ulator_nesting_mutex);
>>> + if (ww_ctx || !mutex_trylock_recursive(&rdev->mutex.base))
>>
>> Have you seen comment to the mutex_trylock_recursive()?
>>
>> https://elixir.bootlin.com/linux/v5.8/source/include/linux/mutex.h#L205
>>
>> * This function should not be used, _ever_. It is purely for hysterical GEM
>> * raisins, and once those are gone this will be removed.
>>
>> I knew about this function and I don't think it's okay to use it, hence
>> this is why there is that "nesting_mutex" and "owner" checking.
>>
>> If you disagree, then perhaps you should make another patch to remove
>> the stale comment to trylock_recursive().
>
> I think that reimplementing the function just to not use it is not the
> right solution. The whole locking protocol is problematic and this patch
> just uncovers one side of it.
It's not clear to me what is uncovered, the ref_cnt was always accessed
under lock. Could you please explain in a more details?
Would be awesome if you could improve the code, but then you should
un-deprecate the trylock_recursive() before making use of it. Maybe
nobody will mind and it all will be good in the end.
On Mon, Aug 10, 2020 at 03:21:47AM +0300, Dmitry Osipenko wrote:
> 10.08.2020 01:30, Michał Mirosław пишет:
> > On Mon, Aug 10, 2020 at 12:40:04AM +0300, Dmitry Osipenko wrote:
> >> 10.08.2020 00:16, Michał Mirosław пишет:
> >>> Simplify regulator locking by removing locking around locking. rdev->ref
> >>> is now accessed only when the lock is taken. The code still smells fishy,
> >>> but now its obvious why.
> >>>
> >>> Fixes: f8702f9e4aa7 ("regulator: core: Use ww_mutex for regulators locking")
> >>> Signed-off-by: Michał Mirosław <[email protected]>
> >>> ---
> >>> drivers/regulator/core.c | 37 ++++++--------------------------
> >>> include/linux/regulator/driver.h | 1 -
> >>> 2 files changed, 6 insertions(+), 32 deletions(-)
> >>>
> >>> diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
> >>> index 9e18997777d3..b0662927487c 100644
> >>> --- a/drivers/regulator/core.c
> >>> +++ b/drivers/regulator/core.c
> >>> @@ -45,7 +45,6 @@
> >>> pr_debug("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__)
> >>>
> >>> static DEFINE_WW_CLASS(regulator_ww_class);
> >>> -static DEFINE_MUTEX(regulator_nesting_mutex);
> >>> static DEFINE_MUTEX(regulator_list_mutex);
> >>> static LIST_HEAD(regulator_map_list);
> >>> static LIST_HEAD(regulator_ena_gpio_list);
> >>> @@ -150,32 +149,13 @@ static bool regulator_ops_is_valid(struct regulator_dev *rdev, int ops)
> >>> static inline int regulator_lock_nested(struct regulator_dev *rdev,
> >>> struct ww_acquire_ctx *ww_ctx)
> >>> {
> >>> - bool lock = false;
> >>> int ret = 0;
> >>>
> >>> - mutex_lock(®ulator_nesting_mutex);
> >>> + if (ww_ctx || !mutex_trylock_recursive(&rdev->mutex.base))
> >>
> >> Have you seen comment to the mutex_trylock_recursive()?
> >>
> >> https://elixir.bootlin.com/linux/v5.8/source/include/linux/mutex.h#L205
> >>
> >> * This function should not be used, _ever_. It is purely for hysterical GEM
> >> * raisins, and once those are gone this will be removed.
> >>
> >> I knew about this function and I don't think it's okay to use it, hence
> >> this is why there is that "nesting_mutex" and "owner" checking.
> >>
> >> If you disagree, then perhaps you should make another patch to remove
> >> the stale comment to trylock_recursive().
> >
> > I think that reimplementing the function just to not use it is not the
> > right solution. The whole locking protocol is problematic and this patch
> > just uncovers one side of it.
>
> It's not clear to me what is uncovered, the ref_cnt was always accessed
> under lock. Could you please explain in a more details?
>
> Would be awesome if you could improve the code, but then you should
> un-deprecate the trylock_recursive() before making use of it. Maybe
> nobody will mind and it all will be good in the end.
I'm not sure why the framework wants recursive locking? If only for the
coupling case, then ww_mutex seems the right direction to replace it:
while walking the graph it will detect entering the same node
a second time. But this works only during the locking transaction (with
ww_context != NULL). Allowing recursive regulator_lock() outside of it
seems inviting trouble.
Best Regards,
Michał Mirosław
On Mon, Aug 10, 2020 at 03:21:47AM +0300, Dmitry Osipenko wrote:
> 10.08.2020 01:30, Michał Mirosław пишет:
> > On Mon, Aug 10, 2020 at 12:40:04AM +0300, Dmitry Osipenko wrote:
> >> 10.08.2020 00:16, Michał Mirosław пишет:
> >>> Simplify regulator locking by removing locking around locking. rdev->ref
> >>> is now accessed only when the lock is taken. The code still smells fishy,
> >>> but now its obvious why.
> >>>
> >>> Fixes: f8702f9e4aa7 ("regulator: core: Use ww_mutex for regulators locking")
> >>> Signed-off-by: Michał Mirosław <[email protected]>
> >>> ---
> >>> drivers/regulator/core.c | 37 ++++++--------------------------
> >>> include/linux/regulator/driver.h | 1 -
> >>> 2 files changed, 6 insertions(+), 32 deletions(-)
> >>>
> >>> diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
> >>> index 9e18997777d3..b0662927487c 100644
> >>> --- a/drivers/regulator/core.c
> >>> +++ b/drivers/regulator/core.c
> >>> @@ -45,7 +45,6 @@
> >>> pr_debug("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__)
> >>>
> >>> static DEFINE_WW_CLASS(regulator_ww_class);
> >>> -static DEFINE_MUTEX(regulator_nesting_mutex);
> >>> static DEFINE_MUTEX(regulator_list_mutex);
> >>> static LIST_HEAD(regulator_map_list);
> >>> static LIST_HEAD(regulator_ena_gpio_list);
> >>> @@ -150,32 +149,13 @@ static bool regulator_ops_is_valid(struct regulator_dev *rdev, int ops)
> >>> static inline int regulator_lock_nested(struct regulator_dev *rdev,
> >>> struct ww_acquire_ctx *ww_ctx)
> >>> {
> >>> - bool lock = false;
> >>> int ret = 0;
> >>>
> >>> - mutex_lock(®ulator_nesting_mutex);
> >>> + if (ww_ctx || !mutex_trylock_recursive(&rdev->mutex.base))
> >>
> >> Have you seen comment to the mutex_trylock_recursive()?
> >>
> >> https://elixir.bootlin.com/linux/v5.8/source/include/linux/mutex.h#L205
> >>
> >> * This function should not be used, _ever_. It is purely for hysterical GEM
> >> * raisins, and once those are gone this will be removed.
> >>
> >> I knew about this function and I don't think it's okay to use it, hence
> >> this is why there is that "nesting_mutex" and "owner" checking.
> >>
> >> If you disagree, then perhaps you should make another patch to remove
> >> the stale comment to trylock_recursive().
> >
> > I think that reimplementing the function just to not use it is not the
> > right solution. The whole locking protocol is problematic and this patch
> > just uncovers one side of it.
>
> It's not clear to me what is uncovered, the ref_cnt was always accessed
> under lock. Could you please explain in a more details?
>
> Would be awesome if you could improve the code, but then you should
> un-deprecate the trylock_recursive() before making use of it. Maybe
> nobody will mind and it all will be good in the end.
This might be a religious argument. Having said that: I believe using
a deprecated function is better than open coding it. Otherwise it would
be forbidden (ie. removed), not just deprecated.
Of course this assumes that you *really* need a recursive mutex here.
Best Regards,
Michał Mirosław
10.08.2020 03:59, Michał Mirosław пишет:
> On Mon, Aug 10, 2020 at 03:21:47AM +0300, Dmitry Osipenko wrote:
>> 10.08.2020 01:30, Michał Mirosław пишет:
>>> On Mon, Aug 10, 2020 at 12:40:04AM +0300, Dmitry Osipenko wrote:
>>>> 10.08.2020 00:16, Michał Mirosław пишет:
>>>>> Simplify regulator locking by removing locking around locking. rdev->ref
>>>>> is now accessed only when the lock is taken. The code still smells fishy,
>>>>> but now its obvious why.
>>>>>
>>>>> Fixes: f8702f9e4aa7 ("regulator: core: Use ww_mutex for regulators locking")
>>>>> Signed-off-by: Michał Mirosław <[email protected]>
>>>>> ---
>>>>> drivers/regulator/core.c | 37 ++++++--------------------------
>>>>> include/linux/regulator/driver.h | 1 -
>>>>> 2 files changed, 6 insertions(+), 32 deletions(-)
>>>>>
>>>>> diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
>>>>> index 9e18997777d3..b0662927487c 100644
>>>>> --- a/drivers/regulator/core.c
>>>>> +++ b/drivers/regulator/core.c
>>>>> @@ -45,7 +45,6 @@
>>>>> pr_debug("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__)
>>>>>
>>>>> static DEFINE_WW_CLASS(regulator_ww_class);
>>>>> -static DEFINE_MUTEX(regulator_nesting_mutex);
>>>>> static DEFINE_MUTEX(regulator_list_mutex);
>>>>> static LIST_HEAD(regulator_map_list);
>>>>> static LIST_HEAD(regulator_ena_gpio_list);
>>>>> @@ -150,32 +149,13 @@ static bool regulator_ops_is_valid(struct regulator_dev *rdev, int ops)
>>>>> static inline int regulator_lock_nested(struct regulator_dev *rdev,
>>>>> struct ww_acquire_ctx *ww_ctx)
>>>>> {
>>>>> - bool lock = false;
>>>>> int ret = 0;
>>>>>
>>>>> - mutex_lock(®ulator_nesting_mutex);
>>>>> + if (ww_ctx || !mutex_trylock_recursive(&rdev->mutex.base))
>>>>
>>>> Have you seen comment to the mutex_trylock_recursive()?
>>>>
>>>> https://elixir.bootlin.com/linux/v5.8/source/include/linux/mutex.h#L205
>>>>
>>>> * This function should not be used, _ever_. It is purely for hysterical GEM
>>>> * raisins, and once those are gone this will be removed.
>>>>
>>>> I knew about this function and I don't think it's okay to use it, hence
>>>> this is why there is that "nesting_mutex" and "owner" checking.
>>>>
>>>> If you disagree, then perhaps you should make another patch to remove
>>>> the stale comment to trylock_recursive().
>>>
>>> I think that reimplementing the function just to not use it is not the
>>> right solution. The whole locking protocol is problematic and this patch
>>> just uncovers one side of it.
>>
>> It's not clear to me what is uncovered, the ref_cnt was always accessed
>> under lock. Could you please explain in a more details?
>>
>> Would be awesome if you could improve the code, but then you should
>> un-deprecate the trylock_recursive() before making use of it. Maybe
>> nobody will mind and it all will be good in the end.
>
> I'm not sure why the framework wants recursive locking? If only for the
> coupling case, then ww_mutex seems the right direction to replace it:
> while walking the graph it will detect entering the same node
> a second time. But this works only during the locking transaction (with
> ww_context != NULL). Allowing recursive regulator_lock() outside of it
> seems inviting trouble.
Yes, it's for the coupling case. Coupled regulators may have common
ancestors and then the whole sub-tree needs to be locked while operating
with a coupled regulator.
The nested locking usage is discouraged in general because it is a
source of bugs. I guess it should be possible to get rid of all nested
lockings in the regulator core and use a pure ww_mutex, but somebody
should dedicate time to work on it.
On Mon, Aug 10, 2020 at 08:14:20AM +0300, Dmitry Osipenko wrote:
> 10.08.2020 03:59, Michał Mirosław пишет:
> > On Mon, Aug 10, 2020 at 03:21:47AM +0300, Dmitry Osipenko wrote:
> >> 10.08.2020 01:30, Michał Mirosław пишет:
> >>> On Mon, Aug 10, 2020 at 12:40:04AM +0300, Dmitry Osipenko wrote:
> >>>> 10.08.2020 00:16, Michał Mirosław пишет:
[...]
> >>>>> - mutex_lock(®ulator_nesting_mutex);
> >>>>> + if (ww_ctx || !mutex_trylock_recursive(&rdev->mutex.base))
[...]
> >>> I think that reimplementing the function just to not use it is not the
> >>> right solution. The whole locking protocol is problematic and this patch
> >>> just uncovers one side of it.
> >>
> >> It's not clear to me what is uncovered, the ref_cnt was always accessed
> >> under lock. Could you please explain in a more details?
[...]
> The nested locking usage is discouraged in general because it is a
> source of bugs. I guess it should be possible to get rid of all nested
> lockings in the regulator core and use a pure ww_mutex, but somebody
> should dedicate time to work on it.
So using a deprecated function for deprecated functionality sounds just
appropriate, doesn't it? :-)
Best Regards,
Michał Mirosław
On Mon, Aug 10, 2020 at 12:30:30AM +0200, Michał Mirosław wrote:
> On Mon, Aug 10, 2020 at 12:40:04AM +0300, Dmitry Osipenko wrote:
> > 10.08.2020 00:16, Michał Mirosław пишет:
> > > - mutex_lock(®ulator_nesting_mutex);
> > > + if (ww_ctx || !mutex_trylock_recursive(&rdev->mutex.base))
> > Have you seen comment to the mutex_trylock_recursive()?
> > https://elixir.bootlin.com/linux/v5.8/source/include/linux/mutex.h#L205
> > * This function should not be used, _ever_. It is purely for hysterical GEM
> > * raisins, and once those are gone this will be removed.
> I think that reimplementing the function just to not use it is not the
> right solution. The whole locking protocol is problematic and this patch
> just uncovers one side of it.
OTOH if this is just making the issue more obvious rather than fixing it
then it's much harder to justify adding a new usage of a function people
are trying to remove. The coupled regulator stuff is definitely a mess
as far as locking is concerned, it really interferes with the model we
had handling regulators separately as much as possible since we're no
longer just walking back up the tree. ww does seem like the way
forwards.