2024-04-02 06:25:00

by Vishal Verma

[permalink] [raw]
Subject: [PATCH] dax/bus.c: replace WARN_ON_ONCE() with lockdep asserts

In [1], Dan points out that all of the WARN_ON_ONCE() usage in the
referenced patch should be replaced with lockdep_assert_held(_write)().

Replace those, and additionally, replace a couple of other
WARN_ON_ONCE() introduced in the same patch for actual failure
cases (i.e. when acquiring a semaphore fails in a remove / unregister
path) with dev_WARN_ONCE() as is the precedent here.

Recall that previously, unregistration paths was implicitly protected by
overloading the device lock, which the patch in [1] sought to remove.
This meant adding a semaphore acquisition in these unregistration paths.
Since that can fail, and it doesn't make sense to return errors from
these paths, retain the two instances of (now) dev_WARN_ONCE().

Link: https://lore.kernel.org/r/65f0b5ef41817_aa222941a@dwillia2-mobl3.amr.corp.intel.com.notmuch [1]
Fixes: c05ae9d85b47 ("dax/bus.c: replace driver-core lock usage by a local rwsem")
Cc: Andrew Morton <[email protected]>
Reported-by: Dan Williams <[email protected]>
Signed-off-by: Vishal Verma <[email protected]>
---
drivers/dax/bus.c | 23 +++++++++++++----------
1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
index 797e1ebff299..d89c8c3b62f7 100644
--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -192,7 +192,7 @@ static u64 dev_dax_size(struct dev_dax *dev_dax)
u64 size = 0;
int i;

- WARN_ON_ONCE(!rwsem_is_locked(&dax_dev_rwsem));
+ lockdep_assert_held(&dax_dev_rwsem);

for (i = 0; i < dev_dax->nr_range; i++)
size += range_len(&dev_dax->ranges[i].range);
@@ -302,7 +302,7 @@ static unsigned long long dax_region_avail_size(struct dax_region *dax_region)
resource_size_t size = resource_size(&dax_region->res);
struct resource *res;

- WARN_ON_ONCE(!rwsem_is_locked(&dax_region_rwsem));
+ lockdep_assert_held(&dax_region_rwsem);

for_each_dax_region_resource(dax_region, res)
size -= resource_size(res);
@@ -447,7 +447,7 @@ static void trim_dev_dax_range(struct dev_dax *dev_dax)
struct range *range = &dev_dax->ranges[i].range;
struct dax_region *dax_region = dev_dax->region;

- WARN_ON_ONCE(!rwsem_is_locked(&dax_region_rwsem));
+ lockdep_assert_held_write(&dax_region_rwsem);
dev_dbg(&dev_dax->dev, "delete range[%d]: %#llx:%#llx\n", i,
(unsigned long long)range->start,
(unsigned long long)range->end);
@@ -471,6 +471,7 @@ static void __unregister_dev_dax(void *dev)

dev_dbg(dev, "%s\n", __func__);

+ lockdep_assert_held_write(&dax_region_rwsem);
kill_dev_dax(dev_dax);
device_del(dev);
free_dev_dax_ranges(dev_dax);
@@ -482,7 +483,8 @@ static void unregister_dev_dax(void *dev)
if (rwsem_is_locked(&dax_region_rwsem))
return __unregister_dev_dax(dev);

- if (WARN_ON_ONCE(down_write_killable(&dax_region_rwsem) != 0))
+ if (dev_WARN_ONCE(dev, down_write_killable(&dax_region_rwsem) != 0,
+ "unable to acquire region rwsem\n"))
return;
__unregister_dev_dax(dev);
up_write(&dax_region_rwsem);
@@ -507,7 +509,7 @@ static int __free_dev_dax_id(struct dev_dax *dev_dax)
struct dax_region *dax_region;
int rc = dev_dax->id;

- WARN_ON_ONCE(!rwsem_is_locked(&dax_dev_rwsem));
+ lockdep_assert_held_write(&dax_dev_rwsem);

if (!dev_dax->dyn_id || dev_dax->id < 0)
return -1;
@@ -713,7 +715,7 @@ static void __unregister_dax_mapping(void *data)

dev_dbg(dev, "%s\n", __func__);

- WARN_ON_ONCE(!rwsem_is_locked(&dax_region_rwsem));
+ lockdep_assert_held_write(&dax_region_rwsem);

dev_dax->ranges[mapping->range_id].mapping = NULL;
mapping->range_id = -1;
@@ -726,7 +728,8 @@ static void unregister_dax_mapping(void *data)
if (rwsem_is_locked(&dax_region_rwsem))
return __unregister_dax_mapping(data);

- if (WARN_ON_ONCE(down_write_killable(&dax_region_rwsem) != 0))
+ if (dev_WARN_ONCE(data, down_write_killable(&dax_region_rwsem) != 0,
+ "unable to acquire region rwsem\n"))
return;
__unregister_dax_mapping(data);
up_write(&dax_region_rwsem);
@@ -830,7 +833,7 @@ static int devm_register_dax_mapping(struct dev_dax *dev_dax, int range_id)
struct device *dev;
int rc;

- WARN_ON_ONCE(!rwsem_is_locked(&dax_region_rwsem));
+ lockdep_assert_held_write(&dax_region_rwsem);

if (dev_WARN_ONCE(&dev_dax->dev, !dax_region->dev->driver,
"region disabled\n"))
@@ -876,7 +879,7 @@ static int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start,
struct resource *alloc;
int i, rc;

- WARN_ON_ONCE(!rwsem_is_locked(&dax_region_rwsem));
+ lockdep_assert_held_write(&dax_region_rwsem);

/* handle the seed alloc special case */
if (!size) {
@@ -935,7 +938,7 @@ static int adjust_dev_dax_range(struct dev_dax *dev_dax, struct resource *res, r
struct device *dev = &dev_dax->dev;
int rc;

- WARN_ON_ONCE(!rwsem_is_locked(&dax_region_rwsem));
+ lockdep_assert_held_write(&dax_region_rwsem);

if (dev_WARN_ONCE(dev, !size, "deletion is handled by dev_dax_shrink\n"))
return -EINVAL;

---
base-commit: 39cd87c4eb2b893354f3b850f916353f2658ae6f
change-id: 20240402-vv-dax_abi_fixes-8af3b6ff2e5a

Best regards,
--
Vishal Verma <[email protected]>



2024-04-04 02:33:30

by Dan Williams

[permalink] [raw]
Subject: Re: [PATCH] dax/bus.c: replace WARN_ON_ONCE() with lockdep asserts

Vishal Verma wrote:
> In [1], Dan points out that all of the WARN_ON_ONCE() usage in the
> referenced patch should be replaced with lockdep_assert_held(_write)().
>
> Replace those, and additionally, replace a couple of other
> WARN_ON_ONCE() introduced in the same patch for actual failure
> cases (i.e. when acquiring a semaphore fails in a remove / unregister
> path) with dev_WARN_ONCE() as is the precedent here.

/me goes to look why failures are warned vs bubbling up the error...

>
> Recall that previously, unregistration paths was implicitly protected by
> overloading the device lock, which the patch in [1] sought to remove.
> This meant adding a semaphore acquisition in these unregistration paths.
> Since that can fail, and it doesn't make sense to return errors from
> these paths, retain the two instances of (now) dev_WARN_ONCE().
>
> Link: https://lore.kernel.org/r/65f0b5ef41817_aa222941a@dwillia2-mobl3.amr.corp.intel.com.notmuch [1]
> Fixes: c05ae9d85b47 ("dax/bus.c: replace driver-core lock usage by a local rwsem")
> Cc: Andrew Morton <[email protected]>
> Reported-by: Dan Williams <[email protected]>
> Signed-off-by: Vishal Verma <[email protected]>
> ---
> drivers/dax/bus.c | 23 +++++++++++++----------
> 1 file changed, 13 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
> index 797e1ebff299..d89c8c3b62f7 100644
> --- a/drivers/dax/bus.c
> +++ b/drivers/dax/bus.c
[..]
> @@ -726,7 +728,8 @@ static void unregister_dax_mapping(void *data)

..and realizes that is hunk is confused.

> if (rwsem_is_locked(&dax_region_rwsem))
> return __unregister_dax_mapping(data);

This is bug. Either it is safe to call this without the lock held, or it
must be safe to always acquire the lock. Anything else means the caller
needs to be refactored. Conditional locking is an indicator of a deeper
problem with code organization.

Yes, this was not part of the fixup, but the changelog led me here
because no WARNs should remain at the end of this effort.

I think the confusion results from replace *all* device_lock() when some
device_lock() is still needed.

> - if (WARN_ON_ONCE(down_write_killable(&dax_region_rwsem) != 0))
> + if (dev_WARN_ONCE(data, down_write_killable(&dax_region_rwsem) != 0,
> + "unable to acquire region rwsem\n"))

In a context like this that cannot return an error directly to the user
process making the request, like in a sysfs operation handler, then the
locking cannot worry about signals. This would become an uncoditional
down_write() lock. So down_write_killable() is typically for request
contexts where there is a user process waiting for a result. For
contexts like async driver probing where we might be in a kernel thread,
and definitely in functions that return 'void', it is a bug to use
fallible locks.

2024-04-04 21:25:12

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH] dax/bus.c: replace WARN_ON_ONCE() with lockdep asserts

On Tue, 02 Apr 2024 00:24:28 -0600 Vishal Verma <[email protected]> wrote:

> In [1], Dan points out that all of the WARN_ON_ONCE() usage in the
> referenced patch should be replaced with lockdep_assert_held(_write)().
>
> Replace those, and additionally, replace a couple of other
> WARN_ON_ONCE() introduced in the same patch for actual failure
> cases (i.e. when acquiring a semaphore fails in a remove / unregister
> path) with dev_WARN_ONCE() as is the precedent here.
>
> Recall that previously, unregistration paths was implicitly protected by
> overloading the device lock, which the patch in [1] sought to remove.
> This meant adding a semaphore acquisition in these unregistration paths.
> Since that can fail, and it doesn't make sense to return errors from
> these paths, retain the two instances of (now) dev_WARN_ONCE().
>
> ...
>
> @@ -471,6 +471,7 @@ static void __unregister_dev_dax(void *dev)
>
> dev_dbg(dev, "%s\n", __func__);
>
> + lockdep_assert_held_write(&dax_region_rwsem);
> kill_dev_dax(dev_dax);
> device_del(dev);
> free_dev_dax_ranges(dev_dax);

This is new and unchangelogged?

I'm taking Dan's reply to your patch as Not-A-Nack ;)

2024-04-04 21:40:49

by Vishal Verma

[permalink] [raw]
Subject: Re: [PATCH] dax/bus.c: replace WARN_ON_ONCE() with lockdep asserts

On Thu, 2024-04-04 at 14:23 -0700, Andrew Morton wrote:
> On Tue, 02 Apr 2024 00:24:28 -0600 Vishal Verma <[email protected]> wrote:
>
> > In [1], Dan points out that all of the WARN_ON_ONCE() usage in the
> > referenced patch should be replaced with lockdep_assert_held(_write)().
> >
> > Replace those, and additionally, replace a couple of other
> > WARN_ON_ONCE() introduced in the same patch for actual failure
> > cases (i.e. when acquiring a semaphore fails in a remove / unregister
> > path) with dev_WARN_ONCE() as is the precedent here.
> >
> > Recall that previously, unregistration paths was implicitly protected by
> > overloading the device lock, which the patch in [1] sought to remove.
> > This meant adding a semaphore acquisition in these unregistration paths.
> > Since that can fail, and it doesn't make sense to return errors from
> > these paths, retain the two instances of (now) dev_WARN_ONCE().
> >
> > ...
> >
> > @@ -471,6 +471,7 @@ static void __unregister_dev_dax(void *dev)
> >  
> >   dev_dbg(dev, "%s\n", __func__);
> >  
> > + lockdep_assert_held_write(&dax_region_rwsem);
> >   kill_dev_dax(dev_dax);
> >   device_del(dev);
> >   free_dev_dax_ranges(dev_dax);
>
> This is new and unchangelogged?
>
> I'm taking Dan's reply to your patch as Not-A-Nack ;)
>
True, but with Dan's new feedback, that results in a bit more rework,
this will likely turn into 2-3 patches. Working on it now, will be out
shortly!