Commit c05ae9d85b47 ("dax/bus.c: replace driver-core lock usage by a local rwsem")
was a bit overzealous in eliminating device_lock() usage, and ended up
removing a couple of lock acquisitions which were needed. Restore them, and as a
result, fix some of the conditional locking missteps that the above
commit introduced in unregister_dev_dax() and unregister_dax_mapping().
Fixes: c05ae9d85b47 ("dax/bus.c: replace driver-core lock usage by a local rwsem")
Reported-by: Dan Williams <[email protected]>
Signed-off-by: Vishal Verma <[email protected]>
---
drivers/dax/bus.c | 44 ++++++++++----------------------------------
1 file changed, 10 insertions(+), 34 deletions(-)
diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
index 7924dd542a13..4e04b228b080 100644
--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -465,26 +465,17 @@ static void free_dev_dax_ranges(struct dev_dax *dev_dax)
trim_dev_dax_range(dev_dax);
}
-static void __unregister_dev_dax(void *dev)
+static void unregister_dev_dax(void *dev)
{
struct dev_dax *dev_dax = to_dev_dax(dev);
dev_dbg(dev, "%s\n", __func__);
+ down_write(&dax_region_rwsem);
kill_dev_dax(dev_dax);
device_del(dev);
free_dev_dax_ranges(dev_dax);
put_device(dev);
-}
-
-static void unregister_dev_dax(void *dev)
-{
- if (rwsem_is_locked(&dax_region_rwsem))
- return __unregister_dev_dax(dev);
-
- if (WARN_ON_ONCE(down_write_killable(&dax_region_rwsem) != 0))
- return;
- __unregister_dev_dax(dev);
up_write(&dax_region_rwsem);
}
@@ -560,15 +551,12 @@ static ssize_t delete_store(struct device *dev, struct device_attribute *attr,
if (!victim)
return -ENXIO;
- rc = down_write_killable(&dax_region_rwsem);
- if (rc)
- return rc;
- rc = down_write_killable(&dax_dev_rwsem);
- if (rc) {
- up_write(&dax_region_rwsem);
- return rc;
- }
+ device_lock(dev);
+ device_lock(victim);
dev_dax = to_dev_dax(victim);
+ rc = down_write_killable(&dax_dev_rwsem);
+ if (rc)
+ return rc;
if (victim->driver || dev_dax_size(dev_dax))
rc = -EBUSY;
else {
@@ -589,11 +577,12 @@ static ssize_t delete_store(struct device *dev, struct device_attribute *attr,
rc = -EBUSY;
}
up_write(&dax_dev_rwsem);
+ device_unlock(victim);
/* won the race to invalidate the device, clean it up */
if (do_del)
devm_release_action(dev, unregister_dev_dax, victim);
- up_write(&dax_region_rwsem);
+ device_unlock(dev);
put_device(victim);
return rc;
@@ -705,7 +694,7 @@ static void dax_mapping_release(struct device *dev)
put_device(parent);
}
-static void __unregister_dax_mapping(void *data)
+static void unregister_dax_mapping(void *data)
{
struct device *dev = data;
struct dax_mapping *mapping = to_dax_mapping(dev);
@@ -713,25 +702,12 @@ static void __unregister_dax_mapping(void *data)
dev_dbg(dev, "%s\n", __func__);
- lockdep_assert_held_write(&dax_region_rwsem);
-
dev_dax->ranges[mapping->range_id].mapping = NULL;
mapping->range_id = -1;
device_unregister(dev);
}
-static void unregister_dax_mapping(void *data)
-{
- if (rwsem_is_locked(&dax_region_rwsem))
- return __unregister_dax_mapping(data);
-
- if (WARN_ON_ONCE(down_write_killable(&dax_region_rwsem) != 0))
- return;
- __unregister_dax_mapping(data);
- up_write(&dax_region_rwsem);
-}
-
static struct dev_dax_range *get_dax_range(struct device *dev)
{
struct dax_mapping *mapping = to_dax_mapping(dev);
--
2.44.0
Vishal Verma wrote:
> Commit c05ae9d85b47 ("dax/bus.c: replace driver-core lock usage by a local rwsem")
> was a bit overzealous in eliminating device_lock() usage, and ended up
> removing a couple of lock acquisitions which were needed, and as a
> result, fix some of the conditional locking missteps that the above
> commit introduced in unregister_dax_dev() and unregister_dax_mapping().
I think it makes sense to tell the story a bit about why the
delete_store() conversion was problematic, because the
unregister_dev_dax() changes were just a knock-on effect to fixing the
delete_store() flow.
Something like:
---
commit c05ae9d85b47 ("dax/bus.c: replace driver-core lock usage by a local rwsem")
aimed to undo device_lock() abuses for protecting changes to dax-driver
internal data-structures like the dax_region resource tree to
device-dax-instance range structures. However, the device_lock() was legitimately
enforcing that devices to be deleted were not currently actively attached
to any driver nor assigned any capacity from the region.
---
...you can fill in a couple notes about the knock-on fixups after that
was restored.
> Fixes: c05ae9d85b47 ("dax/bus.c: replace driver-core lock usage by a local rwsem")
> Reported-by: Dan Williams <[email protected]>
> Signed-off-by: Vishal Verma <[email protected]>
> ---
> drivers/dax/bus.c | 44 ++++++++++----------------------------------
> 1 file changed, 10 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
> index 7924dd542a13..4e04b228b080 100644
> --- a/drivers/dax/bus.c
> +++ b/drivers/dax/bus.c
> @@ -465,26 +465,17 @@ static void free_dev_dax_ranges(struct dev_dax *dev_dax)
> trim_dev_dax_range(dev_dax);
> }
>
> -static void __unregister_dev_dax(void *dev)
> +static void unregister_dev_dax(void *dev)
> {
> struct dev_dax *dev_dax = to_dev_dax(dev);
>
> dev_dbg(dev, "%s\n", __func__);
>
> + down_write(&dax_region_rwsem);
> kill_dev_dax(dev_dax);
> device_del(dev);
> free_dev_dax_ranges(dev_dax);
> put_device(dev);
> -}
> -
> -static void unregister_dev_dax(void *dev)
> -{
> - if (rwsem_is_locked(&dax_region_rwsem))
> - return __unregister_dev_dax(dev);
> -
> - if (WARN_ON_ONCE(down_write_killable(&dax_region_rwsem) != 0))
> - return;
> - __unregister_dev_dax(dev);
> up_write(&dax_region_rwsem);
> }
>
> @@ -560,15 +551,12 @@ static ssize_t delete_store(struct device *dev, struct device_attribute *attr,
> if (!victim)
> return -ENXIO;
>
> - rc = down_write_killable(&dax_region_rwsem);
> - if (rc)
> - return rc;
> - rc = down_write_killable(&dax_dev_rwsem);
> - if (rc) {
> - up_write(&dax_region_rwsem);
> - return rc;
> - }
> + device_lock(dev);
> + device_lock(victim);
> dev_dax = to_dev_dax(victim);
> + rc = down_write_killable(&dax_dev_rwsem);
This begs the question, why down_write_killable(), but not
device_lock_interruptible()?
I do not expect any of this is long running so likely down_write() is
sufficient here, especially since the heaviest locks to acquire are
already held by the time rwsem is considered.
Other than that this looks good to me:
You can include my Reviewed-by on the next posting.
On Mon, 2024-04-29 at 18:25 -0700, Dan Williams wrote:
> Vishal Verma wrote:
> > Commit c05ae9d85b47 ("dax/bus.c: replace driver-core lock usage by a local rwsem")
> > was a bit overzealous in eliminating device_lock() usage, and ended up
> > removing a couple of lock acquisitions which were needed, and as a
> > result, fix some of the conditional locking missteps that the above
> > commit introduced in unregister_dax_dev() and unregister_dax_mapping().
>
> I think it makes sense to tell the story a bit about why the
> delete_store() conversion was problematic, because the
> unregister_dev_dax() changes were just a knock-on effect to fixing the
> delete_store() flow.
>
> Something like:
>
> ---
> commit c05ae9d85b47 ("dax/bus.c: replace driver-core lock usage by a local rwsem")
> aimed to undo device_lock() abuses for protecting changes to dax-driver
> internal data-structures like the dax_region resource tree to
> device-dax-instance range structures. However, the device_lock() was legitimately
> enforcing that devices to be deleted were not currently actively attached
> to any driver nor assigned any capacity from the region.
> ---
>
> ...you can fill in a couple notes about the knock-on fixups after that
> was restored.
Sounds good, updated!
>
> >
> > @@ -560,15 +551,12 @@ static ssize_t delete_store(struct device *dev, struct device_attribute *attr,
> > if (!victim)
> > return -ENXIO;
> >
> > - rc = down_write_killable(&dax_region_rwsem);
> > - if (rc)
> > - return rc;
> > - rc = down_write_killable(&dax_dev_rwsem);
> > - if (rc) {
> > - up_write(&dax_region_rwsem);
> > - return rc;
> > - }
> > + device_lock(dev);
> > + device_lock(victim);
> > dev_dax = to_dev_dax(victim);
> > + rc = down_write_killable(&dax_dev_rwsem);
>
> This begs the question, why down_write_killable(), but not
> device_lock_interruptible()?
Do you mean change the device_lock()s to device_lock_interruptible() in
addition to taking the rwsem (i.e. not instead of the rwsem..)?
I guess I just restored what was there previously - but the
interruptible variant makes sense, I can make that change.
>
> I do not expect any of this is long running so likely down_write() is
> sufficient here, especially since the heaviest locks to acquire are
> already held by the time rwsem is considered.
>
> Other than that this looks good to me:
>
> You can include my Reviewed-by on the next posting.
Thanks for the review Dan!
Verma, Vishal L wrote:
> > > @@ -560,15 +551,12 @@ static ssize_t delete_store(struct device *dev, struct device_attribute *attr,
> > > if (!victim)
> > > return -ENXIO;
> > >
> > > - rc = down_write_killable(&dax_region_rwsem);
> > > - if (rc)
> > > - return rc;
> > > - rc = down_write_killable(&dax_dev_rwsem);
> > > - if (rc) {
> > > - up_write(&dax_region_rwsem);
> > > - return rc;
> > > - }
> > > + device_lock(dev);
> > > + device_lock(victim);
> > > dev_dax = to_dev_dax(victim);
> > > + rc = down_write_killable(&dax_dev_rwsem);
> >
> > This begs the question, why down_write_killable(), but not
> > device_lock_interruptible()?
>
> Do you mean change the device_lock()s to device_lock_interruptible() in
> addition to taking the rwsem (i.e. not instead of the rwsem..)?
I mean convert the rwsem to drop _killable.
> I guess I just restored what was there previously - but the
> interruptible variant makes sense, I can make that change.
So the original code did device_lock(), then the rework added killable
rwsem (deleted device_lock()), and now the fixes add device_lock() back.
So now that there is a mix of killable/interruptible lock usage all the
locks should agree.
Since there really is no risk of these operations being long running
there is no driving need to make them killable/interruptible, so go with
the simple option.